# Source code for lda_over_time.models.dtm_model_interface

"""
DtmModelInterface is an interface for creating new DTM modules. These modules
should train the model and return values to the front end.
"""

class DtmModelInterface:
    """
    Interface that a DTM module should follow to be passed to the front end.

    Every member defined here is a placeholder: it does nothing and returns
    ``None``. Concrete DTM modules are expected to override them.

    The parameters below describe the values a DTM module is expected to
    work with; this interface itself does not define a constructor.

    :param corpus: Each item from the list is one document from corpus.
    :type corpus: list[str]

    :param dates: List of timestamps for each document in corpus, each
        date's position should match with its respective text.
    :type dates: list[str]

    :param date_format: The date format used in `dates`.
    :type date_format: str

    :param freq: The frequency used to group texts.
    :type freq: str

    :param n_topics: Number of topics that the DTM model should find. The
        default value is 100.
    :type n_topics: int, optional

    :param sep: Separator used to split each word, the default value is any
        blank space.
    :type sep: str, optional

    :param workers: Number of workers (cpus) to use. If not provided, it
        will use the value of multiprocessing.cpu_count()
    :type workers: int, optional
    """

    @property
    def n_timeslices(self):
        """
        This attribute should return the number of time slices found.

        :return: It should return the number of time slices found in corpus.
        :rtype: int
        """
        pass

    # NOTE(review): the column name `data` below is kept from the original
    # documentation; it may have been meant to read `date` -- confirm against
    # the front end before relying on it.
    def get_results(self):
        """
        This method should return a table representing the evolution of each
        topic over time.

        :return: It must return a Pandas' dataframe where rows represent
            different time slices sorted by date. It must have one column
            `data` and the remaining columns numbered from 0 to k (number
            of topics - 1) that hold the weights of each topic in that
            period.
        :rtype: pandas.core.frame.DataFrame
        """
        pass

    def get_topic_words(self, topic_id, i, n):
        """
        This method should return the top n words that best describe the
        topic in a specific time slice.

        :param topic_id: The id of the desired topic.
        :type topic_id: int

        :param i: The position of the desired time slice in chronological
            order; the first (oldest) time slice is indexed by 1.
        :type i: int

        :param n: This specifies how many of the words that best describe
            the topic at a specific time slice should be returned.
        :type n: int

        :return: It returns a list of the top n words that best describe the
            requested topic in a specific time.
        :rtype: list[str]
        """
        pass

    def prepare_args(self, i):
        """
        This method should return a dictionary with all necessary values to
        call PyLdaVis.prepare method.

        :param i: The position of the desired time slice in chronological
            order; the first (oldest) time slice is indexed by 1.
        :type i: int

        :return: It returns a dictionary ready to be passed to PyLdaVis
        :rtype: dict[str, any]
        """
        pass

    def train(self):
        """
        This method should train the dtm model.

        :return: nothing
        :rtype: None
        """
        pass