# Source code for lda_over_time.models.dtm_model_interface

"""
DtmModelInterface is an interface for creating new DTM modules; these modules
should train the model and return values to the front end.
"""

class DtmModelInterface:
    """
    Define the methods and attributes a DTM module must provide in order to
    be passed to the front end.

    This base class defines no constructor and every member below is a
    placeholder that does nothing and returns ``None``; implementing modules
    are expected to override all of them. The parameters listed here describe
    what an implementing module is expected to accept.

    :param corpus: Each item from the list is one document from corpus.
    :type corpus: list[str]

    :param dates: List of timestamps for each document in corpus; each
        date's position should match its respective text.
    :type dates: list[str]

    :param date_format: The date format used in `dates`.
    :type date_format: str

    :param freq: The frequency used to group texts.
    :type freq: str

    :param n_topics: Number of topics that the DTM model should find. The
        default value is 100.
    :type n_topics: int, optional

    :param sep: Separator used to split each word; the default value is any
        blank space.
    :type sep: str, optional

    :param workers: Number of workers (cpus) to use. If not provided, it
        will use the value of multiprocessing.cpu_count()
    :type workers: int, optional

    """

    @property
    def n_timeslices(self):
        """
        Return the number of time slices found.

        The base implementation is a placeholder and returns ``None``.

        :return: It should return the number of time slices found in corpus.
        :rtype: int

        """
        return None

    def get_results(self):
        """
        Return a table representing the evolution of each topic over time.

        The base implementation is a placeholder and returns ``None``.

        :return: It must return a pandas DataFrame where rows represent
            different time slices sorted by date; it must have one column
            `data` and the remaining columns numbered from 0 to k (number
            of topics - 1) that hold the weights of each topic in that
            period.
        :rtype: pandas.core.frame.DataFrame

        """
        return None

    def get_topic_words(self, topic_id: int, i: int, n: int):
        """
        Return the top n words that best describe a topic in a specific
        time slice.

        The base implementation is a placeholder and returns ``None``.

        :param topic_id: The id of the desired topic.
        :type topic_id: int

        :param i: The position of the desired time slice in chronological
            order, the first (oldest) time slice is indexed by 1.
        :type i: int

        :param n: How many of the words that best describe the topic at the
            given time slice should be returned.
        :type n: int

        :return: It returns a list of the top n words that best describe the
            requested topic in a specific time.
        :rtype: list[str]

        """
        return None

    def prepare_args(self, i: int):
        """
        Return a dictionary with all necessary values to call the
        PyLdaVis.prepare method.

        The base implementation is a placeholder and returns ``None``.

        :param i: The position of the desired time slice in chronological
            order, the first (oldest) time slice is indexed by 1.
        :type i: int

        :return: It returns a dictionary ready to be passed to PyLdaVis
        :rtype: dict[str, Any]

        """
        return None

    def train(self) -> None:
        """
        Train the DTM model.

        The base implementation is a placeholder that does nothing.

        :return: nothing
        :rtype: None

        """
        return None