How to reproduce gensim Lda Model

15 views Asked by At

When using lda model in gensim, I can reproduce the inference result only by running this script by "train_pred". But when I save the model and do the inference procedure by "pred", the result is different. Basically, I want to use this model to create determinated variables. Any ideas to make the inference result reproducible?

from gensim.corpora.dictionary import Dictionary
from gensim.models import LdaModel
import numpy as np
import pickle
import pandas as pd
import argparse
from gensim import corpora, models
from gensim.test.utils import datapath

def readCL():
    parser = argparse.ArgumentParser()
    parser.add_argument("-s","--stage", default='dict')
    args = parser.parse_args()
    return args.stage

if __name__ =='__main__':
    stage = readCL()
    num_topics=5
    temp_file = datapath('D:/Test/ldamodel_%s.model'%(num_topics))
    if stage =='dict':
        common_dictionary = Dictionary(common_texts)
        common_dictionary.save('dictionary.dictionary')
        
        
    elif stage == 'train_pred': 
        common_dictionary=corpora.Dictionary.load("dictionary.dictionary")      
        common_corpus = [common_dictionary.doc2bow(text) for text in common_texts]
        # before training
        np.random.seed(1)
        lda = LdaModel(common_corpus, num_topics=num_topics)
        print('After training--',np.random.get_state()[1][0])
        lda.save(temp_file)
           
    elif stage == 'pred':
        common_dictionary=corpora.Dictionary.load("dictionary.dictionary") 
        lda=models.ldamodel.LdaModel.load(temp_file)
        print(np.random.get_state()[1][0])
        
    if stage in ['train_pred','pred']:
        other_texts = [
            ['computer', 'time', 'graph'],
            ['survey', 'response', 'eps'],
            ['human', 'system', 'computer']
        ]

        other_corpus = [common_dictionary.doc2bow(text) for text in other_texts]
        unseen_doc = other_corpus[0]

        np.random.seed(1)
        print('Before inference--',np.random.get_state()[1][0])
        a = lda[unseen_doc]
        print('After inference--',np.random.get_state()[1][0])
        np.random.seed(1)
        print('Before inference--',np.random.get_state()[1][0])
        b = lda[unseen_doc]
        print('After inference--',np.random.get_state()[1][0])
        

        for i in range(len(a)):
            print(a[i][-1]==b[i][-1])

enter image description here

0

There are 0 answers