I'm trying to calculate a metric to evaluate the coherence of each topic in my corpus in this code:
import tmtoolkit
import numpy as np

from tmtoolkit.topicmod.evaluate import metric_coherence_gensim
# NOTE(review): this code also calls LatentDirichletAllocation, which needs
# `from sklearn.decomposition import LatentDirichletAllocation` — add it here.
def topic_model_coherence_generator(topic_num_start=2,
                                    topic_num_end=6,
                                    norm_corpus='',
                                    cv_matrix='',
                                    cv=''):
    """Fit one LDA model per topic count and score each with c_v coherence.

    Parameters
    ----------
    topic_num_start, topic_num_end : int
        Half-open range of topic counts to try (``topic_num_end`` excluded).
    norm_corpus : iterable of str
        Normalized documents; each is whitespace-split into tokens for c_v.
    cv_matrix : sparse matrix
        Document-term matrix already produced by ``cv`` on ``norm_corpus``.
    cv : fitted CountVectorizer
        The vectorizer that produced ``cv_matrix`` (used for the vocabulary).

    Returns
    -------
    (models, coherence_scores) : (list of LDA models, list of float)
        One fitted model and one mean c_v coherence per topic count.

    NOTE(review): requires ``from sklearn.decomposition import
    LatentDirichletAllocation`` at the top of the file. The reported
    ``TypeError: argument of type 'FakedGensimDict' is not iterable`` comes
    from a tmtoolkit/gensim version mismatch, not from this function's logic —
    upgrade tmtoolkit (or pin a compatible gensim) so the two agree.
    """
    # Tokenized documents are required by the 'c_v' coherence measure.
    norm_corpus_tokens = [doc.split() for doc in norm_corpus]

    # Vocabulary must come from the SAME fitted vectorizer that produced
    # cv_matrix, so its column order matches cur_lda.components_.
    try:
        vocab = np.array(cv.get_feature_names_out())  # sklearn >= 1.0
    except AttributeError:
        vocab = np.array(cv.get_feature_names())      # removed in sklearn 1.2

    models = []
    coherence_scores = []
    for num_topics in range(topic_num_start, topic_num_end):
        print(num_topics)
        cur_lda = LatentDirichletAllocation(n_components=num_topics,
                                            max_iter=10000,
                                            random_state=0)
        cur_lda.fit(cv_matrix)
        # Bug fix: the original called cv.fit_transform(norm_corpus) here,
        # re-fitting the vectorizer every iteration; reuse cv_matrix instead.
        cur_coherence_score = metric_coherence_gensim(
            measure='c_v',
            top_n=5,
            topic_word_distrib=cur_lda.components_,
            dtm=cv_matrix,
            vocab=vocab,
            texts=norm_corpus_tokens)
        models.append(cur_lda)
        # Bug fix: the original line was missing its closing parenthesis,
        # which made the whole snippet a SyntaxError.
        coherence_scores.append(np.mean(cur_coherence_score))
    return models, coherence_scores
%%time
# NOTE(review): %%time is IPython cell magic, not Python — it only works as
# the first line of a Jupyter cell, so this must live in its own cell.
# Range of topic counts to evaluate: 2..9 inclusive (end is exclusive).
ts = 2
te = 10
# norm_corpus, cv, and cv_matrix are presumably defined in earlier notebook
# cells (normalized docs, fitted CountVectorizer, and its DTM) — verify.
models, coherence_scores = topic_model_coherence_generator(
ts, te, norm_corpus=norm_corpus, cv=cv, cv_matrix=cv_matrix)
It displays this error: TypeError: argument of type 'FakedGensimDict' is not iterable
Can anyone help me fix this code?