I have a error when trying to call calculate_similarity2 function which in in DocSim.py file from my notebook.

The error message is : 'DocSim' object has no attribute 'calculate_similarity2'

Here the content of my docsim File :

import numpy as np

class DocSim(object):
    def __init__(self, w2v_model , stopwords=[]):
        self.w2v_model = w2v_model
        self.stopwords = stopwords





    def vectorize(self, doc):
        """Identify the vector values for each word in the given document"""
        doc = doc.lower()
        words = [w for w in doc.split(" ") if w not in self.stopwords]
        word_vecs = []
        for word in words:
            try:
                vec = self.w2v_model[word]
                word_vecs.append(vec)
            except KeyError:
                # Ignore, if the word doesn't exist in the vocabulary
                pass

        # Assuming that document vector is the mean of all the word vectors
        # PS: There are other & better ways to do it.
        vector = np.mean(word_vecs, axis=0)

        return vector


    def _cosine_sim(self, vecA, vecB):
        """Find the cosine similarity distance between two vectors."""
        csim = np.dot(vecA, vecB) / (np.linalg.norm(vecA) * np.linalg.norm(vecB))
        if np.isnan(np.sum(csim)):
            return 0
        return csim

    def calculate_similarity(self, source_doc, target_docs=[], threshold=0):
        """Calculates & returns similarity scores between given source document & all
        the target documents."""
        if isinstance(target_docs, str):
            target_docs = [target_docs]

        source_vec = self.vectorize(source_doc)
        results = []
        for doc in target_docs:
            target_vec = self.vectorize(doc)
            sim_score = self._cosine_sim(source_vec, target_vec)
            if sim_score > threshold:
                results.append({
                    'score' : sim_score,
                    'sentence' : doc
                })
            # Sort results by score in desc order
            results.sort(key=lambda k : k['score'] , reverse=True)

        return results

    def calculate_similarity2(self, source_doc=[], target_docs=[], threshold=0):
    """Calculates & returns similarity scores between given source document & all the target documents."""
        if isinstance(source_doc, str):
            target_docs = [source_doc]
            if isinstance(target_docs, str):
                target_docs = [target_docs]
                #source_vec = self.vectorize(source_doc)
                results = []
                for doc in source_doc:
                    source_vec = self.vectorize(doc)
                    for doc1 in target_docs:
                        target_vec = self.vectorize(doc)
                        sim_score = self._cosine_sim(source_vec, target_vec)
                        if sim_score > threshold:
                            results.append({
                                'score' : sim_score,
                                'source sentence' : doc,
                                'target sentence' : doc1
                            })
                        # Sort results by score in desc order
                        results.sort(key=lambda k : k['score'] , reverse=True)
        return results

here in instruction code when i try to call the fucntion :

To create DocSim Object

ds = DocSim(word2vec_model,stopwords=stopwords)

sim_scores = ds.calculate_similarity2(source_doc, target_docs)

the error message is :

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-54-bb0bd1e0e0ad> in <module>()
----> 1 sim_scores = ds.calculate_similarity2(source_doc, target_docs)

AttributeError: 'DocSim' object has no attribute 'calculate_similarity2'

i don't undersantand how to resolve this problem.

I can access to all function except calculate_similarity2

Can you help me please?

thanks

1 Answers

1
RanAB On Best Solutions

You have defined the calculate_similarity2 function inside the __init__ scope. Try getting it out of there