from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.corpus import wordnet as wn
#from itertools import product
# Semantic matching of an uploaded text against reference ("database") words
# and a reference sentence, scored with WordNet Wu-Palmer similarity.
#
# NOTE: word_tokenize, the stopword list and WordNet all need their NLTK data
# packages to be installed (via nltk.download) before this script can run.

# A synset pair counts as "found" when its Wu-Palmer score reaches this value.
SIMILARITY_THRESHOLD = 0.70


def _drop_stopwords(tokens, stop_words):
    """Return the tokens that are not stopwords, compared case-insensitively."""
    return [token for token in tokens if token.lower() not in stop_words]


def _synsets_per_word(words):
    """Return one list of WordNet synsets per word, in input order."""
    return [wn.synsets(word) for word in words]


def _flatten(nested):
    """Concatenate a list of lists into one flat list."""
    return [item for inner in nested for item in inner]


def _pair_scores(reference_synsets, candidate_synsets):
    """Return the Wu-Palmer score of every (reference, candidate) pair.

    Each pair is scored exactly once. A score may be None; callers treat
    that as "not found".
    """
    return [
        wn.wup_similarity(reference, candidate)
        for reference in reference_synsets
        for candidate in candidate_synsets
    ]


def _count_matches(scores, threshold=SIMILARITY_THRESHOLD):
    """Return ``(found, not_found)`` for *scores*.

    A score is "found" when it is not None and is at least *threshold*;
    everything else is "not found".
    """
    found = sum(1 for score in scores if score is not None and score >= threshold)
    return found, len(scores) - found


# the given data
uploaded_sentence=" The issue of text semantics, such as word semantics and sentence semantics has received increasing attentions in recent years. However, rare research focuses on the document-level semantic matching due to its complexity. Long documents usually have sophisticated structure and massive information, which causes hardship to measure their semantic similarity. The semantic similarity between words, sentences, texts, and documents is widely studied in various fields, including natural language processing, document semantic comparison, artificial intelligence, semantic web, and semantic search engines. "
database_word = ["car", "complete", 'run', "sleep"]
database_sentence = "the earth is round not flat"

# Bound to a new name so the imported `stopwords` corpus reader is not
# shadowed; a set makes each membership test O(1).
stop_words = set(stopwords.words('english'))

# filtering the uploaded sentence and looking up its synsets
uploaded_sentence_words_tokenized = word_tokenize(uploaded_sentence)
filtered_uploaded_sentences = _drop_stopwords(
    uploaded_sentence_words_tokenized, stop_words
)
print(filtered_uploaded_sentences)
uploaded_sentence_synset = _synsets_per_word(filtered_uploaded_sentences)
print(uploaded_sentence_synset)

# synsets of the database words, then flattened to a single list
database_word_synset = _synsets_per_word(database_word)
print(database_word_synset)
words_list_synset = _flatten(database_word_synset)
print(words_list_synset)

# removing empty list elements and making a single-dimension list
removing_empty_list_uploaded_sentence = [
    synsets for synsets in uploaded_sentence_synset if synsets
]
up_list_sentence = _flatten(removing_empty_list_uploaded_sentence)
print(up_list_sentence)

# similarity between the database words and the uploaded text
word_check = _pair_scores(words_list_synset, up_list_sentence)
count_word, not_found_word = _count_matches(word_check)
print(word_check)
print("\n words that are not found :", not_found_word)
print("\n words that are found :", count_word)

# similarity between the database sentence and the uploaded text
database_sentence_words_tokenized = word_tokenize(database_sentence)
filtered_database_sentence = _drop_stopwords(
    database_sentence_words_tokenized, stop_words
)
print(filtered_database_sentence)
database_sentence_synset = _synsets_per_word(filtered_database_sentence)
print(database_sentence_synset)

# removing empty list elements and making a single-dimension list
removing_empty_list_db = [
    synsets for synsets in database_sentence_synset if synsets
]
db_list_sentence = _flatten(removing_empty_list_db)
print(db_list_sentence)

# Counted separately from the word-level pass so the two "not found"
# figures do not leak into each other.
sentence_check = _pair_scores(db_list_sentence, up_list_sentence)
count_sentence, not_found_sentence = _count_matches(sentence_check)
print(sentence_check)
print("\n words that are not found :", not_found_sentence)
print("\n words that are found :", count_sentence)

# Combined tally kept under the original global name.
not_fond = not_found_word + not_found_sentence
Installing the libraries in the Android Studio build file:
In this project we use Chaquopy to run Python in our Android project, but it has some issues with importing libraries. I have installed NLTK, wordnet, stopwords and word tokenization separately as well, but I am not able to access these libraries in the Python file, and when we install our app it crashes.
// Start the Python runtime only if it is not already started. The guard is
// closed here so the rest of the setup (module lookup and click listener)
// runs every time, not only on the first start.
if (!Python.isStarted()) {
    Python.start(new AndroidPlatform(this));
}
Python py = Python.getInstance();
// Handle to the Python module named "sum"; it must expose a `main` function,
// which the click handler calls below.
final PyObject pyobj = py.getModule("sum");
b2.setOnClickListener(new View.OnClickListener() {
    @Override
    public void onClick(View view) {
        if (path == null) {
            // No document has been selected yet: prompt the user.
            Toast.makeText(documentupload.this, " plz upload the doc", Toast.LENGTH_SHORT).show();
            //upload.setText(path);
            // Intent intent= new Intent(documentupload.this,result.class);
            //startActivity(intent);
        } else {
            // Pass the text to the module's `main` and show what it returns.
            PyObject obj = pyobj.callAttr("main", Words.toString());
            upload.setText(obj.toString());
            Toast.makeText(documentupload.this, "uploaded" + Words, Toast.LENGTH_LONG).show();
            // Toast.makeText(documentupload.this, " plz upload the doc", Toast.LENGTH_LONG).show();
        }
    }
});
When the app crashes, it gives this error message:
I assume the crash happened when calling `wn.synsets`? Here's the stack trace I saw:
I don't think the "wordnet" and "corpus" pip packages have anything to do with nltk. Instead, you should install them using `nltk.download`, just as the error message says.
Because of an emulator bug, you may need to call `nltk.download` in a loop, as described in this answer.