I'm trying to add my own data to the NLTK dataset for a machine learning project. However, I can't figure out how to add text to the NLTK dataset.
My code is as follows:
import nltk
from nltk.corpus import wordnet
from nltk.corpus.reader.wordnet import WordNetCorpusReader
# Path of the CSV file whose contents are meant to be added to the corpus.
data_path = 'training.csv'
# Build a reader over the locally installed WordNet corpus directory; the
# second positional argument is passed as None here.
wn_reader = WordNetCorpusReader(nltk.data.find('corpora/wordnet'), None)
# NOTE(review): WordNetCorpusReader looks like a read-only reader and does not
# appear to provide an `insert` method -- confirm against the NLTK API docs;
# this call presumably raises AttributeError.
wn_reader.insert(data_path)
def get_wordnet_pos(word):
    """Return the WordNet POS constant for a single word.

    The word is tagged on its own with ``nltk.pos_tag`` and the first letter
    of the resulting treebank-style tag is looked up in a small table.

    Args:
        word: A single token (string) to tag.

    Returns:
        One of ``wordnet.ADJ``, ``wordnet.NOUN``, ``wordnet.VERB`` or
        ``wordnet.ADV``. ``wordnet.NOUN`` is returned when the first letter
        of the tag is not one of J/N/V/R.
    """
    tag_dict = {
        "J": wordnet.ADJ,
        "N": wordnet.NOUN,
        "V": wordnet.VERB,
        "R": wordnet.ADV,
    }
    # pos_tag returns [(word, tag)]; only the first letter of the tag is used.
    tag = nltk.pos_tag([word])[0][1][0].upper()
    return tag_dict.get(tag, wordnet.NOUN)
def correct_sentence(text):
    """Replace each token of *text* with its WordNet base form.

    The text is tokenized with ``nltk.word_tokenize``; every token is passed
    to ``wordnet.morphy`` together with the POS chosen by
    ``get_wordnet_pos``. A token is kept unchanged when ``morphy`` yields a
    falsy result for it.

    Args:
        text: The sentence to process.

    Returns:
        The processed tokens joined by single spaces. Because every token is
        joined this way, punctuation tokens end up space-separated as well.
    """
    words = nltk.word_tokenize(text)
    # get_wordnet_pos tags each word itself, so no separate sentence-level
    # tagging pass is needed here.
    words_corrected = [
        wordnet.morphy(word, get_wordnet_pos(word)) or word for word in words
    ]
    return ' '.join(words_corrected)
# Demo: run the sentence through correct_sentence and show the result.
text = "I have writed a message but forgot to send it."
print(corrected_text := correct_sentence(text))
I also don't know whether I should use a CSV file or not.
After getting the following error:
Traceback (most recent call last):
File "main.py", line 10, in <module>
wn_reader = WordNetCorpusReader(nltk.data.find('wordnet'), None)
File "/home/runner/myfolder/venv/lib/python3.10/site-packages/nltk/data.py", line 560, in find
resource_zipname = resource_name.split("/")[1]
IndexError: list index out of range
I tried reading the NLTK HOWTO on the NLTK website, but sadly it was no help.