I have the following code using spaCy. There are a couple of training records meant to teach it to recognize a simple set of phrases around a destination.
def init_training_data():
    """Train a blank English NER pipeline on two toy sentences and print a test prediction.

    The pipeline is created with ``spacy.blank``, which contains no
    components. An ``ner`` pipe is therefore added and initialized before
    training; without it ``nlp.update`` has nothing to train and ``doc.ents``
    is always empty.

    Prints the validated training spans, each text as it is used for an
    update, and finally the number of entities (followed by the entities
    themselves) predicted for a held-out sentence.

    Raises:
        ValueError: If an annotation's character offsets do not line up with
            token boundaries in its text.
    """
    nlp = spacy.blank("en")
    # A blank pipeline has no trainable components; add the entity recognizer.
    nlp.add_pipe("ner")

    training_data = [
        (
            "Coliseum is visited by John Doe",
            [(0, 8, "DESTINATION"), (12, 19, "VISIT"), (23, 31, "TRAVELLER")],
        ),
        (
            "John Doe has not been to Louvre",
            [(0, 8, "TRAVELLER"), (9, 21, "NOT_VISITED"), (25, 31, "DESTINATION")],
        ),
    ]

    # Sanity-check the character offsets before training. Only the tokenizer
    # is used here (make_doc): the "ner" pipe is not initialized yet, so
    # running the full pipeline at this point would fail.
    for text, annotations in training_data:
        doc = nlp.make_doc(text)
        spans = []
        for start, end, label in annotations:
            span = doc.char_span(start, end, label=label)
            print(start, end, label)
            print(span)
            if span is None:
                # char_span returns None when the offsets cut through a token.
                raise ValueError(
                    f"Offsets ({start}, {end}) for label {label!r} do not "
                    f"align with token boundaries in {text!r}"
                )
            spans.append(span)
        print(spans)
        doc.ents = spans

    # Initialize the model weights and let the "ner" pipe collect its labels
    # from the training examples; the returned optimizer drives the updates.
    examples = [
        Example.from_dict(nlp.make_doc(text), {"entities": annotations})
        for text, annotations in training_data
    ]
    optimizer = nlp.initialize(lambda: examples)

    for _ in range(25):
        random.shuffle(training_data)
        for raw_text, entity_offsets in training_data:
            doc = nlp.make_doc(raw_text)
            example = Example.from_dict(doc, {"entities": entity_offsets})
            print(raw_text)
            nlp.update([example], sgd=optimizer)

    # NOTE(review): with only two training sentences the model may still not
    # generalize to unseen names; expect to need far more examples for a
    # reliable three-entity prediction here.
    text = nlp("Rome is visited by Fred Smith")
    print(len(text.ents))
    for ent in text.ents:
        # Print each predicted entity's text and label
        print(ent.text, ent.label_)
The line `print(len(text.ents))` prints 0. It should print 3, right? Can someone please point me in the right direction?