I want to understand how to update this spaCy v2.x NER example, which teaches the model to recognize a new entity type (here ANIMAL), so that it works with spaCy v3.x:
https://github.com/explosion/spaCy/blob/v2.3.x/examples/training/train_new_entity_type.py
import plac
import random
import warnings
from pathlib import Path

import spacy
from spacy.util import minibatch, compounding

# new entity label
LABEL = "ANIMAL"

# training data
TRAIN_DATA = [
    (
        "Horses are too tall and they pretend to care about your feelings",
        {"entities": [(0, 6, LABEL)]},
    ),
    ("Do they bite?", {"entities": []}),
    (
        "horses are too tall and they pretend to care about your feelings",
        {"entities": [(0, 6, LABEL)]},
    ),
    ("horses pretend to care about your feelings", {"entities": [(0, 6, LABEL)]}),
    (
        "they pretend to care about your feelings, those horses",
        {"entities": [(48, 54, LABEL)]},
    ),
    ("horses?", {"entities": [(0, 6, LABEL)]}),
]


@plac.annotations(
    model=("Model name. Defaults to blank 'en' model.", "option", "m", str),
    new_model_name=("New model name for model meta.", "option", "nm", str),
    output_dir=("Optional output directory", "option", "o", Path),
    n_iter=("Number of training iterations", "option", "n", int),
)
def main(model=None, new_model_name="animal", output_dir=None, n_iter=30):
    """Set up the pipeline and entity recognizer, and train the new entity."""
    random.seed(0)
    if model is not None:
        nlp = spacy.load(model)  # load existing spaCy model
        print("Loaded model '%s'" % model)
    else:
        nlp = spacy.blank("en")  # create blank Language class
        print("Created blank 'en' model")
    # Add entity recognizer to model if it's not in the pipeline
    # nlp.create_pipe works for built-ins that are registered with spaCy
    if "ner" not in nlp.pipe_names:
        ner = nlp.create_pipe("ner")
        nlp.add_pipe(ner)
    # otherwise, get it so we can add labels to it
    else:
        ner = nlp.get_pipe("ner")
    ner.add_label(LABEL)  # add new entity label to entity recognizer
    # Adding extraneous labels shouldn't mess anything up
    ner.add_label("VEGETABLE")
    if model is None:
        optimizer = nlp.begin_training()
    else:
        optimizer = nlp.resume_training()
    move_names = list(ner.move_names)
    # get names of other pipes to disable them during training
    pipe_exceptions = ["ner", "trf_wordpiecer", "trf_tok2vec"]
    other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]
    # only train NER
    with nlp.disable_pipes(*other_pipes), warnings.catch_warnings():
        # show warnings for misaligned entity spans once
        warnings.filterwarnings("once", category=UserWarning, module="spacy")
        sizes = compounding(1.0, 4.0, 1.001)
        # batch up the examples using spaCy's minibatch
        for itn in range(n_iter):
            random.shuffle(TRAIN_DATA)
            batches = minibatch(TRAIN_DATA, size=sizes)
            losses = {}
            for batch in batches:
                texts, annotations = zip(*batch)
                nlp.update(texts, annotations, sgd=optimizer, drop=0.35, losses=losses)
            print("Losses", losses)
    # test the trained model
    test_text = "Do you like horses?"
    doc = nlp(test_text)
    print("Entities in '%s'" % test_text)
    for ent in doc.ents:
        print(ent.label_, ent.text)
    # save model to output directory
    if output_dir is not None:
        output_dir = Path(output_dir)
        if not output_dir.exists():
            output_dir.mkdir()
        nlp.meta["name"] = new_model_name  # rename model
        nlp.to_disk(output_dir)
        print("Saved model to", output_dir)
        # test the saved model
        print("Loading from", output_dir)
        nlp2 = spacy.load(output_dir)
        # Check the classes have loaded back consistently
        assert nlp2.get_pipe("ner").move_names == move_names
        doc2 = nlp2(test_text)
        for ent in doc2.ents:
            print(ent.label_, ent.text)


if __name__ == "__main__":
    plac.call(main)
What should this code look like in spaCy v3 so that it supports model=en_core_web_trf?
There's a demo project for updating an NER component in the projects repo. In spaCy v3, instead of writing your own training loop, the recommended training process is to use a config file and the spacy train CLI command. For updates like this, v3 makes no difference in how training is configured between transformer and non-transformer pipelines, since transformers no longer require extra components like the ones in the example you posted.
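For example, the TRAIN_DATA above can be converted to the binary .spacy format with DocBin and then trained via the CLI. This is a minimal sketch; the file names (train.spacy, config.cfg, output/) are illustrative:

import spacy
from spacy.tokens import DocBin

nlp = spacy.blank("en")
db = DocBin()
for text, annotations in TRAIN_DATA:
    doc = nlp.make_doc(text)
    ents = []
    for start, end, label in annotations["entities"]:
        # char_span returns None for spans that don't align with token boundaries
        span = doc.char_span(start, end, label=label)
        if span is not None:
            ents.append(span)
    doc.ents = ents
    db.add(doc)
db.to_disk("./train.spacy")

Then generate a config and train (reusing the training file as the dev set here only to keep the sketch short):

python -m spacy init config config.cfg --lang en --pipeline ner
python -m spacy train config.cfg --output ./output --paths.train ./train.spacy --paths.dev ./train.spacy

The same workflow applies unchanged to a transformer pipeline such as en_core_web_trf; only the config contents differ.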
Note that usually updating the pretrained NER component is not recommended, since it's easy to run into catastrophic forgetting. In the standard training process outlined in the docs, pretrained transformers are used as a feature source, so while the NER layer is from scratch, you aren't actually starting with nothing.
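That said, if you want to port the loop above directly rather than use the CLI, the main API change in v3 is that nlp.update takes a list of Example objects instead of separate texts and annotations. A minimal sketch, assuming the TRAIN_DATA from above (the pipeline name and hyperparameters are illustrative):

import random
import spacy
from spacy.training import Example
from spacy.util import minibatch

nlp = spacy.load("en_core_web_sm")  # illustrative; any trained pipeline
ner = nlp.get_pipe("ner")
ner.add_label("ANIMAL")
optimizer = nlp.resume_training()
# only update NER; leave the other components untouched
other_pipes = [p for p in nlp.pipe_names if p != "ner"]
with nlp.select_pipes(disable=other_pipes):
    for itn in range(30):
        random.shuffle(TRAIN_DATA)
        losses = {}
        for batch in minibatch(TRAIN_DATA, size=4):
            examples = [
                Example.from_dict(nlp.make_doc(text), annotations)
                for text, annotations in batch
            ]
            nlp.update(examples, sgd=optimizer, drop=0.35, losses=losses)
        print("Losses", losses)

Because of the catastrophic forgetting issue mentioned above, if you go this route you should also mix in examples of the entity types the model already predicts correctly.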