I'm new to Spacy but want to train simply NER with new labels using the en_core_web_trf
. So I've created code like below however I keep getting unknown ValueError
.
How can I fix this?
import random
from spacy.training import Example
from spacy.util import minibatch, compounding
def train_spacy_model(data, model='en_core_web_trf', n_iter=30):
if model is not None:
nlp = spacy.load(model) # load existing spaCy model
print("Loaded model '%s'" % model)
else:
nlp = spacy.blank("en") # create blank Language class
print("Created blank 'en' model")
print("ner" in nlp.pipe_names)
if "ner" not in nlp.pipe_names:
ner = nlp.create_pipe("ner")
nlp.add_pipe(ner, last=True)
else:
ner = nlp.get_pipe("ner")
TRAIN_DATA = data
examples = []
for text, annotations in TRAIN_DATA:
examples.append(Example.from_dict(nlp.make_doc(text), annotations))
nlp.initialize(lambda: examples)
pipe_exceptions = ["ner"]
other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]
with nlp.disable_pipes(*other_pipes): # only train NER
optimizer = nlp.create_optimizer()
for itn in range(n_iter):
print ("Starting iteration " + str(itn))
random.shuffle(examples)
losses = {}
batches = minibatch(examples, size=2)
for batch in batches:
nlp.update(
batch,
drop=0.20,
sgd=optimizer,
losses=losses
)
print("Losses", losses)
return nlp
TRAIN_DATA = [
("Who is Shaka Khan?", {"entities": [(7, 17, "PERSON")]}),
("I like London.", {"entities": [(7, 13, "LOC")]}),
]
nlp = train_spacy_model(data=no_verlaps_dataset, n_iter=30)
the error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[22], line 51
47 print("Losses", losses)
49 return nlp
---> 51 nlp = train_spacy_model(data=no_verlaps_dataset, n_iter=30)
Cell In[22], line 40, in train_spacy_model(data, model, n_iter)
36 batches = minibatch(examples, size=2)#compounding(4.0, 64.0, 1.2))
37 for batch in batches:
38 # print(batch)
39 # texts, annotations = zip(*batch)
---> 40 nlp.update(
41 batch,
42 drop=0.20,
43 sgd=optimizer,
44 losses=losses
45 )
47 print("Losses", losses)
49 return nlp
File ~/miniconda3/envs/tvman_ENV/lib/python3.9/site-packages/spacy/language.py:1164, in Language.update(self, examples, _, drop, sgd, losses, component_cfg, exclude, annotates)
1161 for name, proc in self.pipeline:
1162 # ignore statements are used here because mypy ignores hasattr
1163 if name not in exclude and hasattr(proc, "update"):
-> 1164 proc.update(examples, sgd=None, losses=losses, **component_cfg[name]) # type: ignore
1165 if sgd not in (None, False):
1166 if (
1167 name not in exclude
1168 and isinstance(proc, ty.TrainableComponent)
1169 and proc.is_trainable
1170 and proc.model not in (True, False, None)
1171 ):
File ~/miniconda3/envs/tvman_ENV/lib/python3.9/site-packages/spacy/pipeline/transition_parser.pyx:398, in spacy.pipeline.transition_parser.Parser.update()
File ~/miniconda3/envs/tvman_ENV/lib/python3.9/site-packages/thinc/model.py:309, in Model.begin_update(self, X)
302 def begin_update(self, X: InT) -> Tuple[OutT, Callable[[InT], OutT]]:
303 """Run the model over a batch of data, returning the output and a
304 callback to complete the backward pass. A tuple (Y, finish_update),
305 where Y is a batch of output data, and finish_update is a callback that
306 takes the gradient with respect to the output and an optimizer function,
307 and returns the gradient with respect to the input.
308 """
--> 309 return self._func(self, X, is_train=True)
File ~/miniconda3/envs/tvman_ENV/lib/python3.9/site-packages/spacy/ml/tb_framework.py:33, in forward(model, X, is_train)
32 def forward(model, X, is_train):
---> 33 step_model = ParserStepModel(
34 X,
35 model.layers,
36 unseen_classes=model.attrs["unseen_classes"],
37 train=is_train,
38 has_upper=model.attrs["has_upper"],
39 )
41 return step_model, step_model.finish_steps
File ~/miniconda3/envs/tvman_ENV/lib/python3.9/site-packages/spacy/ml/parser_model.pyx:217, in spacy.ml.parser_model.ParserStepModel.__init__()
File ~/miniconda3/envs/tvman_ENV/lib/python3.9/site-packages/thinc/model.py:291, in Model.__call__(self, X, is_train)
288 def __call__(self, X: InT, is_train: bool) -> Tuple[OutT, Callable]:
289 """Call the model's `forward` function, returning the output and a
290 callback to compute the gradients via backpropagation."""
--> 291 return self._func(self, X, is_train=is_train)
File ~/miniconda3/envs/tvman_ENV/lib/python3.9/site-packages/thinc/layers/chain.py:54, in forward(model, X, is_train)
52 callbacks = []
53 for layer in model.layers:
---> 54 Y, inc_layer_grad = layer(X, is_train=is_train)
55 callbacks.append(inc_layer_grad)
56 X = Y
File ~/miniconda3/envs/tvman_ENV/lib/python3.9/site-packages/thinc/model.py:291, in Model.__call__(self, X, is_train)
288 def __call__(self, X: InT, is_train: bool) -> Tuple[OutT, Callable]:
289 """Call the model's `forward` function, returning the output and a
290 callback to compute the gradients via backpropagation."""
--> 291 return self._func(self, X, is_train=is_train)
File ~/miniconda3/envs/tvman_ENV/lib/python3.9/site-packages/thinc/layers/chain.py:54, in forward(model, X, is_train)
52 callbacks = []
53 for layer in model.layers:
---> 54 Y, inc_layer_grad = layer(X, is_train=is_train)
55 callbacks.append(inc_layer_grad)
56 X = Y
File ~/miniconda3/envs/tvman_ENV/lib/python3.9/site-packages/thinc/model.py:291, in Model.__call__(self, X, is_train)
288 def __call__(self, X: InT, is_train: bool) -> Tuple[OutT, Callable]:
289 """Call the model's `forward` function, returning the output and a
290 callback to compute the gradients via backpropagation."""
--> 291 return self._func(self, X, is_train=is_train)
File ~/miniconda3/envs/tvman_ENV/lib/python3.9/site-packages/spacy_transformers/layers/listener.py:58, in forward(model, docs, is_train)
56 def forward(model: TransformerListener, docs, is_train):
57 if is_train:
---> 58 model.verify_inputs(docs)
59 return model._outputs, model.backprop_and_clear
60 else:
File ~/miniconda3/envs/tvman_ENV/lib/python3.9/site-packages/spacy_transformers/layers/listener.py:47, in TransformerListener.verify_inputs(self, inputs)
45 def verify_inputs(self, inputs):
46 if self._batch_id is None and self._outputs is None:
---> 47 raise ValueError
48 else:
49 batch_id = self.get_batch_id(inputs)
ValueError: