spaCy 3 ValueError without message


I'm new to spaCy, but I want to train a simple NER model with new labels on top of en_core_web_trf. I wrote the code below, but I keep getting a ValueError that has no message.

How can I fix this?

import random
import spacy
from spacy.training import Example
from spacy.util import minibatch, compounding

def train_spacy_model(data, model='en_core_web_trf', n_iter=30):
    if model is not None:
        nlp = spacy.load(model)  # load existing spaCy model
        print("Loaded model '%s'" % model)
    else:
        nlp = spacy.blank("en")  # create blank Language class
        print("Created blank 'en' model")

    print("ner" in nlp.pipe_names)
    if "ner" not in nlp.pipe_names:
        ner = nlp.create_pipe("ner")
        nlp.add_pipe(ner, last=True)
    else:
        ner = nlp.get_pipe("ner")
        
    TRAIN_DATA = data
    
    examples = []
    for text, annotations in TRAIN_DATA:
        examples.append(Example.from_dict(nlp.make_doc(text), annotations))
    nlp.initialize(lambda: examples)
 
    
    pipe_exceptions = ["ner"]
    other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]
    with nlp.disable_pipes(*other_pipes):  # only train NER
        optimizer = nlp.create_optimizer()
        for itn in range(n_iter):
            print ("Starting iteration " + str(itn))
            random.shuffle(examples)
            losses = {}
            batches = minibatch(examples, size=2)
            for batch in batches:
                nlp.update(
                    batch,  
                    drop=0.20, 
                    sgd=optimizer,
                    losses=losses
                )

            print("Losses", losses)
    
    return nlp

TRAIN_DATA = [
    ("Who is Shaka Khan?", {"entities": [(7, 17, "PERSON")]}),
    ("I like London.", {"entities": [(7, 13, "LOC")]}),
]

nlp = train_spacy_model(data=no_verlaps_dataset, n_iter=30)
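
Since misaligned character offsets are a common cause of training problems, I also wrote a small sanity check for the annotations before calling the training function. This is just my own debugging sketch (check_offsets is not a spaCy API); it relies on the fact that Doc.char_span() returns None when a (start, end) pair does not line up with token boundaries:

import spacy

def check_offsets(nlp, data):
    # Report any (start, end, label) annotation that does not align with
    # token boundaries; Doc.char_span() returns None for misaligned spans.
    for text, annotations in data:
        doc = nlp.make_doc(text)
        for start, end, label in annotations["entities"]:
            if doc.char_span(start, end, label=label) is None:
                print(f"Misaligned entity in {text!r}: ({start}, {end}, {label!r})")

check_offsets(spacy.load("en_core_web_trf"), TRAIN_DATA)

The toy TRAIN_DATA above passes this check, so I don't think offset alignment is the problem here.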

The full traceback:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[22], line 51
     47             print("Losses", losses)
     49     return nlp
---> 51 nlp = train_spacy_model(data=no_verlaps_dataset, n_iter=30)

Cell In[22], line 40, in train_spacy_model(data, model, n_iter)
     36         batches = minibatch(examples, size=2)#compounding(4.0, 64.0, 1.2))
     37         for batch in batches:
     38             # print(batch)
     39             # texts, annotations = zip(*batch)
---> 40             nlp.update(
     41                 batch,  
     42                 drop=0.20, 
     43                 sgd=optimizer,
     44                 losses=losses
     45             )
     47         print("Losses", losses)
     49 return nlp

File ~/miniconda3/envs/tvman_ENV/lib/python3.9/site-packages/spacy/language.py:1164, in Language.update(self, examples, _, drop, sgd, losses, component_cfg, exclude, annotates)
   1161 for name, proc in self.pipeline:
   1162     # ignore statements are used here because mypy ignores hasattr
   1163     if name not in exclude and hasattr(proc, "update"):
-> 1164         proc.update(examples, sgd=None, losses=losses, **component_cfg[name])  # type: ignore
   1165     if sgd not in (None, False):
   1166         if (
   1167             name not in exclude
   1168             and isinstance(proc, ty.TrainableComponent)
   1169             and proc.is_trainable
   1170             and proc.model not in (True, False, None)
   1171         ):

File ~/miniconda3/envs/tvman_ENV/lib/python3.9/site-packages/spacy/pipeline/transition_parser.pyx:398, in spacy.pipeline.transition_parser.Parser.update()

File ~/miniconda3/envs/tvman_ENV/lib/python3.9/site-packages/thinc/model.py:309, in Model.begin_update(self, X)
    302 def begin_update(self, X: InT) -> Tuple[OutT, Callable[[InT], OutT]]:
    303     """Run the model over a batch of data, returning the output and a
    304     callback to complete the backward pass. A tuple (Y, finish_update),
    305     where Y is a batch of output data, and finish_update is a callback that
    306     takes the gradient with respect to the output and an optimizer function,
    307     and returns the gradient with respect to the input.
    308     """
--> 309     return self._func(self, X, is_train=True)

File ~/miniconda3/envs/tvman_ENV/lib/python3.9/site-packages/spacy/ml/tb_framework.py:33, in forward(model, X, is_train)
     32 def forward(model, X, is_train):
---> 33     step_model = ParserStepModel(
     34         X,
     35         model.layers,
     36         unseen_classes=model.attrs["unseen_classes"],
     37         train=is_train,
     38         has_upper=model.attrs["has_upper"],
     39     )
     41     return step_model, step_model.finish_steps

File ~/miniconda3/envs/tvman_ENV/lib/python3.9/site-packages/spacy/ml/parser_model.pyx:217, in spacy.ml.parser_model.ParserStepModel.__init__()

File ~/miniconda3/envs/tvman_ENV/lib/python3.9/site-packages/thinc/model.py:291, in Model.__call__(self, X, is_train)
    288 def __call__(self, X: InT, is_train: bool) -> Tuple[OutT, Callable]:
    289     """Call the model's `forward` function, returning the output and a
    290     callback to compute the gradients via backpropagation."""
--> 291     return self._func(self, X, is_train=is_train)

File ~/miniconda3/envs/tvman_ENV/lib/python3.9/site-packages/thinc/layers/chain.py:54, in forward(model, X, is_train)
     52 callbacks = []
     53 for layer in model.layers:
---> 54     Y, inc_layer_grad = layer(X, is_train=is_train)
     55     callbacks.append(inc_layer_grad)
     56     X = Y

File ~/miniconda3/envs/tvman_ENV/lib/python3.9/site-packages/thinc/model.py:291, in Model.__call__(self, X, is_train)
    288 def __call__(self, X: InT, is_train: bool) -> Tuple[OutT, Callable]:
    289     """Call the model's `forward` function, returning the output and a
    290     callback to compute the gradients via backpropagation."""
--> 291     return self._func(self, X, is_train=is_train)

File ~/miniconda3/envs/tvman_ENV/lib/python3.9/site-packages/thinc/layers/chain.py:54, in forward(model, X, is_train)
     52 callbacks = []
     53 for layer in model.layers:
---> 54     Y, inc_layer_grad = layer(X, is_train=is_train)
     55     callbacks.append(inc_layer_grad)
     56     X = Y

File ~/miniconda3/envs/tvman_ENV/lib/python3.9/site-packages/thinc/model.py:291, in Model.__call__(self, X, is_train)
    288 def __call__(self, X: InT, is_train: bool) -> Tuple[OutT, Callable]:
    289     """Call the model's `forward` function, returning the output and a
    290     callback to compute the gradients via backpropagation."""
--> 291     return self._func(self, X, is_train=is_train)

File ~/miniconda3/envs/tvman_ENV/lib/python3.9/site-packages/spacy_transformers/layers/listener.py:58, in forward(model, docs, is_train)
     56 def forward(model: TransformerListener, docs, is_train):
     57     if is_train:
---> 58         model.verify_inputs(docs)
     59         return model._outputs, model.backprop_and_clear
     60     else:

File ~/miniconda3/envs/tvman_ENV/lib/python3.9/site-packages/spacy_transformers/layers/listener.py:47, in TransformerListener.verify_inputs(self, inputs)
     45 def verify_inputs(self, inputs):
     46     if self._batch_id is None and self._outputs is None:
---> 47         raise ValueError
     48     else:
     49         batch_id = self.get_batch_id(inputs)

ValueError: 
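
Looking at the last frames, the message is empty because TransformerListener.verify_inputs raises a bare ValueError (raise ValueError with no arguments) when both self._batch_id and self._outputs are None, i.e. when the listener has no cached transformer output for the batch. My guess, which I haven't been able to confirm, is that this happens because disable_pipes turns off the transformer component, so it never runs during nlp.update and the NER's listener has nothing to read from. If that is right, the training block inside train_spacy_model would need to keep the transformer enabled, roughly like the untested sketch below (select_pipes is the spaCy 3 replacement for disable_pipes, and resume_training is a guess at the right way to continue from the pretrained weights instead of nlp.initialize):

    # Untested guess: keep the transformer enabled so its listener receives outputs.
    pipe_exceptions = ["ner", "transformer"]
    other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]
    with nlp.select_pipes(disable=other_pipes):  # train NER with the transformer still in the loop
        optimizer = nlp.resume_training()  # continue from the pretrained weights
        for itn in range(n_iter):
            print(f"Starting iteration {itn}")
            random.shuffle(examples)
            losses = {}
            for batch in minibatch(examples, size=2):
                nlp.update(batch, drop=0.20, sgd=optimizer, losses=losses)
            print("Losses", losses)

Is this the right way to fine-tune NER on top of en_core_web_trf, or am I missing something else?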