A want to resolve coreferences without Internet using AllenNLP and coref-spanbert-large model. I try to do it in the way that is describing here https://demo.allennlp.org/coreference-resolution
My code:
from allennlp.predictors.predictor import Predictor
import allennlp_models.tagging
predictor = Predictor.from_path(r"C:\Users\aap\Desktop\coref-spanbert-large-2021.03.10.tar.gz")
example = 'Paul Allen was born on January 21, 1953, in Seattle, Washington, to Kenneth Sam Allen and Edna Faye Allen.Allen attended Lakeside School, a private school in Seattle, where he befriended Bill Gates, two years younger, with whom he shared an enthusiasm for computers.'
pred = predictor.predict(document=example)
coref_res = predictor.coref_resolved(example)
print(pred)
print(coref_res)
When I have an access to internet the code works correctly. But when I don't have an access to internet I get the following errors:
Traceback (most recent call last):
File "C:/Users/aap/Desktop/CoreNLP/Coref_AllenNLP.py", line 14, in <module>
predictor = Predictor.from_path(r"C:\Users\aap\Desktop\coref-spanbert-large-2021.03.10.tar.gz")
File "C:\Users\aap\Desktop\CoreNLP\corenlp\lib\site-packages\allennlp\predictors\predictor.py", line 361, in from_path
load_archive(archive_path, cuda_device=cuda_device, overrides=overrides),
File "C:\Users\aap\Desktop\CoreNLP\corenlp\lib\site-packages\allennlp\models\archival.py", line 206, in load_archive
config.duplicate(), serialization_dir
File "C:\Users\aap\Desktop\CoreNLP\corenlp\lib\site-packages\allennlp\models\archival.py", line 232, in _load_dataset_readers
dataset_reader_params, serialization_dir=serialization_dir
File "C:\Users\aap\Desktop\CoreNLP\corenlp\lib\site-packages\allennlp\common\from_params.py", line 604, in from_params
**extras,
File "C:\Users\aap\Desktop\CoreNLP\corenlp\lib\site-packages\allennlp\common\from_params.py", line 632, in from_params
kwargs = create_kwargs(constructor_to_inspect, cls, params, **extras)
File "C:\Users\aap\Desktop\CoreNLP\corenlp\lib\site-packages\allennlp\common\from_params.py", line 200, in create_kwargs
cls.__name__, param_name, annotation, param.default, params, **extras
File "C:\Users\aap\Desktop\CoreNLP\corenlp\lib\site-packages\allennlp\common\from_params.py", line 307, in pop_and_construct_arg
return construct_arg(class_name, name, popped_params, annotation, default, **extras)
File "C:\Users\aap\Desktop\CoreNLP\corenlp\lib\site-packages\allennlp\common\from_params.py", line 391, in construct_arg
**extras,
File "C:\Users\aap\Desktop\CoreNLP\corenlp\lib\site-packages\allennlp\common\from_params.py", line 341, in construct_arg
return annotation.from_params(params=popped_params, **subextras)
File "C:\Users\aap\Desktop\CoreNLP\corenlp\lib\site-packages\allennlp\common\from_params.py", line 604, in from_params
**extras,
File "C:\Users\aap\Desktop\CoreNLP\corenlp\lib\site-packages\allennlp\common\from_params.py", line 634, in from_params
return constructor_to_call(**kwargs) # type: ignore
File "C:\Users\aap\Desktop\CoreNLP\corenlp\lib\site-packages\allennlp\data\token_indexers\pretrained_transformer_mismatched_indexer.py", line 63, in __init__
**kwargs,
File "C:\Users\aap\Desktop\CoreNLP\corenlp\lib\site-packages\allennlp\data\token_indexers\pretrained_transformer_indexer.py", line 58, in __init__
model_name, tokenizer_kwargs=tokenizer_kwargs
File "C:\Users\aap\Desktop\CoreNLP\corenlp\lib\site-packages\allennlp\data\tokenizers\pretrained_transformer_tokenizer.py", line 71, in __init__
model_name, add_special_tokens=False, **tokenizer_kwargs
File "C:\Users\aap\Desktop\CoreNLP\corenlp\lib\site-packages\allennlp\common\cached_transformers.py", line 110, in get_tokenizer
**kwargs,
File "C:\Users\aap\Desktop\CoreNLP\corenlp\lib\site-packages\transformers\models\auto\tokenization_auto.py", line 362, in from_pretrained
config = AutoConfig.from_pretrained(pretrained_model_name_or_path, **kwargs)
File "C:\Users\aap\Desktop\CoreNLP\corenlp\lib\site-packages\transformers\models\auto\configuration_auto.py", line 368, in from_pretrained
config_dict, _ = PretrainedConfig.get_config_dict(pretrained_model_name_or_path, **kwargs)
File "C:\Users\aap\Desktop\CoreNLP\corenlp\lib\site-packages\transformers\configuration_utils.py", line 424, in get_config_dict
use_auth_token=use_auth_token,
File "C:\Users\aap\Desktop\CoreNLP\corenlp\lib\site-packages\transformers\file_utils.py", line 1087, in cached_path
local_files_only=local_files_only,
File "C:\Users\aap\Desktop\CoreNLP\corenlp\lib\site-packages\transformers\file_utils.py", line 1268, in get_from_cache
"Connection error, and we cannot find the requested files in the cached path."
ValueError: Connection error, and we cannot find the requested files in the cached path. Please try again or make sure your Internet connection is on.
Process finished with exit code 1
Please, say me, what do I need to do my code works without Internet?
You will need a local copy of transformer model's configuration file and vocabulary so that the tokenizer and token indexer don't need to download those:
You will then need to override the transformer model name in the configuration file to the local directory (
local_config_path
) where you saved these things: