I have the following code:
# imports, e.g. pandas, dill, spacy, nltk
class CxG:
# ...
def generate_features(self, document: Doc, token: Token):
# Build the slot objects describing a single token of a parsed document.
#
# Every return statement below yields a 3-tuple:
#   1. a LexSlot built from (pos, morph, synset, token.text),
#   2. a SemSlot, MorphSlot, SynSlot or None, depending on the branch taken,
#   3. the constant 1.
# NOTE(review): the meaning of the trailing 1 is not visible here - presumably a
# count/frequency; confirm against the callers.
# NOTE(review): indentation was lost in this excerpt, so the nesting of the
# `if synset:` / `if morph:` / `else:` branches relative to `if pos in dir(wncr):`
# (and therefore whether the final return is reachable) cannot be determined
# from here - check the original file before restructuring.
#
# Function needs to be self-contained, so that it can be parallelized -> import everything locally here:
from nltk.corpus.reader.wordnet import WordNetCorpusReader as wncr
from src.modules.wsd import lesk # TODO Import from nltk, as soon as lang is in the current version
from src.slot import LexSlot, MorphSlot, SynSlot, SemSlot
# Normalise falsy values to None so that a missing tag/morphology is represented uniformly.
pos = token.pos_ if token.pos_ else None
lemma = token.lemma_
morph = str(token.morph) if token.morph else None
# Stays None unless the lookup below produces a synset.
synset = None
# The token's POS string is looked up as an attribute name on WordNetCorpusReader;
# a POS of None is never found in dir(), so such tokens skip the lookup.
if pos in dir(wncr): # if the pos tag is available in wordnet
wn_pos = getattr(wncr, pos) # get the wordnet pos tag
# Get the most probable synset using Lesk's algorithm:
# The full document text is passed as the disambiguation context, and the lemma as the target word.
# NOTE(review): whatever lesk returns is stored unchanged in the slots below. If it is
# an NLTK Synset object (presumably - confirm), it may be what makes the enclosing
# CxG object fail to pickle in save_cxg; storing a plain identifier would avoid that.
synset = lesk(document.text, lemma, pos=wn_pos, lang=self.language)
# The branches below return the most specific second slot available: SemSlot, then MorphSlot, then SynSlot.
if synset:
return LexSlot(pos, morph, synset, token.text), SemSlot(pos, morph, synset), 1 # pos, morph, synset, 1
if morph:
return LexSlot(pos, morph, synset, token.text), MorphSlot(pos, morph), 1 # pos, morph, None, 1
else:
return LexSlot(pos, morph, synset, token.text), SynSlot(pos), 1
# Fallback without a second slot (see the nesting note above regarding reachability).
return LexSlot(pos, morph, synset, token.text), None, 1
def collect_features_for_document(self, document, get_all=False):
    """Collect the features of every token in ``document``.

    ``self.generate_features(document, token)`` is called once per token,
    in iteration order.

    Args:
        document: Iterable of tokens; it is also passed through to
            ``generate_features`` as the token's context.
        get_all: When true, keep the complete result of
            ``generate_features`` for each token. Otherwise keep only its
            first element (the LexSlot).

    Returns:
        A tuple with one entry per token.
    """
    # Lazy stream: each token's features are computed exactly once, in order.
    per_token = (self.generate_features(document, tok) for tok in document)
    if get_all:
        return tuple(per_token)
    return tuple(features[0] for features in per_token)
def collect_document_features(self, save=False, parallel=False):
    """Compute the per-token LexSlots for every stored document.

    The documents are read from ``self.data.documents["document"]``. The
    result, one tuple of LexSlots per document, is wrapped in a one-column
    DataFrame and assigned to ``self.data.documents["features"]``.

    Args:
        save: When true, ``self.save_cxg()`` is called after the features
            have been stored.
        parallel: When true, the documents are first run through
            ``self.nlp.pipe`` with ``n_process=os.cpu_count()``. Only the
            pipe receives ``n_process``; the features are still collected
            in the calling process. When false, the stored documents are
            iterated directly.
            NOTE(review): the non-parallel path assumes the stored
            documents are already iterable as tokens - confirm.
    """
    documents = self.data.documents["document"]
    progress_label = "Collecting features per document"

    if parallel:
        doc_stream = tqdm(
            self.nlp.pipe(documents, n_process=os.cpu_count()),
            desc=progress_label,
            total=len(documents),
        )
    else:
        doc_stream = tqdm(documents, desc=progress_label)

    # With the default get_all=False only the LexSlot of each token is kept.
    document_features = [
        self.collect_features_for_document(doc) for doc in doc_stream
    ]

    self.data.documents["features"] = pd.DataFrame({"features": document_features})
    if save:
        self.save_cxg()
# ...
def save_cxg(self):
    """Serialize this object with dill to ``<self.save_path>/cxg.dill``.

    The target directory is created if it does not exist yet. Progress
    messages are written to stderr.

    Raises:
        OSError: If the directory cannot be created or the file cannot be
            opened for writing.
        Exception: Whatever ``dill.dump`` raises when part of the object
            graph cannot be pickled.
    """
    print("Saving CxG to file...", file=sys.stderr)
    # exist_ok=True creates the directory tree without a separate existence
    # check, so there is no window in which another process can create the
    # directory between the check and the creation.
    os.makedirs(self.save_path, exist_ok=True)
    with open(f"{self.save_path}/cxg.dill", "wb") as f:
        dill.dump(self, f)
    print("Done saving!", file=sys.stderr)
This leads to the following error as soon as the code reaches self.save_cxg():
...
Collecting features per document: 100%|██████████| 118579/118579 [14:20<00:00, 137.74it/s]
Saving CxG to file...
Traceback (most recent call last):
File "[…]/c3xg/src/cxg.py", line 442, in <module>
cxg_english.collect_document_features(save=True, parallel=True)
File "[…]/c3xg/src/cxg.py", line 160, in collect_document_features
self.save_cxg()
File "[…]/c3xg/src/cxg.py", line 385, in save_cxg
dill.dump(self, f)
File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 250, in dump
Pickler(file, protocol, **_kwds).dump(obj)
File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 418, in dump
StockPickler.dump(self, obj)
File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 487, in dump
self.save(obj)
File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 412, in save
StockPickler.save(self, obj, save_persistent_id)
File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 603, in save
self.save_reduce(obj=obj, *rv)
File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 717, in save_reduce
save(state)
File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 412, in save
StockPickler.save(self, obj, save_persistent_id)
File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 560, in save
f(self, obj) # Call unbound method with explicit self
File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 1212, in save_module_dict
StockPickler.save_dict(pickler, obj)
File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 972, in save_dict
self._batch_setitems(obj.items())
File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 998, in _batch_setitems
save(v)
File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 412, in save
StockPickler.save(self, obj, save_persistent_id)
File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 603, in save
self.save_reduce(obj=obj, *rv)
File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 717, in save_reduce
save(state)
File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 412, in save
StockPickler.save(self, obj, save_persistent_id)
File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 560, in save
f(self, obj) # Call unbound method with explicit self
File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 1212, in save_module_dict
StockPickler.save_dict(pickler, obj)
File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 972, in save_dict
self._batch_setitems(obj.items())
File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 998, in _batch_setitems
save(v)
File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 412, in save
StockPickler.save(self, obj, save_persistent_id)
File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 603, in save
self.save_reduce(obj=obj, *rv)
File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 717, in save_reduce
save(state)
File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 412, in save
StockPickler.save(self, obj, save_persistent_id)
File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 560, in save
f(self, obj) # Call unbound method with explicit self
File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 1212, in save_module_dict
StockPickler.save_dict(pickler, obj)
File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 972, in save_dict
self._batch_setitems(obj.items())
File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 998, in _batch_setitems
save(v)
File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 412, in save
StockPickler.save(self, obj, save_persistent_id)
File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 603, in save
self.save_reduce(obj=obj, *rv)
File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 692, in save_reduce
save(args)
File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 412, in save
StockPickler.save(self, obj, save_persistent_id)
File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 560, in save
f(self, obj) # Call unbound method with explicit self
File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 887, in save_tuple
save(element)
File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 412, in save
StockPickler.save(self, obj, save_persistent_id)
File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 560, in save
f(self, obj) # Call unbound method with explicit self
File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 887, in save_tuple
save(element)
File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 412, in save
StockPickler.save(self, obj, save_persistent_id)
File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 603, in save
self.save_reduce(obj=obj, *rv)
File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 692, in save_reduce
save(args)
File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 412, in save
StockPickler.save(self, obj, save_persistent_id)
File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 560, in save
f(self, obj) # Call unbound method with explicit self
File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 887, in save_tuple
save(element)
File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 412, in save
StockPickler.save(self, obj, save_persistent_id)
File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 560, in save
f(self, obj) # Call unbound method with explicit self
File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 404, in save_numpy_array
pickler.save_reduce(_create_array, (f,args,state,npdict), obj=obj)
File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 692, in save_reduce
save(args)
File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 412, in save
StockPickler.save(self, obj, save_persistent_id)
File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 560, in save
f(self, obj) # Call unbound method with explicit self
File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 902, in save_tuple
save(element)
File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 412, in save
StockPickler.save(self, obj, save_persistent_id)
File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 560, in save
f(self, obj) # Call unbound method with explicit self
File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 902, in save_tuple
save(element)
File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 412, in save
StockPickler.save(self, obj, save_persistent_id)
File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 560, in save
f(self, obj) # Call unbound method with explicit self
File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 932, in save_list
self._batch_appends(obj)
File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 956, in _batch_appends
save(x)
File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 412, in save
StockPickler.save(self, obj, save_persistent_id)
File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 560, in save
f(self, obj) # Call unbound method with explicit self
File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 902, in save_tuple
save(element)
File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 412, in save
StockPickler.save(self, obj, save_persistent_id)
File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 603, in save
self.save_reduce(obj=obj, *rv)
File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 692, in save_reduce
save(args)
File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 412, in save
StockPickler.save(self, obj, save_persistent_id)
File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 560, in save
f(self, obj) # Call unbound method with explicit self
File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 902, in save_tuple
save(element)
File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 412, in save
StockPickler.save(self, obj, save_persistent_id)
File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 603, in save
self.save_reduce(obj=obj, *rv)
File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 717, in save_reduce
save(state)
File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 412, in save
StockPickler.save(self, obj, save_persistent_id)
File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 560, in save
f(self, obj) # Call unbound method with explicit self
File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 887, in save_tuple
save(element)
File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 412, in save
StockPickler.save(self, obj, save_persistent_id)
File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 560, in save
f(self, obj) # Call unbound method with explicit self
File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 1212, in save_module_dict
StockPickler.save_dict(pickler, obj)
File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 972, in save_dict
self._batch_setitems(obj.items())
File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 998, in _batch_setitems
save(v)
File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 412, in save
StockPickler.save(self, obj, save_persistent_id)
File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 603, in save
self.save_reduce(obj=obj, *rv)
File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 717, in save_reduce
save(state)
File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 412, in save
StockPickler.save(self, obj, save_persistent_id)
File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 560, in save
f(self, obj) # Call unbound method with explicit self
File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 1212, in save_module_dict
StockPickler.save_dict(pickler, obj)
File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 972, in save_dict
self._batch_setitems(obj.items())
File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 998, in _batch_setitems
save(v)
File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 412, in save
StockPickler.save(self, obj, save_persistent_id)
File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 603, in save
self.save_reduce(obj=obj, *rv)
File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 684, in save_reduce
raise PicklingError(
_pickle.PicklingError: args[0] from __newobj__ args has the wrong class
I saw similar issues here and here, but I couldn't see any similarities to my code. I suspect that it might have something to do with spaCy's nlp object, because I found issues like this one, which relate to using spaCy together with multiprocessing.
What could be the reason that saving the CxG object shown in the code above is not possible?