Saving an object with Pandas DataFrames and Spacy objects using dill: "_pickle.PicklingError: args[0] from __newobj__ args has the wrong class"

77 views Asked by At

I have the following code:

# imports, e.g. pandas, dill, spacy, nltk

class CxG:

    # ...

    def generate_features(self, document: Doc, token: Token):
        """Build the slot features for a single token of a document.

        :param document: The document the token belongs to; its ``text`` is passed
            to Lesk's algorithm as the disambiguation context.
        :param token: The token to describe; its ``pos_``, ``lemma_``, ``morph``
            and ``text`` attributes are read.
        :return: A 3-tuple ``(lex_slot, slot, 1)``. ``lex_slot`` is always a
            ``LexSlot``. ``slot`` is a ``SemSlot`` if a synset was found, otherwise
            a ``MorphSlot`` if the token has morphological information, otherwise
            a ``SynSlot``.
        """
        # Function needs to be self-contained, so that it can be parallelized -> import everything locally here:
        from nltk.corpus.reader.wordnet import WordNetCorpusReader as wncr
        from src.modules.wsd import lesk  # TODO Import from nltk, as soon as lang is in the current version
        from src.slot import LexSlot, MorphSlot, SynSlot, SemSlot

        # Normalize empty values to None:
        pos = token.pos_ if token.pos_ else None
        lemma = token.lemma_
        morph = str(token.morph) if token.morph else None

        synset = None
        # hasattr() avoids building the full dir() listing of the reader class for every token.
        if pos is not None and hasattr(wncr, pos):  # if the pos tag is available in wordnet
            wn_pos = getattr(wncr, pos)  # get the wordnet pos tag
            # Get the most probable synset using Lesk's algorithm:
            synset = lesk(document.text, lemma, pos=wn_pos, lang=self.language)

        # The lexical slot is the same for every outcome, so build it once.
        lex_slot = LexSlot(pos, morph, synset, token.text)
        if synset:
            return lex_slot, SemSlot(pos, morph, synset), 1
        if morph:
            return lex_slot, MorphSlot(pos, morph), 1
        return lex_slot, SynSlot(pos), 1

    def collect_features_for_document(self, document, get_all=False):
        """Collect the features of every token in ``document``.

        :param document: An iterable of tokens; each token is passed to
            ``self.generate_features`` together with the document itself.
        :param get_all: If true, keep the complete result of
            ``generate_features`` per token; otherwise keep only its first
            element.
        :return: A tuple with one entry per token, in document order.
        """
        per_token = (self.generate_features(document, tok) for tok in document)
        if get_all:  # keep every feature
            return tuple(per_token)
        # only keep the first element of each result
        return tuple(features[0] for features in per_token)

    def collect_document_features(self, save=False, parallel=False):
        """Collect the token features of every document and store them.

        The result is written to ``self.data.documents["features"]`` as a
        one-column DataFrame holding one tuple of features per document.

        :param save: If true, call ``self.save_cxg()`` after collecting.
        :param parallel: If true, the documents are first run through
            ``self.nlp.pipe`` with one process per CPU; otherwise they are
            iterated as they are.
        """
        documents = self.data.documents["document"]
        if parallel:
            # os.cpu_count() returns None if the count cannot be determined; fall back to one process.
            pipeline_output = self.nlp.pipe(documents, n_process=os.cpu_count() or 1)
            progress = tqdm(pipeline_output,
                            desc="Collecting features per document",
                            total=len(documents))
        else:
            # NOTE(review): this branch does not run self.nlp over the documents, so it
            # presumably expects them to be parsed already -- confirm against the caller.
            progress = tqdm(documents, desc="Collecting features per document")

        # Retrieve the features for each token and store them in one tuple per document:
        document_features = [self.collect_features_for_document(document) for document in progress]

        self.data.documents["features"] = pd.DataFrame({"features": document_features})

        if save:
            self.save_cxg()

    # ...

    def save_cxg(self):
        """Serialize this object with dill to ``cxg.dill`` inside ``self.save_path``.

        The target directory is created if it does not exist yet. Progress
        messages are written to stderr. The whole instance is passed to
        ``dill.dump``.
        """
        print("Saving CxG to file...", file=sys.stderr)
        # Create directories if they don't exist; exist_ok avoids the race between
        # a separate existence check and the creation.
        os.makedirs(self.save_path, exist_ok=True)
        with open(f"{self.save_path}/cxg.dill", "wb") as f:
            dill.dump(self, f)
        print("Done saving!", file=sys.stderr)

This leads to the following error as soon as the code reaches self.save_cxg():

...

Collecting features per document: 100%|██████████| 118579/118579 [14:20<00:00, 137.74it/s]
Saving CxG to file...
Traceback (most recent call last):
  File "[…]/c3xg/src/cxg.py", line 442, in <module>
    cxg_english.collect_document_features(save=True, parallel=True)
  File "[…]/c3xg/src/cxg.py", line 160, in collect_document_features
    self.save_cxg()
  File "[…]/c3xg/src/cxg.py", line 385, in save_cxg
    dill.dump(self, f)
  File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 250, in dump
    Pickler(file, protocol, **_kwds).dump(obj)
  File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 418, in dump
    StockPickler.dump(self, obj)
  File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 487, in dump
    self.save(obj)
  File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 412, in save
    StockPickler.save(self, obj, save_persistent_id)
  File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 603, in save
    self.save_reduce(obj=obj, *rv)
  File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 717, in save_reduce
    save(state)
  File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 412, in save
    StockPickler.save(self, obj, save_persistent_id)
  File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 560, in save
    f(self, obj)  # Call unbound method with explicit self
  File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 1212, in save_module_dict
    StockPickler.save_dict(pickler, obj)
  File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 972, in save_dict
    self._batch_setitems(obj.items())
  File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 998, in _batch_setitems
    save(v)
  File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 412, in save
    StockPickler.save(self, obj, save_persistent_id)
  File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 603, in save
    self.save_reduce(obj=obj, *rv)
  File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 717, in save_reduce
    save(state)
  File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 412, in save
    StockPickler.save(self, obj, save_persistent_id)
  File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 560, in save
    f(self, obj)  # Call unbound method with explicit self
  File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 1212, in save_module_dict
    StockPickler.save_dict(pickler, obj)
  File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 972, in save_dict
    self._batch_setitems(obj.items())
  File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 998, in _batch_setitems
    save(v)
  File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 412, in save
    StockPickler.save(self, obj, save_persistent_id)
  File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 603, in save
    self.save_reduce(obj=obj, *rv)
  File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 717, in save_reduce
    save(state)
  File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 412, in save
    StockPickler.save(self, obj, save_persistent_id)
  File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 560, in save
    f(self, obj)  # Call unbound method with explicit self
  File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 1212, in save_module_dict
    StockPickler.save_dict(pickler, obj)
  File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 972, in save_dict
    self._batch_setitems(obj.items())
  File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 998, in _batch_setitems
    save(v)
  File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 412, in save
    StockPickler.save(self, obj, save_persistent_id)
  File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 603, in save
    self.save_reduce(obj=obj, *rv)
  File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 692, in save_reduce
    save(args)
  File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 412, in save
    StockPickler.save(self, obj, save_persistent_id)
  File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 560, in save
    f(self, obj)  # Call unbound method with explicit self
  File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 887, in save_tuple
    save(element)
  File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 412, in save
    StockPickler.save(self, obj, save_persistent_id)
  File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 560, in save
    f(self, obj)  # Call unbound method with explicit self
  File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 887, in save_tuple
    save(element)
  File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 412, in save
    StockPickler.save(self, obj, save_persistent_id)
  File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 603, in save
    self.save_reduce(obj=obj, *rv)
  File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 692, in save_reduce
    save(args)
  File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 412, in save
    StockPickler.save(self, obj, save_persistent_id)
  File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 560, in save
    f(self, obj)  # Call unbound method with explicit self
  File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 887, in save_tuple
    save(element)
  File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 412, in save
    StockPickler.save(self, obj, save_persistent_id)
  File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 560, in save
    f(self, obj)  # Call unbound method with explicit self
  File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 404, in save_numpy_array
    pickler.save_reduce(_create_array, (f,args,state,npdict), obj=obj)
  File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 692, in save_reduce
    save(args)
  File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 412, in save
    StockPickler.save(self, obj, save_persistent_id)
  File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 560, in save
    f(self, obj)  # Call unbound method with explicit self
  File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 902, in save_tuple
    save(element)
  File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 412, in save
    StockPickler.save(self, obj, save_persistent_id)
  File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 560, in save
    f(self, obj)  # Call unbound method with explicit self
  File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 902, in save_tuple
    save(element)
  File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 412, in save
    StockPickler.save(self, obj, save_persistent_id)
  File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 560, in save
    f(self, obj)  # Call unbound method with explicit self
  File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 932, in save_list
    self._batch_appends(obj)
  File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 956, in _batch_appends
    save(x)
  File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 412, in save
    StockPickler.save(self, obj, save_persistent_id)
  File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 560, in save
    f(self, obj)  # Call unbound method with explicit self
  File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 902, in save_tuple
    save(element)
  File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 412, in save
    StockPickler.save(self, obj, save_persistent_id)
  File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 603, in save
    self.save_reduce(obj=obj, *rv)
  File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 692, in save_reduce
    save(args)
  File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 412, in save
    StockPickler.save(self, obj, save_persistent_id)
  File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 560, in save
    f(self, obj)  # Call unbound method with explicit self
  File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 902, in save_tuple
    save(element)
  File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 412, in save
    StockPickler.save(self, obj, save_persistent_id)
  File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 603, in save
    self.save_reduce(obj=obj, *rv)
  File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 717, in save_reduce
    save(state)
  File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 412, in save
    StockPickler.save(self, obj, save_persistent_id)
  File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 560, in save
    f(self, obj)  # Call unbound method with explicit self
  File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 887, in save_tuple
    save(element)
  File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 412, in save
    StockPickler.save(self, obj, save_persistent_id)
  File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 560, in save
    f(self, obj)  # Call unbound method with explicit self
  File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 1212, in save_module_dict
    StockPickler.save_dict(pickler, obj)
  File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 972, in save_dict
    self._batch_setitems(obj.items())
  File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 998, in _batch_setitems
    save(v)
  File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 412, in save
    StockPickler.save(self, obj, save_persistent_id)
  File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 603, in save
    self.save_reduce(obj=obj, *rv)
  File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 717, in save_reduce
    save(state)
  File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 412, in save
    StockPickler.save(self, obj, save_persistent_id)
  File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 560, in save
    f(self, obj)  # Call unbound method with explicit self
  File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 1212, in save_module_dict
    StockPickler.save_dict(pickler, obj)
  File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 972, in save_dict
    self._batch_setitems(obj.items())
  File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 998, in _batch_setitems
    save(v)
  File "[…]/.local/lib/python3.10/site-packages/dill/_dill.py", line 412, in save
    StockPickler.save(self, obj, save_persistent_id)
  File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 603, in save
    self.save_reduce(obj=obj, *rv)
  File "[…]/anaconda3/envs/c3xg/lib/python3.10/pickle.py", line 684, in save_reduce
    raise PicklingError(
_pickle.PicklingError: args[0] from __newobj__ args has the wrong class

I saw similar issues here and here, but I couldn't see any similarities to my code. I suspect that it might have something to do with spaCy's nlp object, because I found issues like this which relate somewhat to the use of spaCy and multiprocessing.

What could be the reason that saving the CxG object shown in the code above is not possible?

0

There are 0 answers