How to create a SpaCy pipeline from Model Data Path? #10350
-
I am having trouble creating a pipeline when loading a model from its data path. Here is my code:
from spacy import util
import spacy
nlp = spacy.load('en_core_web_lg')
model = nlp.meta['lang'] + '_' + nlp.meta['name']
version = model + '-' + nlp.meta['version']
path = util.get_package_path(model).as_posix() + '/' + version
# /opt/homebrew/Caskroom/mambaforge/base/envs/aio/lib/python3.10/site-packages/en_core_web_lg/en_core_web_lg-3.2.0
lang = 'en'
cls = util.get_lang_class(lang)
nlp = cls()
pipeline = ['tagger', 'parser', 'ner']
for name in pipeline:
    nlp.add_pipe(name)
nlp.from_disk(path)
The output:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/Users/khoilr/Code/NLP/nlp.ipynb Cell 2 in <module>
17 for name in pipeline:
18 nlp.add_pipe(name)
---> 20 nlp.from_disk(path)
File /opt/homebrew/Caskroom/mambaforge/base/envs/aio/lib/python3.10/site-packages/spacy/language.py:2042, in Language.from_disk(self, path, exclude, overrides)
2039 if not (path / "vocab").exists() and "vocab" not in exclude: # type: ignore[operator]
2040 # Convert to list here in case exclude is (default) tuple
2041 exclude = list(exclude) + ["vocab"]
-> 2042 util.from_disk(path, deserializers, exclude) # type: ignore[arg-type]
2043 self._path = path # type: ignore[assignment]
2044 self._link_components()
File /opt/homebrew/Caskroom/mambaforge/base/envs/aio/lib/python3.10/site-packages/spacy/util.py:1299, in from_disk(path, readers, exclude)
1296 for key, reader in readers.items():
1297 # Split to support file names like meta.json
1298 if key.split(".")[0] not in exclude:
-> 1299 reader(path / key)
1300 return path
File /opt/homebrew/Caskroom/mambaforge/base/envs/aio/lib/python3.10/site-packages/spacy/language.py:2036, in Language.from_disk.<locals>.<lambda>(p, proc)
2034 if not hasattr(proc, "from_disk"):
2035 continue
-> 2036 deserializers[name] = lambda p, proc=proc: proc.from_disk( # type: ignore[misc]
2037 p, exclude=["vocab"]
2038 )
2039 if not (path / "vocab").exists() and "vocab" not in exclude: # type: ignore[operator]
2040 # Convert to list here in case exclude is (default) tuple
2041 exclude = list(exclude) + ["vocab"]
File /opt/homebrew/Caskroom/mambaforge/base/envs/aio/lib/python3.10/site-packages/spacy/pipeline/trainable_pipe.pyx:343, in spacy.pipeline.trainable_pipe.TrainablePipe.from_disk()
File /opt/homebrew/Caskroom/mambaforge/base/envs/aio/lib/python3.10/site-packages/spacy/util.py:1299, in from_disk(path, readers, exclude)
1296 for key, reader in readers.items():
1297 # Split to support file names like meta.json
1298 if key.split(".")[0] not in exclude:
-> 1299 reader(path / key)
1300 return path
File /opt/homebrew/Caskroom/mambaforge/base/envs/aio/lib/python3.10/site-packages/spacy/pipeline/trainable_pipe.pyx:333, in spacy.pipeline.trainable_pipe.TrainablePipe.from_disk.load_model()
File /opt/homebrew/Caskroom/mambaforge/base/envs/aio/lib/python3.10/site-packages/spacy/pipeline/trainable_pipe.pyx:334, in spacy.pipeline.trainable_pipe.TrainablePipe.from_disk.load_model()
File /opt/homebrew/Caskroom/mambaforge/base/envs/aio/lib/python3.10/site-packages/thinc/model.py:593, in Model.from_bytes(self, bytes_data)
591 msg = srsly.msgpack_loads(bytes_data)
592 msg = convert_recursive(is_xp_array, self.ops.asarray, msg)
--> 593 return self.from_dict(msg)
File /opt/homebrew/Caskroom/mambaforge/base/envs/aio/lib/python3.10/site-packages/thinc/model.py:610, in Model.from_dict(self, msg)
608 nodes = list(self.walk())
609 if len(msg["nodes"]) != len(nodes):
--> 610 raise ValueError("Cannot deserialize model: mismatched structure")
611 for i, node in enumerate(nodes):
612 info = msg["nodes"][i]
ValueError: Cannot deserialize model: mismatched structure
Beta Was this translation helpful? Give feedback.
Replies: 1 comment
-
I'm going to copy your comment from Stack Overflow that explains why you are trying to do this:
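If that's the case, I would recommend the dev docs for Language and the `load_model_from_path` function, which uses `load_model_from_config` to prep the Language object and `Language.from_disk` to actually load all the components.
To be clear for anyone finding this later, it is not expected or recommended that you write your own code to deserialize things like this. Normally you should just be able to use `spacy.load`.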
Beta Was this translation helpful? Give feedback.
I'm going to copy your comment from Stack Overflow that explains why you are trying to do this:
If that's the case, I would recommend the dev docs for Language and the `load_model_from_path` function, which uses `load_model_from_config` to prep the Language object and `Language.from_disk` to actually load all the components.
To be clear for anyone finding this later, it is not expected or recommended that you write your own code to deserialize things like this. Normally you should just be able to use `spacy.load`.