error in add_pipe() #13714

bisserai · 2024-12-12T13:44:05Z

bisserai
Dec 12, 2024

hello community,

i'm very inexperienced, so apologies if there is an obvious solution i've missed. i'm trying to use benepar to get the constituency structure of some german texts. when i run
nlp = spacy.load('de_core_news_lg')
nlp.add_pipe("benepar", config={"model": "benepar_de2"})
i unfortunately get the error:
RuntimeError: Error(s) in loading state_dict for ChartParser:
Unexpected key(s) in state_dict: "pretrained_model.embeddings.position_ids".

i think there might also be a problem with my environment, as the import statements produce the following warning:
/Users/ivanova/opt/anaconda3/envs/parseg/lib/python3.12/site-packages/torch/nn/modules/transformer.py:20: UserWarning: Failed to initialize NumPy: _ARRAY_API not found (Triggered internally at /Users/runner/work/pytorch/pytorch/pytorch/torch/csrc/utils/tensor_numpy.cpp:84.)
device: torch.device = torch.device(torch._C._get_default_device()), # torch.device('cpu'),
i tried installing different versions of numpy, but it didn't help.

thanks for your help,
bissera

full traceback of main error:

RuntimeError Traceback (most recent call last)
Cell In[4], line 2
1 nlp = spacy.load('de_core_news_lg')
----> 2 nlp.add_pipe("benepar", config={"model": "benepar_de2"})
3 # if spacy.__version__.startswith('2'):
4 # nlp.add_pipe(benepar.BeneparComponent("benepar_de2"))
5 # else:
6 # nlp.add_pipe("benepar", config={"model": "benepar_de2"})

File ~/opt/anaconda3/envs/parseg/lib/python3.12/site-packages/spacy/language.py:824, in Language.add_pipe(self, factory_name, name, before, after, first, last, source, config, raw_config, validate)
820 pipe_component, factory_name = self.create_pipe_from_source(
821 factory_name, source, name=name
822 )
823 else:
--> 824 pipe_component = self.create_pipe(
825 factory_name,
826 name=name,
827 config=config,
828 raw_config=raw_config,
829 validate=validate,
830 )
831 pipe_index = self._get_pipe_index(before, after, first, last)
832 self._pipe_meta[name] = self.get_factory_meta(factory_name)

File ~/opt/anaconda3/envs/parseg/lib/python3.12/site-packages/spacy/language.py:712, in Language.create_pipe(self, factory_name, name, config, raw_config, validate)
709 cfg = {factory_name: config}
710 # We're calling the internal _fill here to avoid constructing the
711 # registered functions twice
--> 712 resolved = registry.resolve(cfg, validate=validate)
713 filled = registry.fill({"cfg": cfg[factory_name]}, validate=validate)["cfg"]
714 filled = Config(filled)

File ~/opt/anaconda3/envs/parseg/lib/python3.12/site-packages/confection/__init__.py:760, in registry.resolve(cls, config, schema, overrides, validate)
751 @classmethod
752 def resolve(
753 cls,
(...)
758 validate: bool = True,
759 ) -> Dict[str, Any]:
--> 760 resolved, _ = cls._make(
761 config, schema=schema, overrides=overrides, validate=validate, resolve=True
762 )
763 return resolved

File ~/opt/anaconda3/envs/parseg/lib/python3.12/site-packages/confection/__init__.py:809, in registry._make(cls, config, schema, overrides, resolve, validate)
807 if not is_interpolated:
808 config = Config(orig_config).interpolate()
--> 809 filled, _, resolved = cls._fill(
810 config, schema, validate=validate, overrides=overrides, resolve=resolve
811 )
812 filled = Config(filled, section_order=section_order)
813 # Check that overrides didn't include invalid properties not in config

File ~/opt/anaconda3/envs/parseg/lib/python3.12/site-packages/confection/__init__.py:881, in registry._fill(cls, config, schema, validate, resolve, parent, overrides)
878 getter = cls.get(reg_name, func_name)
879 # We don't want to try/except this and raise our own error
880 # here, because we want the traceback if the function fails.
--> 881 getter_result = getter(*args, **kwargs)
882 else:
883 # We're not resolving and calling the function, so replace
884 # the getter_result with a Promise class
885 getter_result = Promise(
886 registry=reg_name, name=func_name, args=args, kwargs=kwargs
887 )

File ~/opt/anaconda3/envs/parseg/lib/python3.12/site-packages/benepar/integrations/spacy_plugin.py:176, in create_benepar_component(nlp, name, model, subbatch_max_tokens, disable_tagger)
169 def create_benepar_component(
170 nlp,
171 name,
(...)
174 disable_tagger: bool,
175 ):
--> 176 return BeneparComponent(
177 model,
178 subbatch_max_tokens=subbatch_max_tokens,
179 disable_tagger=disable_tagger,
180 )

File ~/opt/anaconda3/envs/parseg/lib/python3.12/site-packages/benepar/integrations/spacy_plugin.py:116, in BeneparComponent.__init__(self, name, subbatch_max_tokens, disable_tagger, batch_size)
96 def __init__(
97 self,
98 name,
(...)
101 batch_size="ignored",
102 ):
103 """Load a trained parser model.
104
105 Args:
(...)
114 batch_size: deprecated and ignored; use subbatch_max_tokens instead
115 """
--> 116 self._parser = load_trained_model(name)
117 if torch.cuda.is_available():
118 self._parser.cuda()

File ~/opt/anaconda3/envs/parseg/lib/python3.12/site-packages/benepar/integrations/downloader.py:34, in load_trained_model(model_name_or_path)
32 model_path = locate_model(model_name_or_path)
33 from ..parse_chart import ChartParser
---> 34 parser = ChartParser.from_trained(model_path)
35 return parser

File ~/opt/anaconda3/envs/parseg/lib/python3.12/site-packages/benepar/parse_chart.py:186, in ChartParser.from_trained(cls, model_path)
184 config["hparams"] = nkutil.HParams(**hparams)
185 parser = cls(**config)
--> 186 parser.load_state_dict(state_dict)
187 return parser

File ~/opt/anaconda3/envs/parseg/lib/python3.12/site-packages/torch/nn/modules/module.py:2153, in Module.load_state_dict(self, state_dict, strict, assign)
2148 error_msgs.insert(
2149 0, 'Missing key(s) in state_dict: {}. '.format(
2150 ', '.join(f'"{k}"' for k in missing_keys)))
2152 if len(error_msgs) > 0:
-> 2153 raise RuntimeError('Error(s) in loading state_dict for {}:\n\t{}'.format(
2154 self.__class__.__name__, "\n\t".join(error_msgs)))
2155 return _IncompatibleKeys(missing_keys, unexpected_keys)

RuntimeError: Error(s) in loading state_dict for ChartParser:
Unexpected key(s) in state_dict: "pretrained_model.embeddings.position_ids".

bisserai · 2024-12-12T13:51:49Z

bisserai
Dec 12, 2024
Author

update: i get exactly the same error when i try to run benepar with nltk instead of spacy

0 replies

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Uh oh!

error in add_pipe() #13714

Uh oh!

{{title}}

Uh oh!

Replies: 1 comment

Uh oh!

{{title}}

Uh oh!

Select a reply

Uh oh!

Uh oh!

error in add_pipe() #13714

Uh oh!

bisserai Dec 12, 2024

full traceback of main error:

Replies: 1 comment

Uh oh!

bisserai Dec 12, 2024 Author

bisserai
Dec 12, 2024

bisserai
Dec 12, 2024
Author