spacy-transformers not loading tokenizer #10640
-
How to reproduce the behaviour: The tokenizer does not load properly in the Python example: from spacy.lang.en import English
from spacy_transformers import Transformer, TransformerModel
from spacy_transformers.annotation_setters import null_annotation_setter
from spacy_transformers.span_getters import get_doc_spans
nlp = English()
model = TransformerModel(
name="roberta-base",
get_spans=get_doc_spans,
tokenizer_config={"use_fast": True},
transformer_config={}
)
trf = Transformer(
nlp.vocab,
model,
set_extra_annotations=null_annotation_setter,
max_batch_items=4096,
)
nlp.add_pipe("transformer")
# Cannot properly tokenize text
doc = nlp("Hello") This yields the following error: ---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-1-ebf1789e7412> in <module>
20
21 # Cannot properly tokenize text
---> 22 doc = nlp("Hello")
/usr/local/lib/python3.7/site-packages/spacy/language.py in __call__(self, text, disable, component_cfg)
1020 raise ValueError(Errors.E109.format(name=name)) from e
1021 except Exception as e:
-> 1022 error_handler(name, proc, [doc], e)
1023 if doc is None:
1024 raise ValueError(Errors.E005.format(name=name))
/usr/local/lib/python3.7/site-packages/spacy/util.py in raise_error(proc_name, proc, docs, e)
1615
1616 def raise_error(proc_name, proc, docs, e):
-> 1617 raise e
1618
1619
/usr/local/lib/python3.7/site-packages/spacy/language.py in __call__(self, text, disable, component_cfg)
1015 error_handler = proc.get_error_handler()
1016 try:
-> 1017 doc = proc(doc, **component_cfg.get(name, {})) # type: ignore[call-arg]
1018 except KeyError as e:
1019 # This typically happens if a component is not initialized
/usr/local/lib/python3.7/site-packages/spacy_transformers/pipeline_component.py in __call__(self, doc)
190 """
191 install_extensions()
--> 192 outputs = self.predict([doc])
193 self.set_annotations([doc], outputs)
194 return doc
/usr/local/lib/python3.7/site-packages/spacy_transformers/pipeline_component.py in predict(self, docs)
226 activations = FullTransformerBatch.empty(len(docs))
227 else:
--> 228 activations = self.model.predict(docs)
229 batch_id = TransformerListener.get_batch_id(docs)
230 for listener in self.listeners:
/usr/local/lib/python3.7/site-packages/thinc/model.py in predict(self, X)
313 only the output, instead of the `(output, callback)` tuple.
314 """
--> 315 return self._func(self, X, is_train=False)[0]
316
317 def finish_update(self, optimizer: Optimizer) -> None:
/usr/local/lib/python3.7/site-packages/spacy_transformers/layers/transformer_model.py in forward(model, docs, is_train)
175 if "logger" in model.attrs:
176 log_gpu_memory(model.attrs["logger"], "begin forward")
--> 177 batch_encoding = huggingface_tokenize(tokenizer, [span.text for span in flat_spans])
178 wordpieces = WordpieceBatch.from_batch_encoding(batch_encoding)
179 if "logger" in model.attrs:
/usr/local/lib/python3.7/site-packages/spacy_transformers/layers/transformer_model.py in huggingface_tokenize(tokenizer, texts)
276 return_tensors="np",
277 return_token_type_ids=None, # Sets to model default
--> 278 padding="longest",
279 )
280 token_data["input_texts"] = []
TypeError: 'NoneType' object is not callable The same error occurs when loading a config with equivalent settings. I believe the core issue is that the hf_model = huggingface_from_pretrained(name, tokenizer_config, transformer_config) The Your Environment
|
Beta Was this translation helpful? Give feedback.
Replies: 1 comment 5 replies
-
This is the right repo. We intentionally disable issues on spacy-transformers to keep things in one place. Sorry this is confusing. I would need more time to figure out exactly what's going wrong, but I think the issue is just that you're not initializing the transformer component (the way this works is a little special). In particular note here:
This just adds a transformer with default config; everything you did with the trf object above is ignored. See here for an example of actually passing the config. Not sure if those docs alone will clear things up for you — can you clarify what you're actually trying to do? |
Beta Was this translation helpful? Give feedback.
This is the right repo. We intentionally disable issues on spacy-transformers to keep things in one place.
Sorry this is confusing. I would need more time to figure out exactly what's going wrong but I think the issue is just that you're not initializing the
transformer
component (the way this works is a little special). In particular note here:
This just adds a
transformer
with default config; everything you did with the trf
object above is ignored. See here for an example of actually passing the config. Not sure if those docs alone will clear things up for you — can you clarify what you're actually trying to do?