@@ -18,10 +18,8 @@
 from typing import Callable, Iterator
 
 from lighteval.utils.imports import (
-    NO_SPACY_TOKENIZER_ERROR_MSG,
-    NO_STANZA_TOKENIZER_ERROR_MSG,
-    can_load_spacy_tokenizer,
-    can_load_stanza_tokenizer,
+    Extra,
+    requires,
 )
 from lighteval.utils.language import Language
 
@@ -99,11 +97,10 @@ def span_tokenize(self, text: str) -> list[tuple[int, int]]:
         return list(self.tokenizer.span_tokenize(text))
 
 
+@requires(Extra.MULTILINGUAL)
 class SpaCyTokenizer(WordTokenizer):
     def __init__(self, spacy_language: str, config=None):
         super().__init__()
-        if not can_load_spacy_tokenizer(spacy_language):
-            raise ImportError(NO_SPACY_TOKENIZER_ERROR_MSG)
         self.spacy_language = spacy_language
         self.config = config
         self._tokenizer = None
@@ -137,11 +134,10 @@ def span_tokenize(self, text: str) -> list[tuple[int, int]]:
         ]
 
 
+@requires("stanza")
 class StanzaTokenizer(WordTokenizer):
     def __init__(self, stanza_language: str, **stanza_kwargs):
         super().__init__()
-        if not can_load_stanza_tokenizer():
-            raise ImportError(NO_STANZA_TOKENIZER_ERROR_MSG)
         self.stanza_language = stanza_language
         self.stanza_kwargs = stanza_kwargs
         self._tokenizer = None
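For context on the pattern: the diff replaces the eager `can_load_*` checks inside each `__init__` with a declarative `@requires(...)` class decorator imported from `lighteval.utils.imports`, taking either an extras group (`Extra.MULTILINGUAL`) or a bare package name (`"stanza"`). A minimal sketch of how such a decorator could work is below; the `Extra` package mapping, the wrapping strategy, and the error wording are assumptions for illustration, not the actual lighteval implementation.

import importlib.util
from enum import Enum
from functools import wraps


class Extra(Enum):
    # Hypothetical mapping from an extras group to the packages it pulls in.
    MULTILINGUAL = ("spacy", "stanza")


def requires(dep):
    """Class decorator: check optional dependencies at instantiation time.

    `dep` is either a package name like "stanza" or an Extra member.
    Sketch only; the real lighteval decorator may differ.
    """
    packages = dep.value if isinstance(dep, Extra) else (dep,)

    def decorator(cls):
        original_init = cls.__init__

        @wraps(original_init)
        def __init__(self, *args, **kwargs):
            # Look up each package without importing it.
            missing = [p for p in packages if importlib.util.find_spec(p) is None]
            if missing:
                raise ImportError(
                    f"{cls.__name__} requires {', '.join(missing)}; "
                    f"install the matching extra, e.g. pip install lighteval[multilingual]."
                )
            original_init(self, *args, **kwargs)

        cls.__init__ = __init__
        return cls

    return decorator

Deferring the check to instantiation keeps module import cheap and gives every optional-dependency tokenizer a single, consistent failure path instead of per-class error-message constants.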