Skip to content

Commit f43d3dd

Browse files
authored
Update tokenizer unit tests (#919)
* Ignore kobert from unit tests (now requires `trust_remote_code=True`) * Remove tokenizers without chat template
1 parent 50d5620 commit f43d3dd

File tree

1 file changed

+3
-14
lines changed

1 file changed

+3
-14
lines changed

tests/generate_tests.py

Lines changed: 3 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,9 @@
7272

7373
# TODO: remove when https://github.com/huggingface/transformers/issues/28096 is addressed
7474
'RajuKandasamy/tamillama_tiny_30m',
75+
76+
# Requires `trust_remote_code`
77+
'monologg/kobert',
7578
]
7679

7780
MAX_TESTS = {
@@ -228,10 +231,6 @@
228231

229232
TOKENIZERS_WITH_CHAT_TEMPLATES = {
230233
# https://huggingface.co/docs/transformers/main/en/chat_templating
231-
'Xenova/blenderbot-400M-distill': [
232-
'basic',
233-
],
234-
235234
'Xenova/mistral-tokenizer-v1': [
236235
'basic',
237236
],
@@ -240,16 +239,6 @@
240239
'system',
241240
],
242241

243-
'Xenova/llama-tokenizer': [
244-
'basic',
245-
'system',
246-
'system + assistant',
247-
],
248-
'Xenova/llama2-tokenizer': [
249-
'basic',
250-
'system',
251-
'system + assistant',
252-
],
253242
'Xenova/llama2-chat-tokenizer': [
254243
'basic',
255244
'system',

0 commit comments

Comments
 (0)