Skip to content

Commit 14d9b1a

Browse files
committed
complete
1 parent a794842 commit 14d9b1a

16 files changed

+1032
-869
lines changed
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
import unittest
2+
import transformers
3+
from onnx_diagnostic.ext_test_case import (
4+
ExtTestCase,
5+
requires_torch,
6+
requires_transformers,
7+
)
8+
from onnx_diagnostic.helpers.config_helper import config_class_from_architecture
9+
10+
11+
class TestConfigHelper(ExtTestCase):
    """Unit tests for ``onnx_diagnostic.helpers.config_helper``."""

    @requires_transformers("4.50")  # we limit to some versions of the CI
    @requires_torch("2.7")
    def test_config_class_from_architecture(self):
        # The architecture name must resolve to the matching transformers
        # configuration class.
        resolved_cls = config_class_from_architecture("LlamaForCausalLM")
        self.assertEqual(resolved_cls, transformers.LlamaConfig)
17+
18+
19+
if __name__ == "__main__":
20+
unittest.main(verbosity=2)

_unittests/ut_tasks/test_tasks.py

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
import unittest
2+
import torch
3+
from onnx_diagnostic.ext_test_case import ExtTestCase, hide_stdout
4+
from onnx_diagnostic.torch_models.hghub.model_inputs import get_untrained_model_with_inputs
5+
from onnx_diagnostic.torch_export_patches import bypass_export_some_errors
6+
7+
8+
class TestTasks(ExtTestCase):
    """Per-task checks built on ``get_untrained_model_with_inputs``.

    Every test fetches the data for one model id, checks the reported
    ``(size, n_weights)`` pair and, where it is known to work, runs the
    model on the generated inputs.
    """

    @hide_stdout()
    def test_text2text_generation(self):
        mid = "sshleifer/tiny-marian-en-de"
        # mid = "Salesforce/codet5-small"
        data = get_untrained_model_with_inputs(mid, verbose=1)
        self.assertIn((data["size"], data["n_weights"]), [(473928, 118482)])
        # The forward pass data["model"](**data["inputs"]) is deliberately not
        # executed: it used to sit after this raise, where it could never run.
        raise unittest.SkipTest(f"not working for {mid!r}")

    @hide_stdout()
    def test_automatic_speech_recognition(self):
        mid = "openai/whisper-tiny"
        data = get_untrained_model_with_inputs(mid, verbose=1)
        self.assertIn((data["size"], data["n_weights"]), [(132115968, 33028992)])
        model, inputs, ds = data["model"], data["inputs"], data["dynamic_shapes"]
        Dim = torch.export.Dim
        self.maxDiff = None
        self.assertIn("{0:Dim(batch),1:Dim(seq_length)}", self.string_type(ds))

        # Each call builds a fresh Dim object, exactly like spelling out every
        # Dim(...) in the expected structure.
        def batch_dim():
            return Dim("batch", min=1, max=1024)

        def seq_dim():
            return Dim("seq_length", min=1, max=4096)

        expected_ds = {
            "decoder_input_ids": {0: batch_dim(), 1: seq_dim()},
            "cache_position": {0: seq_dim()},
            "encoder_outputs": [{0: batch_dim()}],
            # 2 x 2 x 2 nested lists, every leaf only has a dynamic batch axis
            "past_key_values": [
                [[{0: batch_dim()} for _ in range(2)] for _ in range(2)]
                for _ in range(2)
            ],
        }
        self.assertEqualAny(expected_ds, ds)

        model(**inputs)
        self.assertEqual(
            "#1[T1r3]",
            self.string_type(torch.utils._pytree.tree_flatten(inputs["encoder_outputs"])[0]),
        )

        # The patch / flatten / export sequence is executed twice, as it was
        # when written out as two identical sections.
        # NOTE(review): presumably this checks the patches can be applied a
        # second time after being removed -- confirm before reducing to one pass.
        for _ in range(2):
            with bypass_export_some_errors(patch_transformers=True, verbose=10):
                flat_cache = torch.utils._pytree.tree_flatten(inputs["past_key_values"])[0]
                self.assertIsInstance(flat_cache, list)
                self.assertIsInstance(flat_cache[0], torch.Tensor)
                self.assertEqual(
                    "#8[T1r4,T1r4,T1r4,T1r4,T1r4,T1r4,T1r4,T1r4]",
                    self.string_type(flat_cache),
                )
                torch.export.export(
                    model, (), kwargs=inputs, dynamic_shapes=ds, strict=False
                )

    @hide_stdout()
    def test_imagetext2text_generation(self):
        mid = "HuggingFaceM4/tiny-random-idefics"
        # mid = "Salesforce/codet5-small"
        data = get_untrained_model_with_inputs(mid, verbose=1)
        self.assertIn((data["size"], data["n_weights"]), [(12742888, 3185722)])
        model, inputs = data["model"], data["inputs"]
        model(**inputs)

    @hide_stdout()
    def test_fill_mask(self):
        mid = "google-bert/bert-base-multilingual-cased"
        # mid = "Salesforce/codet5-small"
        data = get_untrained_model_with_inputs(mid, verbose=1)
        # NOTE(review): this expected pair is identical to the one used for
        # "HuggingFaceM4/tiny-random-idefics" in test_imagetext2text_generation;
        # it looks copy-pasted -- confirm the value for this model id.
        self.assertIn((data["size"], data["n_weights"]), [(12742888, 3185722)])
        model, inputs = data["model"], data["inputs"]
        model(**inputs)
102+
103+
104+
if __name__ == "__main__":
105+
unittest.main(verbosity=2)

_unittests/ut_torch_models/try_tasks.py renamed to _unittests/ut_tasks/try_tasks.py

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
class TestHuggingFaceHubModel(ExtTestCase):
88
@never_test()
99
def test_image_classification(self):
10-
# clear&&NEVERTEST=1 python _unittests/ut_torch_models/try_tasks.py -k image_c
10+
# clear&&NEVERTEST=1 python _unittests/ut_tasks/try_tasks.py -k image_c
1111

1212
from transformers import ViTImageProcessor, ViTModel
1313
from PIL import Image
@@ -27,7 +27,7 @@ def test_image_classification(self):
2727

2828
@never_test()
2929
def test_image_classification_resnet(self):
30-
# clear&&NEVERTEST=1 python _unittests/ut_torch_models/try_tasks.py -k resnet
30+
# clear&&NEVERTEST=1 python _unittests/ut_tasks/try_tasks.py -k resnet
3131

3232
from transformers import ViTImageProcessor, ViTModel
3333
from PIL import Image
@@ -47,7 +47,7 @@ def test_image_classification_resnet(self):
4747

4848
@never_test()
4949
def test_zero_shot_image_classification(self):
50-
# clear&&NEVERTEST=1 python _unittests/ut_torch_models/try_tasks.py -k zero
50+
# clear&&NEVERTEST=1 python _unittests/ut_tasks/try_tasks.py -k zero
5151
from PIL import Image
5252
import requests
5353
from transformers import CLIPProcessor, CLIPModel
@@ -74,7 +74,7 @@ def test_zero_shot_image_classification(self):
7474

7575
@never_test()
7676
def test_text2text_generation(self):
77-
# clear&&NEVERTEST=1 python _unittests/ut_torch_models/try_tasks.py -k text2t
77+
# clear&&NEVERTEST=1 python _unittests/ut_tasks/try_tasks.py -k text2t
7878

7979
import torch
8080
from transformers import RobertaTokenizer, T5ForConditionalGeneration
@@ -100,7 +100,7 @@ def test_text2text_generation(self):
100100

101101
@never_test()
102102
def test_imagetext2text_generation(self):
103-
# clear&&NEVERTEST=1 python _unittests/ut_torch_models/try_tasks.py -k etext2t
103+
# clear&&NEVERTEST=1 python _unittests/ut_tasks/try_tasks.py -k etext2t
104104
# https://huggingface.co/docs/transformers/main/en/tasks/idefics
105105

106106
import torch
@@ -131,7 +131,7 @@ def test_imagetext2text_generation(self):
131131

132132
@never_test()
133133
def test_automatic_speech_recognition(self):
134-
# clear&&NEVERTEST=1 python _unittests/ut_torch_models/try_tasks.py -k automatic_speech
134+
# clear&&NEVERTEST=1 python _unittests/ut_tasks/try_tasks.py -k automatic_speech
135135
# https://huggingface.co/openai/whisper-tiny
136136

137137
from transformers import WhisperProcessor, WhisperForConditionalGeneration
@@ -195,6 +195,22 @@ def test_automatic_speech_recognition(self):
195195
transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
196196
print("--", transcription)
197197

198+
@never_test()
199+
def test_fill_mask(self):
    # clear&&NEVERTEST=1 python _unittests/ut_tasks/try_tasks.py -k fill
    # https://huggingface.co/google-bert/bert-base-multilingual-cased

    from transformers import BertModel, BertTokenizer

    # The same checkpoint name is used for the tokenizer and the model.
    checkpoint = "bert-base-multilingual-cased"
    sentence = "Replace me by any text you'd like."

    bert_tokenizer = BertTokenizer.from_pretrained(checkpoint)
    bert = BertModel.from_pretrained(checkpoint)
    batch = bert_tokenizer(sentence, return_tensors="pt")

    # Print the tokenized inputs, then the model outputs, as string_type summaries.
    print()
    print("-- inputs", string_type(batch, with_shape=True, with_min_max=True))
    prediction = bert(**batch)
    print("-- outputs", string_type(prediction, with_shape=True, with_min_max=True))
213+
198214

199215
if __name__ == "__main__":
200216
unittest.main(verbosity=2)

_unittests/ut_torch_models/test_hghub_model.py

Lines changed: 1 addition & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,19 @@
11
import pprint
22
import unittest
3-
import torch
4-
import transformers
53
from onnx_diagnostic.ext_test_case import (
64
ExtTestCase,
75
hide_stdout,
86
requires_torch,
97
requires_transformers,
108
ignore_errors,
119
)
12-
from onnx_diagnostic.torch_models.hghub.model_inputs import (
13-
config_class_from_architecture,
14-
get_untrained_model_with_inputs,
15-
)
10+
from onnx_diagnostic.torch_models.hghub.model_inputs import get_untrained_model_with_inputs
1611
from onnx_diagnostic.torch_models.hghub.hub_api import get_pretrained_config
1712
from onnx_diagnostic.torch_models.hghub.hub_data import load_models_testing
1813
from onnx_diagnostic.torch_export_patches import bypass_export_some_errors
1914

2015

2116
class TestHuggingFaceHubModel(ExtTestCase):
22-
@requires_transformers("4.50") # we limit to some versions of the CI
23-
@requires_torch("2.7")
24-
def test_config_class_from_architecture(self):
25-
config = config_class_from_architecture("LlamaForCausalLM")
26-
self.assertEqual(config, transformers.LlamaConfig)
27-
2817
@hide_stdout()
2918
def test_get_untrained_model_with_inputs_tiny_llm(self):
3019
mid = "arnir0/Tiny-LLM"
@@ -107,91 +96,6 @@ def test_get_untrained_model_with_inputs_clip_vit(self):
10796
# different expected value for different version of transformers
10897
self.assertIn((data["size"], data["n_weights"]), [(188872708, 47218177)])
10998

110-
@hide_stdout()
111-
def test_get_untrained_model_with_inputs_text2text_generation(self):
112-
mid = "sshleifer/tiny-marian-en-de"
113-
# mid = "Salesforce/codet5-small"
114-
data = get_untrained_model_with_inputs(mid, verbose=1)
115-
self.assertIn((data["size"], data["n_weights"]), [(473928, 118482)])
116-
model, inputs = data["model"], data["inputs"]
117-
raise unittest.SkipTest(f"not working for {mid!r}")
118-
model(**inputs)
119-
120-
@hide_stdout()
121-
def test_get_untrained_model_with_inputs_automatic_speech_recognition(self):
122-
mid = "openai/whisper-tiny"
123-
data = get_untrained_model_with_inputs(mid, verbose=1)
124-
self.assertIn((data["size"], data["n_weights"]), [(132115968, 33028992)])
125-
model, inputs, ds = data["model"], data["inputs"], data["dynamic_shapes"]
126-
Dim = torch.export.Dim
127-
self.maxDiff = None
128-
self.assertIn("{0:Dim(batch),1:Dim(seq_length)}", self.string_type(ds))
129-
self.assertEqualAny(
130-
{
131-
"decoder_input_ids": {
132-
0: Dim("batch", min=1, max=1024),
133-
1: Dim("seq_length", min=1, max=4096),
134-
},
135-
"cache_position": {0: Dim("seq_length", min=1, max=4096)},
136-
"encoder_outputs": [{0: Dim("batch", min=1, max=1024)}],
137-
"past_key_values": [
138-
[
139-
[
140-
{0: Dim("batch", min=1, max=1024)},
141-
{0: Dim("batch", min=1, max=1024)},
142-
],
143-
[
144-
{0: Dim("batch", min=1, max=1024)},
145-
{0: Dim("batch", min=1, max=1024)},
146-
],
147-
],
148-
[
149-
[
150-
{0: Dim("batch", min=1, max=1024)},
151-
{0: Dim("batch", min=1, max=1024)},
152-
],
153-
[
154-
{0: Dim("batch", min=1, max=1024)},
155-
{0: Dim("batch", min=1, max=1024)},
156-
],
157-
],
158-
],
159-
},
160-
ds,
161-
)
162-
model(**inputs)
163-
self.assertEqual(
164-
"#1[T1r3]",
165-
self.string_type(torch.utils._pytree.tree_flatten(inputs["encoder_outputs"])[0]),
166-
)
167-
with bypass_export_some_errors(patch_transformers=True, verbose=10):
168-
flat = torch.utils._pytree.tree_flatten(inputs["past_key_values"])[0]
169-
self.assertIsInstance(flat, list)
170-
self.assertIsInstance(flat[0], torch.Tensor)
171-
self.assertEqual(
172-
"#8[T1r4,T1r4,T1r4,T1r4,T1r4,T1r4,T1r4,T1r4]",
173-
self.string_type(flat),
174-
)
175-
torch.export.export(model, (), kwargs=inputs, dynamic_shapes=ds, strict=False)
176-
with bypass_export_some_errors(patch_transformers=True, verbose=10):
177-
flat = torch.utils._pytree.tree_flatten(inputs["past_key_values"])[0]
178-
self.assertIsInstance(flat, list)
179-
self.assertIsInstance(flat[0], torch.Tensor)
180-
self.assertEqual(
181-
"#8[T1r4,T1r4,T1r4,T1r4,T1r4,T1r4,T1r4,T1r4]",
182-
self.string_type(flat),
183-
)
184-
torch.export.export(model, (), kwargs=inputs, dynamic_shapes=ds, strict=False)
185-
186-
@hide_stdout()
187-
def test_get_untrained_model_with_inputs_imagetext2text_generation(self):
188-
mid = "HuggingFaceM4/tiny-random-idefics"
189-
# mid = "Salesforce/codet5-small"
190-
data = get_untrained_model_with_inputs(mid, verbose=1)
191-
self.assertIn((data["size"], data["n_weights"]), [(12742888, 3185722)])
192-
model, inputs = data["model"], data["inputs"]
193-
model(**inputs)
194-
19599
@hide_stdout()
196100
@requires_torch("2.7", "reduce test time")
197101
@requires_transformers("4.50", "reduce test time")
@@ -210,11 +114,9 @@ def _diff(c1, c2):
210114
for mid in load_models_testing():
211115
with self.subTest(mid=mid):
212116
if mid in {
213-
"hf-internal-testing/tiny-random-BeitForImageClassification",
214117
"hf-internal-testing/tiny-random-MaskFormerForInstanceSegmentation",
215118
"hf-internal-testing/tiny-random-MoonshineForConditionalGeneration",
216119
"fxmarty/pix2struct-tiny-random",
217-
"hf-internal-testing/tiny-random-ViTMSNForImageClassification",
218120
"hf-internal-testing/tiny-random-YolosModel",
219121
}:
220122
print(f"-- not implemented yet for {mid!r}")

onnx_diagnostic/_command_lines_parser.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -303,13 +303,13 @@ def get_parser_validate() -> ArgumentParser:
303303
def _cmd_validate(argv: List[Any]):
304304
from .helpers import string_type
305305
from .torch_models.test_helper import get_inputs_for_task, validate_model, _ds_clean
306-
from .torch_models.hghub.model_inputs import get_get_inputs_function_for_tasks
306+
from .tasks import supported_tasks
307307

308308
parser = get_parser_validate()
309309
args = parser.parse_args(argv[1:])
310310
if not args.task and not args.mid:
311311
print("-- list of supported tasks:")
312-
print("\n".join(sorted(get_get_inputs_function_for_tasks())))
312+
print("\n".join(supported_tasks()))
313313
elif not args.mid:
314314
data = get_inputs_for_task(args.task)
315315
if args.verbose:

0 commit comments

Comments
 (0)