+import pprint
 import unittest
 import transformers
 from onnx_diagnostic.ext_test_case import (
     ExtTestCase,
     hide_stdout,
+    long_test,
     requires_torch,
     requires_transformers,
 )
 from onnx_diagnostic.torch_models.hghub.model_inputs import (
     config_class_from_architecture,
     get_untrained_model_with_inputs,
 )
+from onnx_diagnostic.torch_models.hghub.hub_api import get_pretrained_config
+from onnx_diagnostic.torch_models.hghub.hub_data import load_models_testing
 
 
 class TestHuggingFaceHubModel(ExtTestCase):
@@ -23,25 +27,38 @@ def test_config_class_from_architecture(self):
     def test_get_untrained_model_with_inputs_tiny_llm(self):
         mid = "arnir0/Tiny-LLM"
         data = get_untrained_model_with_inputs(mid, verbose=1)
+        self.assertEqual(
+            set(data),
+            {
+                "model",
+                "inputs",
+                "dynamic_shapes",
+                "configuration",
+                "size",
+                "n_weights",
+                "input_kwargs",
+                "model_kwargs",
+            },
+        )
         model, inputs = data["model"], data["inputs"]
         model(**inputs)
-        self.assertEqual((1858125824, 464531456), (data["size"], data["n_weights"]))
+        self.assertEqual((51955968, 12988992), (data["size"], data["n_weights"]))
 
     @hide_stdout()
     def test_get_untrained_model_with_inputs_tiny_xlm_roberta(self):
         mid = "hf-internal-testing/tiny-xlm-roberta"  # XLMRobertaConfig
         data = get_untrained_model_with_inputs(mid, verbose=1)
         model, inputs = data["model"], data["inputs"]
         model(**inputs)
-        self.assertEqual((126190824, 31547706), (data["size"], data["n_weights"]))
+        self.assertEqual((8642088, 2160522), (data["size"], data["n_weights"]))
 
     @hide_stdout()
     def test_get_untrained_model_with_inputs_tiny_gpt_neo(self):
         mid = "hf-internal-testing/tiny-random-GPTNeoXForCausalLM"
         data = get_untrained_model_with_inputs(mid, verbose=1)
         model, inputs = data["model"], data["inputs"]
         model(**inputs)
-        self.assertEqual((4291141632, 1072785408), (data["size"], data["n_weights"]))
+        self.assertEqual((316712, 79178), (data["size"], data["n_weights"]))
 
     @hide_stdout()
     def test_get_untrained_model_with_inputs_phi_2(self):
@@ -52,9 +69,63 @@ def test_get_untrained_model_with_inputs_phi_2(self):
         # different expected value for different version of transformers
         self.assertIn(
             (data["size"], data["n_weights"]),
-            [(1040293888, 260073472), (1040498688, 260124672)],
+            [(453330944, 113332736), (453126144, 113281536)],
         )
 
+    @hide_stdout()
+    def test_get_untrained_model_with_inputs_beit(self):
+        mid = "hf-internal-testing/tiny-random-BeitForImageClassification"
+        data = get_untrained_model_with_inputs(mid, verbose=1)
+        model, inputs = data["model"], data["inputs"]
+        model(**inputs)
+        # different expected value for different version of transformers
+        self.assertIn((data["size"], data["n_weights"]), [(111448, 27862)])
+
+    @hide_stdout()
+    def test_get_untrained_model_with_inputs_codellama(self):
+        mid = "codellama/CodeLlama-7b-Python-hf"
+        data = get_untrained_model_with_inputs(mid, verbose=1)
+        model, inputs = data["model"], data["inputs"]
+        model(**inputs)
+        # different expected value for different version of transformers
+        self.assertIn((data["size"], data["n_weights"]), [(410532864, 102633216)])
+
+    @hide_stdout()
+    @long_test()
+    def test_get_untrained_model_Ltesting_models(self):
+        def _diff(c1, c2):
+            rows = [f"types {c1.__class__.__name__} <> {c2.__class__.__name__}"]
+            for k, v in c1.__dict__.items():
+                if isinstance(v, (str, dict, list, tuple, int, float)) and v != getattr(
+                    c2, k, None
+                ):
+                    rows.append(f"{k} :: -- {v} ++ {getattr(c2, k, 'MISS')}")
+            return "\n".join(rows)
+
+        # UNHIDE=1 LONGTEST=1 python _unittests/ut_torch_models/test_hghub_model.py -k L -f
+        for mid in load_models_testing():
+            with self.subTest(mid=mid):
+                data = get_untrained_model_with_inputs(mid, verbose=1)
+                model, inputs = data["model"], data["inputs"]
+                try:
+                    model(**inputs)
+                except Exception as e:
+                    diff = _diff(get_pretrained_config(mid), data["configuration"])
+                    raise AssertionError(
+                        f"Computation failed due to {e}.\n--- pretrained\n"
+                        f"{pprint.pformat(get_pretrained_config(mid))}\n"
+                        f"--- modified\n{data['configuration']}\n"
+                        f"--- diff\n{diff}"
+                    ) from e
+                # different expected value for different version of transformers
+                if data["size"] > 2**30:
+                    raise AssertionError(
+                        f"Model is too big, size={data['size'] // 2**20} Mb,"
+                        f"config is\n{data['configuration']}"
+                    )
+                self.assertLess(data["size"], 2**30)
+                self.assertLess(data["n_weights"], 2**28)
+
 
 if __name__ == "__main__":
     unittest.main(verbosity=2)