@@ -141,69 +141,92 @@ def _pick(config, *atts):
 
 
 def random_input_kwargs(config: Any, task: str) -> Tuple[Dict[str, Any], Callable]:
-    """Inputs kwargs"""
+    """
+    Inputs kwargs.
+
+    If the configuration is None, the function selects typical dimensions.
+    """
+    fcts = get_get_inputs_function_for_tasks()
+    assert task in fcts, f"Unsupported task {task!r}, supported are {sorted(fcts)}"
     if task == "text-generation":
-        check_hasattr(
-            config,
-            "vocab_size",
-            "hidden_size",
-            "num_attention_heads",
-            ("num_key_value_heads", "num_attention_heads"),
-            "intermediate_size",
-            "hidden_size",
-        )
+        if config is not None:
+            check_hasattr(
+                config,
+                "vocab_size",
+                "hidden_size",
+                "num_attention_heads",
+                ("num_key_value_heads", "num_attention_heads"),
+                "intermediate_size",
+                "hidden_size",
+            )
         kwargs = dict(
             batch_size=2,
             sequence_length=30,
             sequence_length2=3,
-            head_dim=getattr(
-                config, "head_dim", config.hidden_size // config.num_attention_heads
+            head_dim=(
+                16
+                if config is None
+                else getattr(
+                    config, "head_dim", config.hidden_size // config.num_attention_heads
+                )
             ),
-            dummy_max_token_id=config.vocab_size - 1,
-            num_hidden_layers=config.num_hidden_layers,
-            num_key_value_heads=_pick(config, "num_key_value_heads", "num_attention_heads"),
-            intermediate_size=config.intermediate_size,
-            hidden_size=config.hidden_size,
+            dummy_max_token_id=31999 if config is None else (config.vocab_size - 1),
+            num_hidden_layers=4 if config is None else config.num_hidden_layers,
+            num_key_value_heads=(
+                24
+                if config is None
+                else _pick(config, "num_key_value_heads", "num_attention_heads")
+            ),
+            intermediate_size=1024 if config is None else config.intermediate_size,
+            hidden_size=512 if config is None else config.hidden_size,
         )
         fct = get_inputs_for_text_generation
     elif task == "text2text-generation":
-        check_hasattr(
-            config,
-            "vocab_size",
-            "hidden_size",
-            "num_attention_heads",
-            ("num_hidden_layers", "num_layers"),
-            ("n_positions", "d_model"),
-            (
-                "num_key_value_heads",
-                "num_heads",
-                ("decoder_attention_heads", "encoder_attention_heads"),
-            ),
-        )
+        if config is not None:
+            check_hasattr(
+                config,
+                "vocab_size",
+                "hidden_size",
+                "num_attention_heads",
+                ("num_hidden_layers", "num_layers"),
+                ("n_positions", "d_model"),
+                (
+                    "num_key_value_heads",
+                    "num_heads",
+                    ("decoder_attention_heads", "encoder_attention_heads"),
+                ),
+            )
         kwargs = dict(
             batch_size=2,
             sequence_length=30,
             sequence_length2=3,
-            head_dim=config.d_kv if hasattr(config, "d_kv") else 1,
-            dummy_max_token_id=config.vocab_size - 1,
-            num_hidden_layers=_pick(config, "num_hidden_layers", "num_layers"),
-            num_key_value_heads=_pick(
-                config,
-                "num_key_value_heads",
-                "num_heads",
-                (sum, "encoder_attention_heads", "decoder_attention_heads"),
+            head_dim=16 if config is None else (config.d_kv if hasattr(config, "d_kv") else 1),
+            dummy_max_token_id=31999 if config is None else config.vocab_size - 1,
+            num_hidden_layers=(
+                8 if config is None else _pick(config, "num_hidden_layers", "num_layers")
+            ),
+            num_key_value_heads=(
+                16
+                if config is None
+                else _pick(
+                    config,
+                    "num_key_value_heads",
+                    "num_heads",
+                    (sum, "encoder_attention_heads", "decoder_attention_heads"),
+                )
             ),
-            encoder_dim=_pick(config, "n_positions", "d_model"),
+            encoder_dim=512 if config is None else _pick(config, "n_positions", "d_model"),
         )
         fct = get_inputs_for_text2text_generation  # type: ignore
     elif task == "image-classification":
-        check_hasattr(config, "image_size", "num_channels")
-        if isinstance(config.image_size, int):
+        if config is not None:
+            check_hasattr(config, "image_size", "num_channels")
+        if config is None or isinstance(config.image_size, int):
             kwargs = dict(
                 batch_size=2,
-                input_width=config.image_size,
-                input_height=config.image_size,
-                input_channels=config.num_channels,
+                input_width=224 if config is None else config.image_size,
+                input_height=224 if config is None else config.image_size,
+                input_channels=3 if config is None else config.num_channels,
             )
         else:
             kwargs = dict(
@@ -214,32 +237,41 @@ def random_input_kwargs(config: Any, task: str) -> Tuple[Dict[str, Any], Callabl
             )
         fct = get_inputs_for_image_classification  # type: ignore
     elif task == "image-text-to-text":
-        check_hasattr(
-            config,
-            "vocab_size",
-            "hidden_size",
-            "num_attention_heads",
-            ("num_key_value_heads", "num_attention_heads"),
-            "intermediate_size",
-            "hidden_size",
-            "vision_config",
-        )
-        check_hasattr(config.vision_config, "image_size", "num_channels")
+        if config is not None:
+            check_hasattr(
+                config,
+                "vocab_size",
+                "hidden_size",
+                "num_attention_heads",
+                ("num_key_value_heads", "num_attention_heads"),
+                "intermediate_size",
+                "hidden_size",
+                "vision_config",
+            )
+            check_hasattr(config.vision_config, "image_size", "num_channels")
         kwargs = dict(
             batch_size=2,
             sequence_length=30,
             sequence_length2=3,
-            head_dim=getattr(
-                config, "head_dim", config.hidden_size // config.num_attention_heads
+            head_dim=(
+                16
+                if config is None
+                else getattr(
+                    config, "head_dim", config.hidden_size // config.num_attention_heads
+                )
+            ),
+            dummy_max_token_id=31999 if config is None else config.vocab_size - 1,
+            num_hidden_layers=4 if config is None else config.num_hidden_layers,
+            num_key_value_heads=(
+                8
+                if config is None
+                else _pick(config, "num_key_value_heads", "num_attention_heads")
             ),
-            dummy_max_token_id=config.vocab_size - 1,
-            num_hidden_layers=config.num_hidden_layers,
-            num_key_value_heads=_pick(config, "num_key_value_heads", "num_attention_heads"),
-            intermediate_size=config.intermediate_size,
-            hidden_size=config.hidden_size,
-            width=config.vision_config.image_size,
-            height=config.vision_config.image_size,
-            num_channels=config.vision_config.num_channels,
+            intermediate_size=1024 if config is None else config.intermediate_size,
+            hidden_size=512 if config is None else config.hidden_size,
+            width=224 if config is None else config.vision_config.image_size,
+            height=224 if config is None else config.vision_config.image_size,
+            num_channels=3 if config is None else config.vision_config.num_channels,
         )
         fct = get_inputs_for_image_text_to_text  # type: ignore
     else:
@@ -682,3 +714,13 @@ def get_inputs_for_text2text_generation(
         # encoder_outputs=torch.randn(batch_size, sequence_length2, encoder_dim),
     )
     return dict(inputs=inputs, dynamic_shapes=shapes)
+
+
+def get_get_inputs_function_for_tasks() -> Dict[str, Callable]:
+    """Returns the functions producing dummy inputs, one for every supported task."""
+    return {
+        "image-classification": get_inputs_for_image_classification,
+        "text-generation": get_inputs_for_text_generation,
+        "text2text-generation": get_inputs_for_text2text_generation,
+        "image-text-to-text": get_inputs_for_image_text_to_text,
+    }
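
A minimal usage sketch of the changed entry point, not part of the diff itself: it assumes random_input_kwargs returns the (kwargs, callable) pair its Tuple[Dict[str, Any], Callable] annotation advertises, and that the returned callable accepts exactly these keyword arguments.

# Hypothetical usage sketch: with config=None the new defaults are used
# instead of reading attributes from a configuration object.
kwargs, fct = random_input_kwargs(None, "text-generation")
print(kwargs["head_dim"], kwargs["hidden_size"])  # 16, 512 with the defaults above
# fct is get_inputs_for_text_generation; assuming it takes these keyword
# arguments, the dummy inputs and dynamic shapes are then built with:
data = fct(**kwargs)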