From 1480fdfc591c4a55d603f91612d0de4b2a8424c7 Mon Sep 17 00:00:00 2001 From: xadupre Date: Sat, 21 Jun 2025 11:19:20 +0200 Subject: [PATCH 1/5] really use pretrained version --- onnx_diagnostic/_command_lines_parser.py | 36 +++++++- onnx_diagnostic/tasks/image_text_to_text.py | 84 +++++++++++++++---- onnx_diagnostic/torch_models/hghub/hub_api.py | 26 +++++- .../torch_models/hghub/hub_data.py | 7 +- .../torch_models/hghub/model_inputs.py | 33 +++++--- onnx_diagnostic/torch_models/validate.py | 13 ++- 6 files changed, 157 insertions(+), 42 deletions(-) diff --git a/onnx_diagnostic/_command_lines_parser.py b/onnx_diagnostic/_command_lines_parser.py index 806068d5..e08f73b2 100644 --- a/onnx_diagnostic/_command_lines_parser.py +++ b/onnx_diagnostic/_command_lines_parser.py @@ -333,7 +333,24 @@ def get_parser_validate() -> ArgumentParser: of supported tasks. """ ), - epilog="If the model id is specified, one untrained version of it is instantiated.", + epilog=textwrap.dedent( + """ + If the model id is specified, one untrained version of it is instantiated. + Examples: + + python -m onnx_diagnostic validate -m microsoft/Phi-4-mini-reasoning \\ + --run -v 1 -o dump_test --no-quiet --repeat 2 --warmup 2 \\ + --dtype float16 --device cuda --patch --export onnx-dynamo --opt ir + + python -m onnx_diagnostic validate -m microsoft/Phi-4-mini-reasoning \\ + --run -v 1 -o dump_test --no-quiet --repeat 2 --warmup 2 \\ + --dtype float16 --device cuda --patch --export custom --opt default + + python -m onnx_diagnostic validate -m microsoft/Phi-4-mini-reasoning \\ + --run -v 1 -o dump_test --no-quiet --repeat 2 --warmup 2 \\ + --dtype float16 --device cuda --export modelbuilder + """ + ), formatter_class=RawTextHelpFormatter, ) parser.add_argument("-m", "--mid", type=str, help="model id, usually /") @@ -372,6 +389,12 @@ def get_parser_validate() -> ArgumentParser: type=int, help="Raises an exception if a dynamic dimension becomes static.", ) + parser.add_argument( + "--same-as-trained", + default=False, + action=BooleanOptionalAction, + help="Validates a model identical to the trained model but not trained.", + ) parser.add_argument( "--trained", default=False, @@ -487,7 +510,8 @@ def _cmd_validate(argv: List[Any]): do_run=args.run, verbose=args.verbose, quiet=args.quiet, - trained=args.trained, + same_as_pretrained=args.same_as_trained, + use_pretrained=args.trained, dtype=args.dtype, device=args.device, patch=args.patch, @@ -619,7 +643,13 @@ def get_parser_agg() -> ArgumentParser: and produces values. Every row has a date. """ ), - epilog="example\n python -m onnx_diagnostic agg test_agg.xlsx raw/*.zip -v 1", + epilog=textwrap.dedent( + """ + examples:\n + + python -m onnx_diagnostic agg test_agg.xlsx raw/*.zip -v 1 + """ + ), formatter_class=RawTextHelpFormatter, ) parser.add_argument("output", help="output excel file") diff --git a/onnx_diagnostic/tasks/image_text_to_text.py b/onnx_diagnostic/tasks/image_text_to_text.py index 1ae22537..7decd6d9 100644 --- a/onnx_diagnostic/tasks/image_text_to_text.py +++ b/onnx_diagnostic/tasks/image_text_to_text.py @@ -132,16 +132,30 @@ def random_input_kwargs(config: Any) -> Tuple[Dict[str, Any], Callable]: If the configuration is None, the function selects typical dimensions. """ if config is not None: - check_hasattr( - config, - "vocab_size", - "hidden_size", - "num_attention_heads", - ("num_key_value_heads", "num_attention_heads"), - "intermediate_size", - "hidden_size", - "vision_config", - ) + if hasattr(config, "text_config"): + check_hasattr( + config.text_config, + "vocab_size", + "hidden_size", + "num_attention_heads", + ("num_key_value_heads", "num_attention_heads"), + "intermediate_size", + "hidden_size", + ) + check_hasattr(config, "vision_config") + text_config = True + else: + check_hasattr( + config, + "vocab_size", + "hidden_size", + "num_attention_heads", + ("num_key_value_heads", "num_attention_heads"), + "intermediate_size", + "hidden_size", + "vision_config", + ) + text_config = False check_hasattr(config.vision_config, "image_size", "num_channels") kwargs = dict( batch_size=2, @@ -150,17 +164,55 @@ def random_input_kwargs(config: Any) -> Tuple[Dict[str, Any], Callable]: head_dim=( 16 if config is None - else getattr(config, "head_dim", config.hidden_size // config.num_attention_heads) + else getattr( + config, + "head_dim", + (config.text_config.hidden_size if text_config else config.config.hidden_size) + // ( + config.text_config.num_attention_heads + if text_config + else config.config.num_attention_heads + ), + ) + ), + dummy_max_token_id=( + 31999 + if config is None + else (config.text_config.vocab_size if text_config else config.config.vocab_size) + - 1 + ), + num_hidden_layers=( + 4 + if config is None + else ( + config.text_config.num_hidden_layers + if text_config + else config.config.num_hidden_layers + ) ), - dummy_max_token_id=31999 if config is None else config.vocab_size - 1, - num_hidden_layers=4 if config is None else config.num_hidden_layers, num_key_value_heads=( 8 if config is None - else _pick(config, "num_key_value_heads", "num_attention_heads") + else ( + _pick(config.text_config, "num_key_value_heads", "num_attention_heads") + if text_config + else _pick(config, "num_key_value_heads", "num_attention_heads") + ) + ), + intermediate_size=( + 1024 + if config is None + else ( + config.text_config.intermediate_size + if text_config + else config.config.intermediate_size + ) + ), + hidden_size=( + 512 + if config is None + else (config.text_config.hidden_size if text_config else config.hidden_size) ), - intermediate_size=1024 if config is None else config.intermediate_size, - hidden_size=512 if config is None else config.hidden_size, width=224 if config is None else config.vision_config.image_size, height=224 if config is None else config.vision_config.image_size, num_channels=3 if config is None else config.vision_config.num_channels, diff --git a/onnx_diagnostic/torch_models/hghub/hub_api.py b/onnx_diagnostic/torch_models/hghub/hub_api.py index 8b58b4ed..2c25f6eb 100644 --- a/onnx_diagnostic/torch_models/hghub/hub_api.py +++ b/onnx_diagnostic/torch_models/hghub/hub_api.py @@ -138,12 +138,15 @@ def _guess_task_from_config(config: Any) -> Optional[str]: @functools.cache -def task_from_arch(arch: str, default_value: Optional[str] = None) -> str: +def task_from_arch( + arch: str, default_value: Optional[str] = None, model_id: Optional[str] = None +) -> str: """ This function relies on stored information. That information needs to be refresh. :param arch: architecture name :param default_value: default value in case the task cannot be determined + :param model_id: unused unless the architecture does not help. :return: task .. runpython:: @@ -156,9 +159,16 @@ def task_from_arch(arch: str, default_value: Optional[str] = None) -> str: `. """ data = load_architecture_task() + if arch not in data and model_id: + # Let's try with the model id. + return task_from_id(model_id) if default_value is not None: return data.get(arch, default_value) - assert arch in data, f"Architecture {arch!r} is unknown, last refresh in {__date__}" + assert arch in data, ( + f"Architecture {arch!r} is unknown, last refresh in {__date__}. " + f"``onnx_diagnostic.torch_models.hghub.hub_data.__data_arch__`` " + f"needs to be updated (model_id={(model_id or '?')!r})." + ) return data[arch] @@ -176,6 +186,7 @@ def task_from_id( if the task cannot be determined :param pretrained: uses the config :param fall_back_to_pretrained: falls back to pretrained config + :param exc: raises an excpetion if True :return: task """ if not pretrained: @@ -191,11 +202,18 @@ def task_from_id( guess = _guess_task_from_config(config) if guess is not None: return guess + data = load_architecture_task() + if model_id in data: + return data[model_id] assert config.architectures is not None and len(config.architectures) == 1, ( f"Cannot return the task of {model_id!r}, pipeline_tag is not setup, " - f"architectures={config.architectures} in config={config}" + f"architectures={config.architectures} in config={config}. " + f"The task can be added in " + f"``onnx_diagnostic.torch_models.hghub.hub_data.__data_arch__``." + ) + return task_from_arch( + config.architectures[0], default_value=default_value, model_id=model_id ) - return task_from_arch(config.architectures[0], default_value=default_value) def task_from_tags(tags: Union[str, List[str]]) -> str: diff --git a/onnx_diagnostic/torch_models/hghub/hub_data.py b/onnx_diagnostic/torch_models/hghub/hub_data.py index 6e31de4e..57425e5e 100644 --- a/onnx_diagnostic/torch_models/hghub/hub_data.py +++ b/onnx_diagnostic/torch_models/hghub/hub_data.py @@ -3,7 +3,7 @@ import textwrap from typing import Dict, List -__date__ = "2025-03-26" +__date__ = "2025-06-21" __data_arch_values__ = {"ResNetForImageClassification": dict(image_size=224)} @@ -52,6 +52,8 @@ GPTNeoModel,feature-extraction GPTNeoXForCausalLM,text-generation GemmaForCausalLM,text-generation + Gemma2ForCausalLM,text-generation + Gemma3ForConditionalGeneration,image-text-to-text GraniteForCausalLM,text-generation GroupViTModel,feature-extraction HieraForImageClassification,image-classification @@ -144,7 +146,8 @@ XLMRobertaModel,sentence-similarity Wav2Vec2ForCTC,automatic-speech-recognition YolosForObjectDetection,object-detection - YolosModel,image-feature-extraction""" + YolosModel,image-feature-extraction + emilyalsentzer/Bio_ClinicalBERT,fill-mask""" ) __data_tasks__ = [ diff --git a/onnx_diagnostic/torch_models/hghub/model_inputs.py b/onnx_diagnostic/torch_models/hghub/model_inputs.py index bff1ef75..7f32227e 100644 --- a/onnx_diagnostic/torch_models/hghub/model_inputs.py +++ b/onnx_diagnostic/torch_models/hghub/model_inputs.py @@ -1,5 +1,6 @@ import inspect import os +import pprint from typing import Any, Dict, Optional, Tuple import torch import transformers @@ -22,6 +23,7 @@ def get_untrained_model_with_inputs( model_kwargs: Optional[Dict[str, Any]] = None, verbose: int = 0, dynamic_rope: Optional[bool] = None, + use_pretrained: bool = False, same_as_pretrained: bool = False, use_preinstalled: bool = True, add_second_input: bool = False, @@ -43,6 +45,7 @@ def get_untrained_model_with_inputs( :param dynamic_rope: use dynamic rope (see :class:`transformers.LlamaConfig`) :param same_as_pretrained: if True, do not change the default values to get a smaller model + :param use_pretrained: download the pretrained weights as well :param use_preinstalled: use preinstalled configurations :param add_second_input: provides a second inputs to check a model supports different shapes @@ -68,6 +71,10 @@ def get_untrained_model_with_inputs( print("-- dynamic shapes:", pprint.pformat(data['dynamic_shapes'])) print("-- configuration:", pprint.pformat(data['configuration'])) """ + assert not use_preinstalled or not use_only_preinstalled, ( + f"model_id={model_id!r}, pretinstalled model is only avaialble " + f"if use_only_preinstalled is False." + ) if verbose: print(f"[get_untrained_model_with_inputs] model_id={model_id!r}") if use_preinstalled: @@ -99,7 +106,7 @@ def get_untrained_model_with_inputs( print(f"[get_untrained_model_with_inputs] architectures={archs!r}") print(f"[get_untrained_model_with_inputs] cls={config.__class__.__name__!r}") if task is None: - task = task_from_arch(archs[0]) + task = task_from_arch(archs[0], model_id=model_id) if verbose: print(f"[get_untrained_model_with_inputs] task={task!r}") @@ -114,7 +121,6 @@ def get_untrained_model_with_inputs( ) # updating the configuration - mkwargs = reduce_model_config(config, task) if not same_as_pretrained else {} if model_kwargs: for k, v in model_kwargs.items(): @@ -139,27 +145,28 @@ def get_untrained_model_with_inputs( f"{config._attn_implementation!r}" # type: ignore[union-attr] ) + if use_pretrained: + model = transformers.AutoModel.from_pretrained(model_id, **mkwargs) + else: + if archs is not None: + model = getattr(transformers, archs[0])(config) + else: + assert same_as_pretrained and use_pretrained, ( + f"Model {model_id!r} cannot be built, the model cannot be built. " + f"It must be downloaded. Use same_as_pretrained=True " + f"and use_pretrained=True." + ) + # input kwargs kwargs, fct = random_input_kwargs(config, task) if verbose: print(f"[get_untrained_model_with_inputs] use fct={fct}") if os.environ.get("PRINT_CONFIG") in (1, "1"): - import pprint - print(f"-- input kwargs for task {task!r}") pprint.pprint(kwargs) if inputs_kwargs: kwargs.update(inputs_kwargs) - if archs is not None: - model = getattr(transformers, archs[0])(config) - else: - assert same_as_pretrained, ( - f"Model {model_id!r} cannot be built, the model cannot be built. " - f"It must be downloaded. Use same_as_pretrained=True." - ) - model = None - # This line is important. Some models may produce different # outputs even with the same inputs in training mode. model.eval() diff --git a/onnx_diagnostic/torch_models/validate.py b/onnx_diagnostic/torch_models/validate.py index f79409a3..c27f744f 100644 --- a/onnx_diagnostic/torch_models/validate.py +++ b/onnx_diagnostic/torch_models/validate.py @@ -259,7 +259,8 @@ def validate_model( verbose: int = 0, dtype: Optional[Union[str, torch.dtype]] = None, device: Optional[Union[str, torch.device]] = None, - trained: bool = False, + same_as_pretrained: bool = False, + use_pretrained: bool = False, optimization: Optional[str] = None, quiet: bool = False, patch: bool = False, @@ -294,7 +295,9 @@ def validate_model( :param verbose: verbosity level :param dtype: uses this dtype to check the model :param device: do the verification on this device - :param trained: use the trained model, not the untrained one + :param same_as_pretrained: use a model equivalent to the trained, + this is not always possible + :param use_pretrained: use the trained model, not the untrained one :param optimization: optimization to apply to the exported model, depend on the the exporter :param quiet: if quiet, catches exception if any issue @@ -353,7 +356,8 @@ def validate_model( version_do_run=str(do_run), version_dtype=str(dtype or ""), version_device=str(device or ""), - version_trained=str(trained), + version_same_as_pretrained=str(same_as_pretrained), + version_use_pretrained=str(use_pretrained), version_optimization=optimization or "", version_quiet=str(quiet), version_patch=str(patch), @@ -408,11 +412,12 @@ def validate_model( summary, None, ( - lambda mid=model_id, v=verbose, task=task, tr=trained, iop=iop, sub=subfolder, i2=inputs2: ( # noqa: E501 + lambda mid=model_id, v=verbose, task=task, uptr=use_pretrained, tr=same_as_pretrained, iop=iop, sub=subfolder, i2=inputs2: ( # noqa: E501 get_untrained_model_with_inputs( mid, verbose=v, task=task, + use_pretrained=uptr, same_as_pretrained=tr, inputs_kwargs=iop, model_kwargs=mop, From 5a8eccfcc47ad95ee93e266261d7f42f6fcf3d4e Mon Sep 17 00:00:00 2001 From: xadupre Date: Sat, 21 Jun 2025 11:25:05 +0200 Subject: [PATCH 2/5] add Phi3MoE --- onnx_diagnostic/torch_models/hghub/hub_api.py | 4 +--- onnx_diagnostic/torch_models/hghub/hub_data.py | 1 + 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/onnx_diagnostic/torch_models/hghub/hub_api.py b/onnx_diagnostic/torch_models/hghub/hub_api.py index 2c25f6eb..2b0250da 100644 --- a/onnx_diagnostic/torch_models/hghub/hub_api.py +++ b/onnx_diagnostic/torch_models/hghub/hub_api.py @@ -211,9 +211,7 @@ def task_from_id( f"The task can be added in " f"``onnx_diagnostic.torch_models.hghub.hub_data.__data_arch__``." ) - return task_from_arch( - config.architectures[0], default_value=default_value, model_id=model_id - ) + return task_from_arch(config.architectures[0], default_value=default_value) def task_from_tags(tags: Union[str, List[str]]) -> str: diff --git a/onnx_diagnostic/torch_models/hghub/hub_data.py b/onnx_diagnostic/torch_models/hghub/hub_data.py index 57425e5e..bdf79693 100644 --- a/onnx_diagnostic/torch_models/hghub/hub_data.py +++ b/onnx_diagnostic/torch_models/hghub/hub_data.py @@ -99,6 +99,7 @@ PegasusModel,feature-extraction Phi3ForCausalLM,text-generation PhiForCausalLM,text-generation + PhiMoEForCausalLM,text-generation Pix2StructForConditionalGeneration,image-to-text PLBartForConditionalGeneration,text2text-generation PoolFormerModel,image-feature-extraction From e099ebcadb1cf1c2cf007a816765b0b9de759041 Mon Sep 17 00:00:00 2001 From: xadupre Date: Sat, 21 Jun 2025 13:58:09 +0200 Subject: [PATCH 3/5] fix config --- onnx_diagnostic/tasks/image_text_to_text.py | 11 +++++------ onnx_diagnostic/torch_models/hghub/model_inputs.py | 2 +- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/onnx_diagnostic/tasks/image_text_to_text.py b/onnx_diagnostic/tasks/image_text_to_text.py index 7decd6d9..3128707d 100644 --- a/onnx_diagnostic/tasks/image_text_to_text.py +++ b/onnx_diagnostic/tasks/image_text_to_text.py @@ -167,19 +167,18 @@ def random_input_kwargs(config: Any) -> Tuple[Dict[str, Any], Callable]: else getattr( config, "head_dim", - (config.text_config.hidden_size if text_config else config.config.hidden_size) + (config.text_config.hidden_size if text_config else config.hidden_size) // ( config.text_config.num_attention_heads if text_config - else config.config.num_attention_heads + else config.num_attention_heads ), ) ), dummy_max_token_id=( 31999 if config is None - else (config.text_config.vocab_size if text_config else config.config.vocab_size) - - 1 + else (config.text_config.vocab_size if text_config else config.vocab_size) - 1 ), num_hidden_layers=( 4 @@ -187,7 +186,7 @@ def random_input_kwargs(config: Any) -> Tuple[Dict[str, Any], Callable]: else ( config.text_config.num_hidden_layers if text_config - else config.config.num_hidden_layers + else config.num_hidden_layers ) ), num_key_value_heads=( @@ -205,7 +204,7 @@ def random_input_kwargs(config: Any) -> Tuple[Dict[str, Any], Callable]: else ( config.text_config.intermediate_size if text_config - else config.config.intermediate_size + else config.intermediate_size ) ), hidden_size=( diff --git a/onnx_diagnostic/torch_models/hghub/model_inputs.py b/onnx_diagnostic/torch_models/hghub/model_inputs.py index 7f32227e..30448fda 100644 --- a/onnx_diagnostic/torch_models/hghub/model_inputs.py +++ b/onnx_diagnostic/torch_models/hghub/model_inputs.py @@ -72,7 +72,7 @@ def get_untrained_model_with_inputs( print("-- configuration:", pprint.pformat(data['configuration'])) """ assert not use_preinstalled or not use_only_preinstalled, ( - f"model_id={model_id!r}, pretinstalled model is only avaialble " + f"model_id={model_id!r}, pretinstalled model is only available " f"if use_only_preinstalled is False." ) if verbose: From 47273ab724cc2e867a68867c32a813d0f9913fd3 Mon Sep 17 00:00:00 2001 From: xadupre Date: Sat, 21 Jun 2025 14:20:25 +0200 Subject: [PATCH 4/5] spell --- onnx_diagnostic/torch_models/hghub/hub_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnx_diagnostic/torch_models/hghub/hub_api.py b/onnx_diagnostic/torch_models/hghub/hub_api.py index 2b0250da..b78b093f 100644 --- a/onnx_diagnostic/torch_models/hghub/hub_api.py +++ b/onnx_diagnostic/torch_models/hghub/hub_api.py @@ -186,7 +186,7 @@ def task_from_id( if the task cannot be determined :param pretrained: uses the config :param fall_back_to_pretrained: falls back to pretrained config - :param exc: raises an excpetion if True + :param exc: raises an exception if True :return: task """ if not pretrained: From 88fe0ad114493225d1d803499042db13f846cd33 Mon Sep 17 00:00:00 2001 From: xadupre Date: Sun, 22 Jun 2025 12:34:54 +0200 Subject: [PATCH 5/5] fix order of inputs --- onnx_diagnostic/tasks/image_text_to_text.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/onnx_diagnostic/tasks/image_text_to_text.py b/onnx_diagnostic/tasks/image_text_to_text.py index 3128707d..4400b772 100644 --- a/onnx_diagnostic/tasks/image_text_to_text.py +++ b/onnx_diagnostic/tasks/image_text_to_text.py @@ -96,10 +96,10 @@ def get_inputs( for i in range(num_hidden_layers) ] ), - image_attention_mask=torch.ones((batch_size, sequence_length2, n_images)).to( + pixel_values=torch.ones((batch_size, n_images, num_channels, width, height)).to( torch.int64 ), - pixel_values=torch.ones((batch_size, n_images, num_channels, width, height)).to( + image_attention_mask=torch.ones((batch_size, sequence_length2, n_images)).to( torch.int64 ), )