From 3f17045b8ccbec184b753db12d329e7180b42505 Mon Sep 17 00:00:00 2001 From: xadupre Date: Thu, 1 May 2025 00:43:54 +0200 Subject: [PATCH 1/6] doc --- _doc/examples/plot_export_tiny_llm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_doc/examples/plot_export_tiny_llm.py b/_doc/examples/plot_export_tiny_llm.py index 84303a5b..4bd32591 100644 --- a/_doc/examples/plot_export_tiny_llm.py +++ b/_doc/examples/plot_export_tiny_llm.py @@ -5,7 +5,7 @@ ================================================================ Inputs are always dynamic with LLMs that is why dynamic shapes -needs to be specified when a LLM is exported with:func:`torch.export.export`. +needs to be specified when a LLM is exported with :func:`torch.export.export`. Most of the examples on :epkg:`HuggingFace` use method :meth:`transformers.GenerationMixin.generate` but we only want to export the model and its method ``forward``. From ed7f36f7d500faf22cc772a0b7507a28f4a6ec26 Mon Sep 17 00:00:00 2001 From: xadupre Date: Thu, 1 May 2025 11:08:33 +0200 Subject: [PATCH 2/6] improve documentation --- README.rst | 5 ++ _doc/examples/plot_export_tiny_llm.py | 4 +- _doc/examples/plot_export_tiny_phi2.py | 89 ++++++++++++++++--- _doc/index.rst | 4 + .../recipes/plot_dynamic_shapes_python_int.py | 2 + 5 files changed, 88 insertions(+), 16 deletions(-) diff --git a/README.rst b/README.rst index e89ded82..9341f71d 100644 --- a/README.rst +++ b/README.rst @@ -56,6 +56,11 @@ or Enlightening Examples +++++++++++++++++++++ +**Where to start to export a model** + +* `Export microsoft/phi-2 + `_ + **Torch Export** * `Use DYNAMIC or AUTO when exporting if dynamic shapes has constraints diff --git a/_doc/examples/plot_export_tiny_llm.py b/_doc/examples/plot_export_tiny_llm.py index 4bd32591..05762187 100644 --- a/_doc/examples/plot_export_tiny_llm.py +++ b/_doc/examples/plot_export_tiny_llm.py @@ -1,8 +1,8 @@ """ .. 
_l-plot-tiny-llm-export: -Steel method forward to guess the dynamic shapes (with Tiny-LLM) -================================================================ +Steal method forward to guess inputs and dynamic shapes (with Tiny-LLM) +======================================================================= Inputs are always dynamic with LLMs that is why dynamic shapes needs to be specified when a LLM is exported with :func:`torch.export.export`. diff --git a/_doc/examples/plot_export_tiny_phi2.py b/_doc/examples/plot_export_tiny_phi2.py index b91bdcb4..6e8d522a 100644 --- a/_doc/examples/plot_export_tiny_phi2.py +++ b/_doc/examples/plot_export_tiny_phi2.py @@ -1,11 +1,16 @@ """ .. _l-plot-export_tiny_phi2: -Untrained microsoft/phi-2 -========================= +====================== +Export microsoft/phi-2 +====================== -:epkg:`microsoft/phi-2` is not a big models but still quite big -when it comes to write unittest. Function +This function exports a smaller untrained model with the same architecture. +It is faster than the pretrained model. +When this works, the untrained model can be replaced by the trained one. + +:epkg:`microsoft/phi-2` is not a big model but still quite big +when it comes to write unittests. Function :func:`onnx_diagnostic.torch_models.hghub.get_untrained_model_with_inputs` can be used to create a reduced untrained version of a model coming from :epkg:`HuggingFace`. It downloads the configuration from the website @@ -14,7 +19,7 @@ the export or to compare performance. The relevance does not matter. Create the dummy model -++++++++++++++++++++++ +====================== """ import copy @@ -48,6 +53,8 @@ print(f"model {size / 2**20:1.3f} Mb with {n_weights // 1000} mille parameters.") # %% # The original model has 2.7 billion parameters. It was divided by more than 10. +# However, it can still be used with +# ``get_untrained_model_with_inputs("microsoft/phi-2", same_as_pretrained=True)``. # Let's see the configuration. 
print(config) @@ -72,13 +79,18 @@ # %% -# Export -# ++++++ +# Export to fx.Graph +# ================== +# +# :func:`torch.export.export` is the first step before converting +# a model into ONNX. The inputs are duplicated (with ``copy.deepcopy``) +# because the model may modify them in place (a cache for example). +# Shapes may not match on the second call with the modified inputs. -with torch_export_patches(patch_transformers=True) as modificator: +with torch_export_patches(patch_transformers=True): - # Unnecessary steps but useful in case of an error + # Two unnecessary steps but useful in case of an error # We check the cache is registered. assert is_cache_dynamic_registered() @@ -88,24 +100,26 @@ d["abs"] < 1e-5 ), f"The model with patches produces different outputs: {string_diff(d)}" - # Then we export. + # Then we export: the only important line in this section. ep = torch.export.export( untrained_model, (), - kwargs=modificator(copy.deepcopy(inputs)), + kwargs=copy.deepcopy(inputs), dynamic_shapes=use_dyn_not_str(dynamic_shapes), strict=False, # mandatory for torch==2.6 ) # We check the exported program produces the same results as well. + # This step is again unnecessary. d = max_diff(expected, ep.module()(**copy.deepcopy(inputs))) assert d["abs"] < 1e-5, f"The exported model different outputs: {string_diff(d)}" # %% # Export to ONNX -# ++++++++++++++ +# ============== # -# The export works. We can export to ONNX now. +# The export works. We can export to ONNX now +# with :func:`torch.onnx.export`. # Patches are still needed because the export # applies :meth:`torch.export.ExportedProgram.run_decompositions` # may export local pieces of the model again. @@ -157,4 +171,51 @@ # It looks good. 
# %% -doc.plot_legend("untrained smaller\nmicrosoft/phi-2", "torch.onnx.export", "orange") +doc.plot_legend("export\nuntrained smaller\nmicrosoft/phi-2", "torch.onnx.export", "orange") + +# %% +# Possible Issues +# =============== +# +# Unknown task +# ++++++++++++ +# +# Function :func:`onnx_diagnostic.torch_models.hghub.get_untrained_model_with_inputs` +# is unable to guess a task associated to the model. +# A different set of dummy inputs is defined for every task. +# The user needs to explicitly give that information to the function. +# Tasks are the same as the ones defined by +# `HuggingFace/models `_. +# +# Inputs are incorrect +# ++++++++++++++++++++ +# +# Example :ref:`l-plot-tiny-llm-export` explains +# how to retrieve that information. If you cannot guess the dynamic +# shapes - a cache can be tricky sometimes, follow example +# :ref:`l-plot-export-with-args-kwargs`. +# +# DynamicCache or any other cache cannot be exported +# ++++++++++++++++++++++++++++++++++++++++++++++++++ +# +# That's the role of :func:`onnx_diagnostic.torch_export_patches./torch_export_patches`. +# It registers the necessary information into pytorch to make the export +# work with these. Its need should slowly disappear until :epkg:`transformers` +# includes the serialization functions. +# +# Control Flow +# ++++++++++++ +# +# Every mixture of models goes through a control flow (a test). +# It also happens when a cache is truncated. The code of the model +# needs to be changed. See example :ref:`l-plot-export-cond`. +# +# Issue with dynamic shapes +# +++++++++++++++++++++++++ +# +# Example :ref:`l-plot-dynamic-shapes-python-int` gives one reason +# this process may fail but that's not the only one. +# Example :ref:`l-plot-export-locale-issue` gives a way to locate +# the cause but that does not cover all the possible causes. +# Raising an issue on github would be the recommended option +# until it is fixed. 
diff --git a/_doc/index.rst b/_doc/index.rst index 5d105c5e..ecbedbbe 100644 --- a/_doc/index.rst +++ b/_doc/index.rst @@ -65,6 +65,10 @@ or Enlightening Examples +++++++++++++++++++++ +**Where to start to export a model** + +* :ref:`l-plot-export_tiny_phi2` + **Torch Export** * :ref:`l-plot-export-cond` diff --git a/_doc/recipes/plot_dynamic_shapes_python_int.py b/_doc/recipes/plot_dynamic_shapes_python_int.py index 2685776f..277dd0c2 100644 --- a/_doc/recipes/plot_dynamic_shapes_python_int.py +++ b/_doc/recipes/plot_dynamic_shapes_python_int.py @@ -1,4 +1,6 @@ """ +.. _l-plot-dynamic-shapes-python-int: + Do not use python int with dynamic shapes ========================================= From f45f29ed1e62fb1d98476862237a99100821fdbc Mon Sep 17 00:00:00 2001 From: xadupre Date: Thu, 1 May 2025 11:30:05 +0200 Subject: [PATCH 3/6] fix doc --- _doc/examples/plot_export_tiny_phi2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_doc/examples/plot_export_tiny_phi2.py b/_doc/examples/plot_export_tiny_phi2.py index 6e8d522a..65052321 100644 --- a/_doc/examples/plot_export_tiny_phi2.py +++ b/_doc/examples/plot_export_tiny_phi2.py @@ -198,7 +198,7 @@ # DynamicCache or any other cache cannot be exported # ++++++++++++++++++++++++++++++++++++++++++++++++++ # -# That's the role of :func:`onnx_diagnostic.torch_export_patches./torch_export_patches`. +# That's the role of :func:`onnx_diagnostic.torch_export_patches.torch_export_patches`. # It registers the necessary information into pytorch to make the export # work with these. Its need should slowly disappear until :epkg:`transformers` # includes the serialization functions. 
From 95026c2456f6fd9a14d3cd5f35f156a34ecc0ebe Mon Sep 17 00:00:00 2001 From: xadupre Date: Fri, 2 May 2025 12:01:23 +0200 Subject: [PATCH 4/6] fix a few things --- _doc/examples/plot_export_tiny_phi2.py | 3 ++- onnx_diagnostic/tasks/image_classification.py | 21 +++++++++++++++ onnx_diagnostic/torch_models/hghub/hub_api.py | 14 +++++++++- .../torch_models/hghub/hub_data.py | 3 ++- .../torch_models/hghub/model_inputs.py | 26 ++++++++++++++----- 5 files changed, 57 insertions(+), 10 deletions(-) diff --git a/_doc/examples/plot_export_tiny_phi2.py b/_doc/examples/plot_export_tiny_phi2.py index 65052321..cc2d1df5 100644 --- a/_doc/examples/plot_export_tiny_phi2.py +++ b/_doc/examples/plot_export_tiny_phi2.py @@ -50,7 +50,7 @@ data["n_weights"], ) -print(f"model {size / 2**20:1.3f} Mb with {n_weights // 1000} mille parameters.") +print(f"model {size / 2**20:1.1f} Mb with {n_weights // 1000} thousands of parameters.") # %% # The original model has 2.7 billion parameters. It was divided by more than 10. # However, it can still be used with @@ -209,6 +209,7 @@ # Every mixture of models goes through a control flow (a test). # It also happens when a cache is truncated. The code of the model # needs to be changed. See example :ref:`l-plot-export-cond`. +# Loops are not supported yet. # # Issue with dynamic shapes # +++++++++++++++++++++++++ diff --git a/onnx_diagnostic/tasks/image_classification.py b/onnx_diagnostic/tasks/image_classification.py index 2d0696f2..42a02b32 100644 --- a/onnx_diagnostic/tasks/image_classification.py +++ b/onnx_diagnostic/tasks/image_classification.py @@ -7,6 +7,13 @@ def reduce_model_config(config: Any) -> Dict[str, Any]: """Reduces a model size.""" + if ( + hasattr(config, "model_type") + and config.model_type == "timm_wrapper" + and not hasattr(config, "num_hidden_layers") + ): + # We cannot reduce. 
+ return {} check_hasattr(config, ("num_hidden_layers", "hidden_sizes")) kwargs = dict( num_hidden_layers=( @@ -82,6 +89,20 @@ def random_input_kwargs(config: Any) -> Tuple[Dict[str, Any], Callable]: If the configuration is None, the function selects typical dimensions. """ if config is not None: + if ( + hasattr(config, "model_type") + and config.model_type == "timm_wrapper" + and not hasattr(config, "num_hidden_layers") + ): + input_size = config.pretrained_cfg["input_size"] + kwargs = dict( + batch_size=2, + input_width=input_size[-2], + input_height=input_size[-1], + input_channels=input_size[-3], + ) + return kwargs, get_inputs + check_hasattr(config, ("image_size", "architectures"), "num_channels") if config is not None: if hasattr(config, "image_size"): diff --git a/onnx_diagnostic/torch_models/hghub/hub_api.py b/onnx_diagnostic/torch_models/hghub/hub_api.py index ad136c4b..27caf0ff 100644 --- a/onnx_diagnostic/torch_models/hghub/hub_api.py +++ b/onnx_diagnostic/torch_models/hghub/hub_api.py @@ -88,6 +88,15 @@ def get_model_info(model_id) -> Any: return model_info(model_id) +def _guess_task_from_config(config: Any) -> Optional[str]: + """Tries to infer a task from the configuration.""" + if hasattr(config, "bbox_loss_coefficient") and hasattr(config, "giou_loss_coefficient"): + return "object-detection" + if hasattr(config, "architecture") and config.architecture: + return task_from_arch(config.architecture) + return None + + @functools.cache def task_from_arch(arch: str, default_value: Optional[str] = None) -> str: """ @@ -126,7 +135,7 @@ def task_from_id( :param default_value: if specified, the function returns this value if the task cannot be determined :param pretrained: uses the config - :param fall_back_to_pretrained: balls back to pretrained config + :param fall_back_to_pretrained: falls back to pretrained config :return: task """ if not pretrained: @@ -139,6 +148,9 @@ def task_from_id( try: return config.pipeline_tag except AttributeError: + guess = 
_guess_task_from_config(config) + if guess is not None: + return guess assert config.architectures is not None and len(config.architectures) == 1, ( f"Cannot return the task of {model_id!r}, pipeline_tag is not setup, " f"architectures={config.architectures} in config={config}" diff --git a/onnx_diagnostic/torch_models/hghub/hub_data.py b/onnx_diagnostic/torch_models/hghub/hub_data.py index ac4565f7..ce3794dc 100644 --- a/onnx_diagnostic/torch_models/hghub/hub_data.py +++ b/onnx_diagnostic/torch_models/hghub/hub_data.py @@ -78,6 +78,7 @@ MobileBertModel,feature-extraction MobileNetV1Model,image-feature-extraction MobileNetV2Model,image-feature-extraction + mobilenetv3_small_100,image-classification MobileViTForImageClassification,image-classification ModernBertForMaskedLM,fill-mask Phi4MMForCausalLM,MoE @@ -202,7 +203,7 @@ def load_models_testing() -> List[str]: @functools.cache def load_architecture_task() -> Dict[str, str]: """ - Returns a dictionary mapping architecture to task. + Returns a dictionary mapping architectures to tasks. 
import pprint from onnx_diagnostic.torch_models.hghub.hub_data import load_architecture_task diff --git a/onnx_diagnostic/torch_models/hghub/model_inputs.py b/onnx_diagnostic/torch_models/hghub/model_inputs.py index f4ab2650..b3b52c35 100644 --- a/onnx_diagnostic/torch_models/hghub/model_inputs.py +++ b/onnx_diagnostic/torch_models/hghub/model_inputs.py @@ -4,7 +4,7 @@ import transformers from ...helpers.config_helper import update_config from ...tasks import reduce_model_config, random_input_kwargs -from .hub_api import task_from_arch, get_pretrained_config +from .hub_api import task_from_arch, task_from_id, get_pretrained_config def get_untrained_model_with_inputs( @@ -64,17 +64,21 @@ def get_untrained_model_with_inputs( config = get_pretrained_config( model_id, use_preinstalled=use_preinstalled, **(model_kwargs or {}) ) + if hasattr(config, "architecture") and config.architecture: + archs = [config.architecture] archs = config.architectures # type: ignore - assert archs is not None and len(archs) == 1, ( + task = None + if archs is None: + task = task_from_id(model_id) + assert task is not None or (archs is not None and len(archs) == 1), ( f"Unable to determine the architecture for model {model_id!r}, " f"architectures={archs!r}, conf={config}" ) - arch = archs[0] - if verbose: - print(f"[get_untrained_model_with_inputs] architecture={arch!r}") if verbose: + print(f"[get_untrained_model_with_inputs] architectures={archs!r}") print(f"[get_untrained_model_with_inputs] cls={config.__class__.__name__!r}") - task = task_from_arch(arch) + if task is None: + task = task_from_arch(archs[0]) if verbose: print(f"[get_untrained_model_with_inputs] task={task!r}") @@ -106,7 +110,15 @@ def get_untrained_model_with_inputs( if inputs_kwargs: kwargs.update(inputs_kwargs) - model = getattr(transformers, arch)(config) + if archs is not None: + model = getattr(transformers, archs[0])(config) + else: + assert same_as_pretrained, ( + f"Model {model_id!r} cannot be built, the model 
cannot be built. " + f"It must be downloaded. Use same_as_pretrained=True." + ) + model = None + # This line is important. Some models may produce different # outputs even with the same inputs in training mode. model.eval() From a8d1b9404462e1a77aa3da994e9c6b2fef0eea06 Mon Sep 17 00:00:00 2001 From: xadupre Date: Fri, 2 May 2025 12:26:15 +0200 Subject: [PATCH 5/6] update CI --- .github/workflows/ci.yml | 3 +++ .github/workflows/documentation.yml | 3 +++ .gitignore | 1 + requirements-dev.txt | 1 + 4 files changed, 8 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 13f48107..fa076bb5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -61,6 +61,9 @@ jobs: - name: Install requirements run: python -m pip install -r requirements.txt + - name: Uninstall onnx + run: python -m pip uninstall -y onnx + - name: Install requirements dev run: python -m pip install -r requirements-dev.txt diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml index b7abaa86..c4570aaf 100644 --- a/.github/workflows/documentation.yml +++ b/.github/workflows/documentation.yml @@ -38,6 +38,9 @@ jobs: - name: Install requirements run: python -m pip install -r requirements.txt + - name: Uninstall onnx + run: python -m pip uninstall -y onnx + - name: Install requirements dev run: python -m pip install -r requirements-dev.txt diff --git a/.gitignore b/.gitignore index 325e639e..ce041d2e 100644 --- a/.gitignore +++ b/.gitignore @@ -37,6 +37,7 @@ dump_models/* dump_bash_bench/* dump_llama/* dump_test* +dump_validate* dump_sdpa_* temp_dump_models/* dump_dort_bench/* diff --git a/requirements-dev.txt b/requirements-dev.txt index 9bc0d4b4..61ddc0f0 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -5,6 +5,7 @@ furo huggingface_hub matplotlib onnx-array-api>=0.3.1 +onnx-weekly git+https://github.com/microsoft/onnxscript.git openpyxl packaging From 84fd19e9b40c124a6282bc75e826fe47b1eeb7f3 Mon Sep 17 00:00:00 
2001 From: xadupre Date: Fri, 2 May 2025 15:01:50 +0200 Subject: [PATCH 6/6] onnx --- .github/workflows/ci.yml | 8 +++++--- .github/workflows/documentation.yml | 8 +++++--- _unittests/ut_reference/test_reference_back.py | 1 + requirements-dev.txt | 2 +- 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fa076bb5..7b8ac9df 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -61,12 +61,14 @@ jobs: - name: Install requirements run: python -m pip install -r requirements.txt - - name: Uninstall onnx - run: python -m pip uninstall -y onnx - - name: Install requirements dev run: python -m pip install -r requirements-dev.txt + - name: Uninstall onnx and install onnx-weekly + run: | + python -m pip uninstall -y onnx + python -m pip install onnx-weekly + - name: Cache pip uses: actions/cache@v4 with: diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml index c4570aaf..9c69e20a 100644 --- a/.github/workflows/documentation.yml +++ b/.github/workflows/documentation.yml @@ -38,12 +38,14 @@ jobs: - name: Install requirements run: python -m pip install -r requirements.txt - - name: Uninstall onnx - run: python -m pip uninstall -y onnx - - name: Install requirements dev run: python -m pip install -r requirements-dev.txt + - name: Uninstall onnx and install onnx-weekly + run: | + python -m pip uninstall -y onnx + python -m pip install onnx-weekly + - name: Cache pip uses: actions/cache@v4 with: diff --git a/_unittests/ut_reference/test_reference_back.py b/_unittests/ut_reference/test_reference_back.py index b0521988..db4cd59d 100644 --- a/_unittests/ut_reference/test_reference_back.py +++ b/_unittests/ut_reference/test_reference_back.py @@ -15,6 +15,7 @@ class ExtendedReferenceEvaluatorBackendRep(onnx.backend.base.BackendRep): def __init__(self, session): + super().__init__() self._session = session def run(self, inputs, **kwargs): diff --git 
a/requirements-dev.txt b/requirements-dev.txt index 61ddc0f0..b7ff92fd 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -5,7 +5,7 @@ furo huggingface_hub matplotlib onnx-array-api>=0.3.1 -onnx-weekly +onnx git+https://github.com/microsoft/onnxscript.git openpyxl packaging