8 changes: 7 additions & 1 deletion CHANGELOGS.rst
@@ -4,7 +4,13 @@ Change Logs
0.7.0
+++++

* :pr:`143`: compares intermediate results
* :pr:`144`: support for second inputs with different dimension,
rename test_helper into validate,
support ``interpolate_pos_encoding`` for ``VitModel``,
update model builder helpers for this PR
`Use ONNX IR for model builder
<https://github.com/microsoft/onnxruntime-genai/pull/1416>`_
* :pr:`143`: compares intermediate results,

0.6.3
+++++
2 changes: 1 addition & 1 deletion _doc/api/torch_models/index.rst
@@ -7,7 +7,7 @@ onnx_diagnostic.torch_models

hghub/index
llms
test_helper
validate

.. automodule:: onnx_diagnostic.torch_models
:members:
7 changes: 0 additions & 7 deletions _doc/api/torch_models/test_helper.rst

This file was deleted.

7 changes: 7 additions & 0 deletions _doc/api/torch_models/validate.rst
@@ -0,0 +1,7 @@

onnx_diagnostic.torch_models.validate
=====================================

.. automodule:: onnx_diagnostic.torch_models.validate
:members:
:no-undoc-members:
4 changes: 2 additions & 2 deletions _doc/cmds/validate.rst
@@ -4,7 +4,7 @@
===================================================

The command line is a wrapper around function
:func:`onnx_diagnostic.torch_models.test_helper.validate_model`.
:func:`onnx_diagnostic.torch_models.validate.validate_model`.

Description
+++++++++++
@@ -110,7 +110,7 @@ Run onnxruntime fusions

This option runs `transformers optimizations <https://onnxruntime.ai/docs/performance/transformers-optimization.html>`_
implemented in :epkg:`onnxruntime`. The list of supported ``model_type`` can be found in the documentation
of function :func:`onnx_diagnostic.torch_models.test_helper.run_ort_fusion`.
of function :func:`onnx_diagnostic.torch_models.validate.run_ort_fusion`.

.. code-block::

2 changes: 1 addition & 1 deletion _unittests/ut_helpers/test_doc_helper.py
@@ -56,7 +56,7 @@ def test_custom_doc_kernels_layer_normalization(self):
)
expected = torch_sess.run(None, feeds)
got = torch_sess_custom.run(None, feeds)
self.assertEqualAny(expected, got)
self.assertEqualAny(expected, got, atol=1e-3)

def test_custom_doc_kernels_matmul(self):
model = oh.make_model(
28 changes: 6 additions & 22 deletions _unittests/ut_helpers/test_model_builder_helper.py
@@ -1,4 +1,3 @@
import os
import unittest
from onnx_diagnostic.ext_test_case import (
ExtTestCase,
@@ -48,32 +47,17 @@ def test_model_builder_id(self):
cache_dir=folder,
verbose=1,
)
self.assertGreater(len(onnx_model.nodes), 5)
self.assertGreater(onnx_model.model.graph.num_nodes(), 5)
model_name = save_model_builder(onnx_model, folder, verbose=1)
self.assertExists(model_name)

proto = save_model_builder(onnx_model, verbose=1)
import onnxruntime

onnxruntime.InferenceSession(
proto.SerializeToString(), providers=["CPUExecutionProvider"]
)

# We need to start again.
onnx_model = create_model_builder(
data["configuration"],
data["model"],
precision="fp32",
execution_provider="cpu",
cache_dir=folder,
verbose=1,
)
save_model_builder(onnx_model, folder, verbose=1)
model_name = os.path.join(folder, "model.onnx")
self.assertExists(model_name)

feeds = make_feeds(proto, data["inputs"], use_numpy=True)
sess = onnxruntime.InferenceSession(model_name, providers=["CPUExecutionProvider"])
del data["inputs"]["position_ids"]
feeds = make_feeds([i.name for i in sess.get_inputs()], data["inputs"], use_numpy=True)
expected = data["model"](**data["inputs"])

sess = onnxruntime.InferenceSession(model_name, providers=["CPUExecutionProvider"])
try:
got = sess.run(None, feeds)
except onnxruntime.capi.onnxruntime_pybind11_state.InvalidArgument as e:
2 changes: 1 addition & 1 deletion _unittests/ut_torch_models/test_validate_models.py
@@ -8,7 +8,7 @@
requires_experimental,
requires_transformers,
)
from onnx_diagnostic.torch_models.test_helper import validate_model
from onnx_diagnostic.torch_models.validate import validate_model


class TestValidateModel(ExtTestCase):
@@ -11,7 +11,7 @@
requires_onnxscript,
requires_transformers,
)
from onnx_diagnostic.torch_models.test_helper import (
from onnx_diagnostic.torch_models.validate import (
get_inputs_for_task,
validate_model,
filter_inputs,
@@ -21,7 +21,7 @@
from onnx_diagnostic.tasks import supported_tasks


class TestTestHelper(ExtTestCase):
class TestValidateWholeModels(ExtTestCase):
def test_get_inputs_for_task(self):
fcts = supported_tasks()
for task in self.subloop(sorted(fcts)):
@@ -221,14 +221,39 @@ def test_validate_model_modelbuilder(self):
do_run=True,
verbose=10,
exporter="modelbuilder",
dump_folder="dump_test_validate_model_onnx_dynamo",
dump_folder="dump_test_validate_model_modelbuilder",
)
self.assertIsInstance(summary, dict)
self.assertIsInstance(data, dict)
self.assertLess(summary["disc_onnx_ort_run_abs"], 1e-4)
onnx_filename = data["onnx_filename"]
self.assertExists(onnx_filename)

@requires_torch("2.7")
@hide_stdout()
@ignore_warnings(FutureWarning)
@requires_transformers("4.51")
def test_validate_model_vit_model(self):
mid = "ydshieh/tiny-random-ViTForImageClassification"
summary, data = validate_model(
mid,
do_run=True,
verbose=10,
exporter="onnx-dynamo",
dump_folder="dump_test_validate_model_onnx_dynamo",
inputs2=True,
)
self.assertIsInstance(summary, dict)
self.assertIsInstance(data, dict)
self.assertLess(summary["disc_onnx_ort_run_abs"], 1e-3)
self.assertLess(summary["disc_onnx_ort_run2_abs"], 1e-3)
self.assertEqual("dict(pixel_values:A1s2x3x30x30)", summary["run_feeds_inputs"])
self.assertEqual("dict(pixel_values:A1s3x3x31x31)", summary["run_feeds_inputs2"])
self.assertEqual("#1[A1s2x2]", summary["run_output_inputs"])
self.assertEqual("#1[A1s3x2]", summary["run_output_inputs2"])
onnx_filename = data["onnx_filename"]
self.assertExists(onnx_filename)


if __name__ == "__main__":
unittest.main(verbosity=2)
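
A note on the compact shape strings asserted above: they come from
``string_type`` in ``onnx_diagnostic.helpers``. Assuming the notation is
unchanged, ``A`` marks a numpy array, the digit is the ONNX dtype code
(1 = float32), ``sAxBxC`` is the shape, and ``#1[...]`` a one-element list.
A minimal sketch:

    import numpy as np
    from onnx_diagnostic.helpers import string_type

    feeds = {"pixel_values": np.zeros((2, 3, 30, 30), dtype=np.float32)}
    # expected to print: dict(pixel_values:A1s2x3x30x30)
    print(string_type(feeds, with_shape=True))
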
10 changes: 9 additions & 1 deletion onnx_diagnostic/_command_lines_parser.py
@@ -373,6 +373,13 @@ def get_parser_validate() -> ArgumentParser:
action=BooleanOptionalAction,
help="validate the trained model (requires downloading)",
)
parser.add_argument(
"--inputs2",
default=True,
action=BooleanOptionalAction,
help="if run is on, the command lines validates the model on a "
"second set of inputs to check the exported model supports dynamism",
)
parser.add_argument(
"--runtime",
choices=["onnxruntime", "torch", "ref"],
@@ -440,7 +447,7 @@ def get_parser_validate() -> ArgumentParser:

def _cmd_validate(argv: List[Any]):
from .helpers import string_type
from .torch_models.test_helper import get_inputs_for_task, validate_model
from .torch_models.validate import get_inputs_for_task, validate_model
from .tasks import supported_tasks

parser = get_parser_validate()
@@ -492,6 +499,7 @@ def _cmd_validate(argv: List[Any]):
runtime=args.runtime,
repeat=args.repeat,
warmup=args.warmup,
inputs2=args.inputs2,
)
print("")
print("-- summary --")
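
The new ``--inputs2`` flag maps directly onto the ``inputs2`` argument of
``validate_model``; a sketch of the Python equivalent, mirroring the new unit
test above:

    from onnx_diagnostic.torch_models.validate import validate_model

    summary, data = validate_model(
        "ydshieh/tiny-random-ViTForImageClassification",
        do_run=True,            # execute both the torch and the exported model
        exporter="onnx-dynamo",
        inputs2=True,           # replay on a second input set to check dynamism
    )
    # discrepancies for both input sets are reported in the summary
    print(summary["disc_onnx_ort_run_abs"], summary["disc_onnx_ort_run2_abs"])
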
127 changes: 54 additions & 73 deletions onnx_diagnostic/helpers/model_builder_helper.py
@@ -3,9 +3,9 @@
import requests
import sys
from pathlib import Path
from typing import Any, Optional
from typing import Any, Optional, Union
from urllib.parse import urlparse
from onnx import helper, save_model, external_data_helper, ModelProto
from onnx import ModelProto, TensorProto

CACHE_SUBDIR = "onnx-diagnostic"

@@ -114,87 +114,58 @@ def _make_model(self, model, verbose: int = 0):
self.make_lm_head(module)


def save_model_builder(self, out_dir: Optional[str] = "", verbose: int = 0) -> ModelProto:
def save_model_builder(
self, out_dir: Optional[str] = "", verbose: int = 0
) -> Union[str, ModelProto]:
"""
Saves a model created by function :func:`create_model_builder`.
If out_dir is empty or not specified, the function still returns the
generated model.
"""
if verbose:
print(f"[save_model_builder] Saving ONNX model in {out_dir}")

# Create ONNX model
model = helper.make_model(
opset_imports=[
self.clear_field(
helper.make_operatorsetid("", 21 if self.quant_attrs["use_qdq"] else 14),
"domain",
),
helper.make_operatorsetid("com.microsoft", 1),
],
ir_version=7,
producer_name="onnxruntime-genai",
producer_version="0.0.0",
graph=self.make_graph(
name="main_graph",
inputs=self.inputs,
outputs=self.outputs,
initializer=self.initializers,
value_info=self.value_infos,
nodes=self.nodes,
),
)

# Load external data into ONNX model
external_data_helper.load_external_data_for_model(model, self.cache_dir)

# Delete external data files on disk before re-saving
for path in os.listdir(self.cache_dir):
if path.endswith(".bin"):
os.remove(os.path.join(self.cache_dir, path))
import onnx_ir

# Delete temporary cache dir if empty
# if len(os.listdir(self.cache_dir)) == 0:
# os.rmdir(self.cache_dir)
if verbose:
print(f"[save_model_builder] Saving ONNX model in {out_dir!r}")

# Quantize ONNX model to desired precision
# Skip quantizing `MatMul` in `DequantizeLinear --> Transpose --> MatMul` path
already_quantized_in_qdq_format = (
self.quant_type is not None and self.quant_attrs["use_qdq"]
) # Skip quantizing `MatMul` in `DequantizeLinear --> Transpose --> MatMul` path
if self.onnx_dtype == "int4" and not already_quantized_in_qdq_format:
model = self.to_int4(model)
)
model = (
self.to_int4()
if self.onnx_dtype in {onnx_ir.DataType.INT4, onnx_ir.DataType.UINT4}
and not already_quantized_in_qdq_format
else self.model
)
model.graph.sort()
if not out_dir:
return onnx_ir.to_proto(model)

# Save ONNX model with only one external data file and delete any existing duplicate copies
if out_dir:
out_path = os.path.join(out_dir, self.filename)
data_path = os.path.join(out_dir, os.path.basename(out_path) + ".data")
if os.path.exists(out_path):
if verbose:
print(f"[save_model_builder] Overwriting {out_path!r}")
os.remove(out_path)
if os.path.exists(data_path):
if verbose:
print(f"[save_model_builder] Overwriting {data_path!r}")
os.remove(data_path)
out_path = os.path.join(out_dir, self.filename)
data_path = os.path.join(out_dir, os.path.basename(out_path) + ".data")

if out_dir:
location = os.path.basename(data_path)
if os.path.exists(location):
os.remove(location)
# Save ONNX model with only one external data file and delete any existing duplicate copies
out_path = os.path.join(out_dir, self.filename)
data_path = os.path.join(out_dir, os.path.basename(out_path) + ".data")
if os.path.exists(out_path):
if verbose:
print(f"[save_model_builder] out_path={out_path!r}")
print(f"[save_model_builder] location={location!r}")
save_model(
model,
out_path,
save_as_external_data=True,
all_tensors_to_one_file=True,
location=location,
size_threshold=1024,
convert_attribute=False,
)
return None
return model
print(f"[save_model_builder] Overwriting {out_path!r}")
os.remove(out_path)
if os.path.exists(data_path):
if verbose:
print(f"[save_model_builder] Overwriting {data_path!r}")
os.remove(data_path)

onnx_ir.save(
model,
out_path,
external_data=os.path.basename(data_path),
size_threshold_bytes=2**10,
)
if verbose:
print(f"[save_model_builder] saved in {out_dir!r}")

return out_path


def create_model_builder(
@@ -335,13 +306,23 @@ def _post(onnx_model):
for c in remove:
delattr(config, c)

onnx_model = cls(config, io_dtype, precision, execution_provider, cache_dir, extra_options)
convert = {
"fp32": TensorProto.FLOAT,
"fp16": TensorProto.FLOAT16,
"bfp16": TensorProto.BFLOAT16,
}
assert (
precision in convert
), f"Unexpected value for precision={precision!r}, should be in {convert}"
onnx_model = cls(
config, io_dtype, convert[precision], execution_provider, cache_dir, extra_options
)

if post:
post(onnx_model)
_make_model(onnx_model, model, verbose=verbose)

assert onnx_model.nodes, (
assert onnx_model.model, (
f"No node in the model, io_dtype={io_dtype!r}, "
f"precision={precision!r}, execution_provider={execution_provider!r}, "
f"extra_options={extra_options!r}, cache_dir={cache_dir!r}, "
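
After this refactor ``save_model_builder`` no longer assembles a ``ModelProto``
by hand: it relies on :epkg:`onnx_ir` and either returns the proto (empty
``out_dir``) or writes the model plus a single external-data file and returns
the path. A sketch of both modes, mirroring the updated unit test (``data`` and
``folder`` as set up there):

    onnx_model = create_model_builder(
        data["configuration"],
        data["model"],
        precision="fp32",
        execution_provider="cpu",
        cache_dir=folder,
        verbose=1,
    )
    proto = save_model_builder(onnx_model, verbose=1)         # -> ModelProto
    path = save_model_builder(onnx_model, folder, verbose=1)  # -> saved path
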
5 changes: 4 additions & 1 deletion onnx_diagnostic/tasks/image_classification.py
@@ -52,7 +52,7 @@ def get_inputs(
input_width, int
), f"Unexpected type for input_width {type(input_width)}{config}"
assert isinstance(
input_width, int
input_height, int
), f"Unexpected type for input_height {type(input_height)}{config}"

shapes = {
@@ -67,6 +67,9 @@
-1, 1
),
)
if model.__class__.__name__ == "ViTForImageClassification":
inputs["interpolate_pos_encoding"] = True
shapes["interpolate_pos_encoding"] = None # type: ignore[assignment]
res = dict(inputs=inputs, dynamic_shapes=shapes)
if add_second_input:
res["inputs2"] = get_inputs(
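
``interpolate_pos_encoding=True`` is what lets the second input set use a
different resolution (30x30 vs 31x31 in the test above): ViT interpolates its
position embeddings instead of requiring the original image size. A small
standalone check using the standard :epkg:`transformers` API, with the tiny
model id taken from the test:

    import torch
    from transformers import ViTForImageClassification

    model = ViTForImageClassification.from_pretrained(
        "ydshieh/tiny-random-ViTForImageClassification"
    )
    x = torch.rand(1, 3, 31, 31)  # not the resolution the model was built for
    logits = model(pixel_values=x, interpolate_pos_encoding=True).logits
    print(logits.shape)  # torch.Size([1, 2]) for this tiny 2-label model
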