
Commit 0c37bc3

doc
1 parent cc0c5f2 commit 0c37bc3

File tree

4 files changed: +91 -40 lines changed


_doc/cmds/validate.rst

Lines changed: 32 additions & 2 deletions
@@ -18,6 +18,9 @@ exports the model, measures the discrepancies...
 Get the list of supported tasks
 +++++++++++++++++++++++++++++++
 
+The tasks are the same as those defined by :epkg:`HuggingFace`.
+The tool only supports a subset of them.
+
 .. code-block::
 
     python -m onnx_diagnostic validate
@@ -32,6 +35,10 @@ Get the list of supported tasks
 Get the default inputs for a specific task
 ++++++++++++++++++++++++++++++++++++++++++
 
+This returns the dummy inputs for a specific task.
+There may be too many inputs; only those defined by
+the forward method are kept.
+
 .. code-block::
 
     python -m onnx_diagnostic validate -t text-generation
@@ -42,8 +49,12 @@ Get the default inputs for a specific task
 
     main("validate -t text-generation".split())
 
-Validate a model
-++++++++++++++++
+Validate dummy inputs for a model
++++++++++++++++++++++++++++++++++
+
+The dummy inputs may not work for this model and this task.
+The following command line checks that. There is no point in
+exporting if this step fails.
 
 .. code-block::
@@ -54,3 +65,22 @@ Validate a model
     from onnx_diagnostic._command_lines_parser import main
 
     main("validate -m arnir0/Tiny-LLM --run -v 1".split())
+
+Validate and export a model
++++++++++++++++++++++++++++
+
+Exports a model for the given task and checks for discrepancies as well.
+The reported latency comes from a single run. It indicates how long the
+benchmark takes but is far from the latency obtained by running the same
+model multiple times.
+
+.. code-block::
+
+    python -m onnx_diagnostic validate -m arnir0/Tiny-LLM --run -v 1 --export exporter-nostrict -o dump_models --patch
+
+.. runpython::
+
+    from onnx_diagnostic._command_lines_parser import main
+
+    main("validate -m arnir0/Tiny-LLM --run -v 1 --export exporter-nostrict -o dump_models --patch".split())

_unittests/ut_torch_models/test_test_helpers.py

Lines changed: 2 additions & 4 deletions
@@ -39,14 +39,12 @@ def test_validate_model_export(self):
         summary, data = validate_model(
             mid,
             do_run=True,
-            verbose=2,
-            dtype="float32",
-            device="cpu",
+            verbose=10,
             exporter="export-nostrict",
+            dump_folder="dump_test_validate_model_export",
         )
         self.assertIsInstance(summary, dict)
         self.assertIsInstance(data, dict)
-        validate_model(mid, do_run=True, verbose=2, quiet=False)
 
 
 if __name__ == "__main__":
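To run only this updated test locally, something like the following should work, assuming pytest is installed (the file can also be executed directly thanks to its __main__ block):

    python -m pytest _unittests/ut_torch_models/test_test_helpers.py -k test_validate_model_export -v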

onnx_diagnostic/_command_lines_parser.py

Lines changed: 11 additions & 22 deletions
@@ -239,28 +239,10 @@ def get_parser_validate() -> ArgumentParser:
         ),
         epilog="If the model id is specified, one untrained version of it is instantiated.",
     )
-    parser.add_argument(
-        "-m",
-        "--mid",
-        type=str,
-        help="model id, usually <author>/<name>",
-    )
-    parser.add_argument(
-        "-t",
-        "--task",
-        default=None,
-        help="force the task to use",
-    )
-    parser.add_argument(
-        "-e",
-        "--export",
-        help="export the model with this exporter",
-    )
-    parser.add_argument(
-        "-o",
-        "--opt",
-        help="optimization to apply after the export",
-    )
+    parser.add_argument("-m", "--mid", type=str, help="model id, usually <author>/<name>")
+    parser.add_argument("-t", "--task", default=None, help="force the task to use")
+    parser.add_argument("-e", "--export", help="export the model with this exporter")
+    parser.add_argument("--opt", help="optimization to apply after the export")
     parser.add_argument(
         "-r",
         "--run",
@@ -288,6 +270,12 @@ def get_parser_validate() -> ArgumentParser:
         action=BooleanOptionalAction,
         help="validate the trained model (requires downloading)",
     )
+    parser.add_argument(
+        "-o",
+        "--dump-folder",
+        help="if not empty, a folder is created to dump statistics, "
+        "exported program, onnx...",
+    )
     parser.add_argument("-v", "--verbose", default=0, type=int, help="verbosity")
     parser.add_argument("--dtype", help="changes dtype if necessary")
     parser.add_argument("--device", help="changes the device if necessary")
@@ -328,6 +316,7 @@ def _cmd_validate(argv: List[Any]):
         patch=args.patch,
         optimization=args.opt,
         exporter=args.export,
+        dump_folder=args.dump_folder,
     )
     print("")
     print("-- summary --")

onnx_diagnostic/torch_models/test_helper.py

Lines changed: 46 additions & 12 deletions
@@ -1,3 +1,4 @@
+import os
 from typing import Any, Dict, Optional, Tuple, Union
 import time
 import torch
@@ -11,7 +12,7 @@
 def empty(value: Any) -> bool:
     """Tells if the value is empty."""
     if isinstance(value, (str, list, dict, tuple, set)):
-        return value
+        return bool(value)
     if value is None:
         return True
     return False
@@ -22,8 +23,8 @@ def _ds_clean(v):
         str(v)
         .replace("<class 'onnx_diagnostic.torch_models.hghub.model_inputs.", "")
         .replace("'>", "")
-        .replace("_DimHint(type=<_DimHintType.DYNAMIC: 3>", "DYNAMIC")
-        .replace("_DimHint(type=<_DimHintType.AUTO: 3>", "AUTO")
+        .replace("_DimHint(type=<_DimHintType.DYNAMIC: 3>)", "DYNAMIC")
+        .replace("_DimHint(type=<_DimHintType.AUTO: 3>)", "AUTO")
     )
 
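A quick check of the corrected replacement patterns; the input string below is only an example of how a _DimHint repr may appear inside str(dynamic_shapes), it is not taken from a real run.

    s = "{'batch': _DimHint(type=<_DimHintType.DYNAMIC: 3>)}"
    cleaned = (
        s.replace("_DimHint(type=<_DimHintType.DYNAMIC: 3>)", "DYNAMIC")
        .replace("_DimHint(type=<_DimHintType.AUTO: 3>)", "AUTO")
    )
    print(cleaned)  # {'batch': DYNAMIC}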

@@ -52,6 +53,7 @@ def validate_model(
     optimization: Optional[str] = None,
     quiet: bool = False,
     patch: bool = False,
+    dump_folder: Optional[str] = None,
 ) -> Tuple[Dict[str, Union[int, float, str]], Dict[str, Any]]:
     """
     Validates a model.
@@ -72,11 +74,23 @@
         depend on the the exporter
     :param quiet: if quiet, catches exception if any issue
     :param patch: applies patches before exporting
+    :param dump_folder: dumps everything in a subfolder of this one
     :return: two dictionaries, one with some metrics,
         another one with whatever the function produces
     """
     assert not trained, f"trained={trained} not supported yet"
     summary: Dict[str, Union[int, float, str]] = {}
+    if dump_folder:
+        folder_name = f"{model_id.replace('/','-')}-{exporter}-{optimization or ''}"
+        dump_folder = os.path.join(dump_folder, folder_name)
+        if not os.path.exists(dump_folder):
+            os.makedirs(dump_folder)
+        summary["dump_folder"] = dump_folder
+        summary["dump_folder_name"] = folder_name
+        if verbose:
+            print(f"[validate_model] dump into {folder_name!r}")
+    else:
+        folder_name = None
     if verbose:
         print(f"[validate_model] validate model id {model_id!r}")
         print("[validate_model] get dummy inputs...")
@@ -98,15 +112,15 @@
             dtype = getattr(torch, dtype)
         if verbose:
             print(f"[validate_model] dtype conversion to {dtype}")
-        data["model"] = to_any(data["model"], dtype)
-        data["inputs"] = to_any(data["inputs"], dtype)
+        data["model"] = to_any(data["model"], dtype)  # type: ignore
+        data["inputs"] = to_any(data["inputs"], dtype)  # type: ignore
         summary["model_dtype"] = str(dtype)
 
     if not empty(device):
         if verbose:
             print(f"[validate_model] device conversion to {device}")
-        data["model"] = to_any(data["model"], device)
-        data["inputs"] = to_any(data["inputs"], device)
+        data["model"] = to_any(data["model"], device)  # type: ignore
+        data["inputs"] = to_any(data["inputs"], device)  # type: ignore
         summary["model_device"] = str(device)
 
     summary["time_create"] = time.perf_counter() - begin
@@ -156,6 +170,7 @@ def validate_model(
                 f"before: {hash_inputs}\n"
                 f" after: {string_type(data["inputs"], with_shape=True)}"
             )
+
     if exporter:
         print(
             f"[validate_model] export the model with {exporter!r}, "
@@ -164,10 +179,10 @@
     if patch:
         if verbose:
             print("[validate_model] applies patches before exporting")
-        with bypass_export_some_errors(
+        with bypass_export_some_errors(  # type: ignore
             patch_transformers=True, verbose=max(0, verbose - 1)
         ) as modificator:
-            data["inputs_export"] = modificator(data["inputs"])
+            data["inputs_export"] = modificator(data["inputs"])  # type: ignore
 
     if do_run:
         # We run a second time the model to check the patch did not
@@ -230,6 +245,25 @@ def validate_model(
         )
         summary.update(summary_export)
 
+    if dump_folder:
+        if "exported_program" in data:
+            ep = data["exported_program"]
+            if verbose:
+                print(f"[validate_model] dumps exported program in {dump_folder!r}...")
+            with open(os.path.join(dump_folder, f"{folder_name}.ep"), "w") as f:
+                f.write(str(ep))
+            with open(os.path.join(dump_folder, f"{folder_name}.graph"), "w") as f:
+                f.write(str(ep.graph))
+            if verbose:
+                print("[validate_model] done (dump ep)")
+        if verbose:
+            print(f"[validate_model] dumps statistics in {dump_folder!r}...")
+        with open(os.path.join(dump_folder, f"{folder_name}.stats"), "w") as f:
+            for k, v in sorted(summary.items()):
+                f.write(f":{k}:{v};\n")
+        if verbose:
+            print("[validate_model] done (dump)")
+
     if verbose:
         print("[validate_model] done (final)")
     return summary, data
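A hedged sketch of reading back the .stats file written above with the ":{k}:{v};" line format; the path is a made-up example following the dump_folder/folder_name layout.

    stats = {}
    path = "dump_models/arnir0-Tiny-LLM-export-nostrict-/arnir0-Tiny-LLM-export-nostrict-.stats"
    with open(path) as f:
        for line in f:
            line = line.strip().rstrip(";")
            if line.startswith(":"):
                key, _, value = line[1:].partition(":")
                stats[key] = value
    print(stats.get("export_exporter"))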
@@ -281,7 +315,7 @@ def split_args_kwargs(inputs: Any) -> Tuple[Tuple[Any, ...], Dict[str, Any]]:
         return (), inputs
     if isinstance(inputs, tuple) and len(inputs) == 2 and isinstance(inputs[1], dict):
         return inputs
-    assert isinstance(inputs, tuple), f"Unexpectd inputs {string_type(inputs)}"
+    assert isinstance(inputs, tuple), f"Unexpected inputs {string_type(inputs)}"
     return inputs, {}
 
 
@@ -309,7 +343,7 @@ def call_torch_export_export(
     """
     assert "model" in data, f"model is missing from data: {sorted(data)}"
     assert "inputs_export" in data, f"inputs_export is missing from data: {sorted(data)}"
-    summary = {}
+    summary: Dict[str, Union[str, int, float]] = {}
     strict = "nostrict" not in exporter
     args, kwargs = split_args_kwargs(data["inputs_export"])
     ds = data.get("dynamic_shapes", None)
@@ -323,7 +357,7 @@
         print(f"[call_torch_export_export] dynamic_shapes={_ds_clean(ds)}")
         print("[call_torch_export_export] export...")
     summary["export_exporter"] = exporter
-    summary["export_optimization"] = optimization
+    summary["export_optimization"] = optimization or ""
     summary["export_strict"] = strict
     summary["export_args"] = string_type(args, with_shape=True)
     summary["export_kwargs"] = string_type(kwargs, with_shape=True)

0 commit comments