Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOGS.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ Change Logs
0.4.4
+++++

* :pr:`82`: exposes ``register_flattening_functions``, adds option ``--subfolder``
* :pr:`81`: fixes missing ``intermediate_size`` in configuration
* :pr:`79`: implements task ``object-detection``
* :pr:`78`: uses *onnx-weekly* instead of *onnx* to avoid conflicts with *onnxscript*

Expand Down
1 change: 1 addition & 0 deletions _doc/examples/plot_export_with_dynamic_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ def forward(self, cache, z):
)
print(ep)

# %%
# Do we need to guess?
# ++++++++++++++++++++
#
Expand Down
5 changes: 5 additions & 0 deletions onnx_diagnostic/_command_lines_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,10 @@ def get_parser_validate() -> ArgumentParser:
help="drops the following inputs names, it should be a list "
"with comma separated values",
)
parser.add_argument(
"--subfolder",
help="subfolder where to find the model and the configuration",
)
parser.add_argument(
"--ortfusiontype",
required=False,
Expand Down Expand Up @@ -413,6 +417,7 @@ def _cmd_validate(argv: List[Any]):
ortfusiontype=args.ortfusiontype,
input_options=args.iop,
model_options=args.mop,
subfolder=args.subfolder,
)
print("")
print("-- summary --")
Expand Down
12 changes: 12 additions & 0 deletions onnx_diagnostic/torch_export_patches/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,15 @@

# bypass_export_some_errors is the first name given to the patches.
bypass_export_some_errors = torch_export_patches # type: ignore


def register_flattening_functions(verbose: int = 0):
    """
    Registers the functions used to serialize and deserialize caches
    or other classes implemented in :epkg:`transformers` and used as inputs.
    This is needed whenever a model must be exported through
    :func:`torch.export.export`.

    :param verbose: verbosity, forwarded to ``_register_cache_serialization``
    :return: whatever ``_register_cache_serialization`` returns
    """
    # The import stays inside the function so that it only happens on call.
    from .onnx_export_serialization import _register_cache_serialization as _register

    registered = _register(verbose=verbose)
    return registered
30 changes: 27 additions & 3 deletions onnx_diagnostic/torch_models/hghub/hub_api.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import copy
import functools
import json
import os
from typing import Any, Dict, List, Optional, Union
import transformers
from huggingface_hub import HfApi, model_info
from huggingface_hub import HfApi, model_info, hf_hub_download
from ...helpers.config_helper import update_config
from . import hub_data_cached_configs
from .hub_data import __date__, __data_tasks__, load_architecture_task, __data_arch_values__
Expand Down Expand Up @@ -59,7 +60,11 @@ def get_cached_configuration(name: str, **kwargs) -> Optional[transformers.Pretr


def get_pretrained_config(
model_id: str, trust_remote_code: bool = True, use_preinstalled: bool = True, **kwargs
model_id: str,
trust_remote_code: bool = True,
use_preinstalled: bool = True,
subfolder: Optional[str] = None,
**kwargs,
) -> Any:
"""
Returns the config for a model_id.
Expand All @@ -71,13 +76,32 @@ def get_pretrained_config(
accessing the network, if available, it is returned by
:func:`get_cached_configuration`, the cached list is mostly for
unit tests
:param subfolder: subfolder for the given model id
:param kwargs: additional kwargs
:return: a configuration
"""
if use_preinstalled:
conf = get_cached_configuration(model_id, **kwargs)
conf = get_cached_configuration(model_id, subfolder=subfolder, **kwargs)
if conf is not None:
return conf
if subfolder:
try:
return transformers.AutoConfig.from_pretrained(
model_id, trust_remote_code=trust_remote_code, subfolder=subfolder, **kwargs
)
except ValueError:
# Then we try to download it.
config = hf_hub_download(
model_id, filename="config.json", subfolder=subfolder, **kwargs
)
try:
return transformers.AutoConfig.from_pretrained(
config, trust_remote_code=trust_remote_code, **kwargs
)
except ValueError:
# Diffusers uses a dictionary.
with open(config, "r") as f:
return json.load(f)
return transformers.AutoConfig.from_pretrained(
model_id, trust_remote_code=trust_remote_code, **kwargs
)
Expand Down
2 changes: 2 additions & 0 deletions onnx_diagnostic/torch_models/hghub/hub_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@
T5ForConditionalGeneration,text2text-generation
TableTransformerModel,image-feature-extraction
TableTransformerForObjectDetection,object-detection
UNet2DConditionModel,text-to-image
UniSpeechForSequenceClassification,audio-classification
ViTForImageClassification,image-classification
ViTMAEModel,image-feature-extraction
Expand Down Expand Up @@ -163,6 +164,7 @@
"sentence-similarity",
"text-classification",
"text-generation",
"text-to-image",
"text-to-audio",
"text2text-generation",
"zero-shot-image-classification",
Expand Down
17 changes: 15 additions & 2 deletions onnx_diagnostic/torch_models/hghub/model_inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ def get_untrained_model_with_inputs(
same_as_pretrained: bool = False,
use_preinstalled: bool = True,
add_second_input: bool = False,
subfolder: Optional[str] = None,
) -> Dict[str, Any]:
"""
Gets a non initialized model similar to the original model
Expand All @@ -37,6 +38,7 @@ def get_untrained_model_with_inputs(
:param use_preinstalled: use preinstalled configurations
:param add_second_input: provides a second set of inputs to check that a model
supports different shapes
:param subfolder: subfolder to use for this model id
:return: dictionary with a model, inputs, dynamic shapes, and the configuration

Example:
Expand All @@ -62,11 +64,18 @@ def get_untrained_model_with_inputs(
print(f"[get_untrained_model_with_inputs] use preinstalled {model_id!r}")
if config is None:
config = get_pretrained_config(
model_id, use_preinstalled=use_preinstalled, **(model_kwargs or {})
model_id,
use_preinstalled=use_preinstalled,
subfolder=subfolder,
**(model_kwargs or {}),
)
if hasattr(config, "architecture") and config.architecture:
archs = [config.architecture]
archs = config.architectures # type: ignore
if type(config) is dict:
assert "_class_name" in config, f"Unable to get the architecture from config={config}"
archs = [config["_class_name"]]
else:
archs = config.architectures # type: ignore
task = None
if archs is None:
task = task_from_id(model_id)
Expand All @@ -84,6 +93,10 @@ def get_untrained_model_with_inputs(

# model kwargs
if dynamic_rope is not None:
assert (
type(config) is not dict
), f"Unable to set dynamic_rope if the configuration is a dictionary\n{config}"
assert hasattr(config, "rope_scaling"), f"Missing 'rope_scaling' in\n{config}"
config.rope_scaling = (
{"rope_type": "dynamic", "factor": 10.0} if dynamic_rope else None
)
Expand Down
28 changes: 19 additions & 9 deletions onnx_diagnostic/torch_models/test_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,9 +109,12 @@ def _make_folder_name(
optimization: Optional[str] = None,
dtype: Optional[Union[str, torch.dtype]] = None,
device: Optional[Union[str, torch.device]] = None,
subfolder: Optional[str] = None,
) -> str:
"Creates a filename unique based on the given options."
els = [model_id.replace("/", "_")]
if subfolder:
els.append(subfolder.replace("/", "_"))
if exporter:
els.append(exporter)
if optimization:
Expand Down Expand Up @@ -224,6 +227,7 @@ def validate_model(
ortfusiontype: Optional[str] = None,
input_options: Optional[Dict[str, Any]] = None,
model_options: Optional[Dict[str, Any]] = None,
subfolder: Optional[str] = None,
) -> Tuple[Dict[str, Union[int, float, str]], Dict[str, Any]]:
"""
Validates a model.
Expand Down Expand Up @@ -256,11 +260,11 @@ def validate_model(
used to export
:param model_options: additional options when creating the model such as
``num_hidden_layers`` or ``attn_implementation``
:param subfolder: version or subfolder to use when retrieving a model id
:return: two dictionaries, one with some metrics,
another one with whatever the function produces
"""
summary = version_summary()

summary.update(
dict(
version_model_id=model_id,
Expand All @@ -282,7 +286,7 @@ def validate_model(
folder_name = None
if dump_folder:
folder_name = _make_folder_name(
model_id, exporter, optimization, dtype=dtype, device=device
model_id, exporter, optimization, dtype=dtype, device=device, subfolder=subfolder
)
dump_folder = os.path.join(dump_folder, folder_name)
if not os.path.exists(dump_folder):
Expand All @@ -293,11 +297,15 @@ def validate_model(
print(f"[validate_model] dump into {folder_name!r}")

if verbose:
print(f"[validate_model] validate model id {model_id!r}")
if subfolder:
print(f"[validate_model] validate model id {model_id!r}, subfolder={subfolder!r}")
else:
print(f"[validate_model] validate model id {model_id!r}")
if model_options:
print(f"[validate_model] model_options={model_options!r}")
print(f"[validate_model] get dummy inputs with input_options={input_options}...")
summary["model_id"] = model_id
summary["model_subfolder"] = subfolder or ""

iop = input_options or {}
mop = model_options or {}
Expand All @@ -307,14 +315,15 @@ def validate_model(
summary,
None,
(
lambda mid=model_id, v=verbose, task=task, tr=trained, iop=iop: (
lambda mid=model_id, v=verbose, task=task, tr=trained, iop=iop, sub=subfolder: (
get_untrained_model_with_inputs(
mid,
verbose=v,
task=task,
same_as_pretrained=tr,
inputs_kwargs=iop,
model_kwargs=mop,
subfolder=sub,
)
)
),
Expand Down Expand Up @@ -1060,15 +1069,16 @@ def call_torch_export_custom(
assert (
optimization in available
), f"unexpected value for optimization={optimization}, available={available}"
assert exporter in {
available = {
"custom",
"custom-strict",
"custom-strict-dec",
"custom-strict-default",
"custom-strict-all",
"custom-nostrict",
"custom-nostrict-dec",
"custom-nostrict-default",
"custom-nostrict-all",
}, f"Unexpected value for exporter={exporter!r}"
}
assert exporter in available, f"Unexpected value for exporter={exporter!r} in {available}"
assert "model" in data, f"model is missing from data: {sorted(data)}"
assert "inputs_export" in data, f"inputs_export is missing from data: {sorted(data)}"
summary: Dict[str, Union[str, int, float]] = {}
Expand Down Expand Up @@ -1100,7 +1110,7 @@ def call_torch_export_custom(
export_options = ExportOptions(
strict=strict,
decomposition_table=(
"dec" if "-dec" in exporter else ("all" if "-all" in exporter else None)
"default" if "-default" in exporter else ("all" if "-all" in exporter else None)
),
)
options = OptimizationOptions(patterns=optimization) if optimization else None
Expand Down
Loading