16 changes: 13 additions & 3 deletions .github/workflows/ci.yml
@@ -15,10 +15,20 @@ jobs:
strategy:
matrix:
os: [ubuntu-latest]
python: ['3.11', '3.12']
python: ['3.10', '3.11', '3.12']
transformers: ['4.48.3', '4.51.3', 'main']
torch: ['2.6', 'main']

torch: ['2.6', '2.7', 'main']
exclude:
- python: '3.10'
transformers: 'main'
- python: '3.10'
torch: '2.7'
- python: '3.11'
transformers: '4.51.3'
- python: '3.11'
torch: '2.7'
- python: '3.12'
torch: '2.6'
steps:
- uses: actions/checkout@v3

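The new matrix and exclude rules trim the 3x3x3 grid (python x transformers x torch) down to 14 jobs. A small sketch, not part of the PR, that mirrors GitHub Actions' exclude semantics (an entry drops every job matching all of its keys) to list what actually runs:

from itertools import product

python = ["3.10", "3.11", "3.12"]
transformers = ["4.48.3", "4.51.3", "main"]
torch = ["2.6", "2.7", "main"]
exclude = [
    {"python": "3.10", "transformers": "main"},
    {"python": "3.10", "torch": "2.7"},
    {"python": "3.11", "transformers": "4.51.3"},
    {"python": "3.11", "torch": "2.7"},
    {"python": "3.12", "torch": "2.6"},
]

def is_excluded(job):
    # an exclude entry removes a job when every key/value it lists matches
    return any(all(job[k] == v for k, v in rule.items()) for rule in exclude)

jobs = [
    {"python": p, "transformers": tr, "torch": to}
    for p, tr, to in product(python, transformers, torch)
    if not is_excluded({"python": p, "transformers": tr, "torch": to})
]
print(len(jobs))  # 14 of the 27 combinations remain
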
7 changes: 7 additions & 0 deletions _doc/api/tasks/feature_extraction.rst
@@ -0,0 +1,7 @@

onnx_diagnostic.tasks.feature_extraction
========================================

.. automodule:: onnx_diagnostic.tasks.feature_extraction
:members:
:no-undoc-members:
2 changes: 2 additions & 0 deletions _doc/api/tasks/index.rst
@@ -34,8 +34,10 @@ Or:

automatic_speech_recognition
fill_mask
feature_extraction
image_classification
image_text_to_text
mixture_of_expert
sentence_similarity
text_classification
text_generation
7 changes: 7 additions & 0 deletions _doc/api/tasks/mixture_of_expert.rst
@@ -0,0 +1,7 @@

onnx_diagnostic.tasks.mixture_of_expert
=======================================

.. automodule:: onnx_diagnostic.tasks.mixture_of_expert
:members:
:no-undoc-members:
10 changes: 8 additions & 2 deletions _doc/conf.py
@@ -12,6 +12,7 @@
"sphinx.ext.githubpages",
"sphinx.ext.ifconfig",
"sphinx.ext.intersphinx",
"sphinx.ext.linkcode",
"sphinx.ext.mathjax",
"sphinx.ext.viewcode",
"sphinx.ext.todo",
@@ -63,15 +64,20 @@
# ]

# The following is used by sphinx.ext.linkcode to provide links to github
linkcode_resolve = make_linkcode_resolve(
"onnx-diagnostic",
_linkcode_resolve = make_linkcode_resolve(
"onnx_diagnostic",
(
"https://github.com/sdpython/onnx-diagnostic/"
"blob/{revision}/{package}/"
"{path}#L{lineno}"
),
)


def linkcode_resolve(domain, info):
return _linkcode_resolve(domain, info)


latex_elements = {
"papersize": "a4",
"pointsize": "10pt",
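
For context on the conf.py change above: sphinx.ext.linkcode only picks up a module-level callable named exactly linkcode_resolve(domain, info), which is presumably why the resolver built by make_linkcode_resolve is now kept as _linkcode_resolve and re-exposed through a plain wrapper. A minimal, hypothetical resolver showing the contract Sphinx expects (illustrative only, not the PR's implementation):

def linkcode_resolve(domain, info):
    # Sphinx calls this for every documented object, e.g.
    # domain="py", info={"module": "onnx_diagnostic.tasks", "fullname": "get_inputs"}.
    if domain != "py" or not info.get("module"):
        return None  # returning None suppresses the source link
    path = info["module"].replace(".", "/") + ".py"
    return f"https://github.com/sdpython/onnx-diagnostic/blob/main/{path}"
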
4 changes: 3 additions & 1 deletion _doc/recipes/plot_dynamic_shapes_max.py
@@ -10,6 +10,8 @@
in the exported program is something very aggressive. Here is a case where
it takes a wrong decision and how to get around it.

**This bug was fixed after 4/24/2025**.

Wrong Model
+++++++++++
"""
@@ -183,4 +185,4 @@ def forward(self, x, y, fact):
# is hidden in a custom operator.


doc.plot_legend("dynamic shapes\nworkaround\nmax(d1, d2)", "dynamic shapes", "yellow")
doc.plot_legend("max(d1, d2)\nwith d1, d2 dimensions", "dynamic shapes", "green")
12 changes: 12 additions & 0 deletions _unittests/ut_tasks/test_tasks.py
@@ -116,6 +116,18 @@ def test_fill_mask(self):
model, (), kwargs=inputs, dynamic_shapes=use_dyn_not_str(ds), strict=False
)

@hide_stdout()
def test_feature_extraction(self):
mid = "facebook/bart-base"
data = get_untrained_model_with_inputs(mid, verbose=1)
self.assertIn((data["size"], data["n_weights"]), [(557681664, 139420416)])
model, inputs, ds = data["model"], data["inputs"], data["dynamic_shapes"]
model(**inputs)
with bypass_export_some_errors(patch_transformers=True, verbose=10):
torch.export.export(
model, (), kwargs=inputs, dynamic_shapes=use_dyn_not_str(ds), strict=False
)

@hide_stdout()
def test_text_classification(self):
mid = "Intel/bert-base-uncased-mrpc"
121 changes: 119 additions & 2 deletions _unittests/ut_tasks/try_tasks.py
@@ -99,8 +99,8 @@ def test_text2text_generation(self):
print(tokenizer.decode(generated_ids[0], skip_special_tokens=True))

@never_test()
def test_text_generation_phi4(self):
# clear&&NEVERTEST=1 python _unittests/ut_tasks/try_tasks.py -k phi4
def test_text_generation_phi4_mini(self):
# clear&&NEVERTEST=1 python _unittests/ut_tasks/try_tasks.py -k phi4_mini

import torch
from transformers import RobertaTokenizer, T5ForConditionalGeneration
@@ -124,6 +124,107 @@ def test_text_generation_phi4(self):
)
print(tokenizer.decode(generated_ids[0], skip_special_tokens=True))

@never_test()
@unittest.skip(
reason="AttributeError: 'Phi4MMModel' object has no attribute "
"'prepare_inputs_for_generation'"
)
def test_text_generation_phi4_moe(self):
# clear&&NEVERTEST=1 python _unittests/ut_tasks/try_tasks.py -k phi4_moe

import requests
import io
from PIL import Image
import soundfile as sf
from transformers import AutoModelForCausalLM, AutoProcessor, GenerationConfig
from urllib.request import urlopen

# Define model path
model_path = "microsoft/Phi-4-multimodal-instruct"

# Load model and processor
processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
model_path,
device_map="cuda",
torch_dtype="auto",
trust_remote_code=True,
# if you do not use Ampere or later GPUs, change attention to "eager"
# _attn_implementation='flash_attention_2',
_attn_implementation="eager",
).cuda()

# Load generation config
generation_config = GenerationConfig.from_pretrained(model_path)

# Define prompt structure
user_prompt = "<|user|>"
assistant_prompt = "<|assistant|>"
prompt_suffix = "<|end|>"

# Part 1: Image Processing
print("\n--- IMAGE PROCESSING ---")
image_url = "https://www.ilankelman.org/stopsigns/australia.jpg"
prompt = (
f"{user_prompt}<|image_1|>What is shown in this image"
f"?{prompt_suffix}{assistant_prompt}"
)
print(f">>> Prompt\n{prompt}")

# Download and open image
image = Image.open(requests.get(image_url, stream=True).raw)
inputs = processor(text=prompt, images=image, return_tensors="pt").to("cuda:0")

# Generate response
print("--------- IMAGE PROCESSING ----------")
print()
with steal_forward(model):
generate_ids = model.generate(
**inputs,
max_new_tokens=1000,
generation_config=generation_config,
)
generate_ids = generate_ids[:, inputs["input_ids"].shape[1] :]
response = processor.batch_decode(
generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
)[0]
print(f">>> Response\n{response}")

# Part 2: Audio Processing
print("\n--- AUDIO PROCESSING ---")
audio_url = (
"https://upload.wikimedia.org/wikipedia/commons/b/b0/"
"Barbara_Sahakian_BBC_Radio4_The_Life_Scientific_29_May_2012_b01j5j24.flac"
)
speech_prompt = (
"Transcribe the audio to text, and then translate the audio to French. "
"Use <sep> as a separator between the original transcript and the translation."
)
prompt = f"{user_prompt}<|audio_1|>{speech_prompt}{prompt_suffix}{assistant_prompt}"
print(f">>> Prompt\n{prompt}")

# Download and open audio file
audio, samplerate = sf.read(io.BytesIO(urlopen(audio_url).read()))

# Process with the model
inputs = processor(text=prompt, audios=[(audio, samplerate)], return_tensors="pt").to(
"cuda:0"
)

print("--------- AUDIO PROCESSING ----------")
print()
with steal_forward(model):
generate_ids = model.generate(
**inputs,
max_new_tokens=1000,
generation_config=generation_config,
)
generate_ids = generate_ids[:, inputs["input_ids"].shape[1] :]
response = processor.batch_decode(
generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
)[0]
print(f">>> Response\n{response}")

@never_test()
def test_imagetext2text_generation(self):
# clear&&NEVERTEST=1 python _unittests/ut_tasks/try_tasks.py -k etext2t
@@ -237,6 +338,22 @@ def test_fill_mask(self):
output = model(**encoded_input)
print("-- outputs", string_type(output, with_shape=True, with_min_max=True))

@never_test()
def test_feature_extraction(self):
# clear&&NEVERTEST=1 python _unittests/ut_tasks/try_tasks.py -k feature_ex
# https://huggingface.co/google-bert/bert-base-multilingual-cased

from transformers import BartTokenizer, BartModel

tokenizer = BartTokenizer.from_pretrained("facebook/bart-base")
model = BartModel.from_pretrained("facebook/bart-base")
text = "Replace me by any text you'd like."
encoded_input = tokenizer(text, return_tensors="pt")
print()
print("-- inputs", string_type(encoded_input, with_shape=True, with_min_max=True))
output = model(**encoded_input)
print("-- outputs", string_type(output, with_shape=True, with_min_max=True))

@never_test()
def test_text_classification(self):
# clear&&NEVERTEST=1 python _unittests/ut_tasks/try_tasks.py -k text_cl
@@ -175,7 +175,7 @@ def test_base_sliding_window_cache_unflatten_flatten(self):
self.assertEqualAny([cache], cache2)

@ignore_warnings(UserWarning)
@requires_torch("2.7")
@requires_torch("2.8")
def test_sliding_window_cache_export(self):
class Model(torch.nn.Module):
def forward(self, cache):
7 changes: 6 additions & 1 deletion _unittests/ut_torch_models/test_test_helpers.py
@@ -22,7 +22,10 @@ class TestTestHelper(ExtTestCase):
def test_get_inputs_for_task(self):
fcts = supported_tasks()
for task in self.subloop(sorted(fcts)):
data = get_inputs_for_task(task)
try:
data = get_inputs_for_task(task)
except NotImplementedError:
continue
self.assertIsInstance(data, dict)
self.assertIn("inputs", data)
self.assertIn("dynamic_shapes", data)
@@ -99,9 +102,11 @@ def test_validate_model_custom(self):
patch=True,
stop_if_static=2 if pv.Version(torch.__version__) > pv.Version("2.6.1") else 0,
optimization="default",
quiet=False,
)
self.assertIsInstance(summary, dict)
self.assertIsInstance(data, dict)
self.assertIn("disc_onnx_ort_run_abs", summary)
self.assertLess(summary["disc_onnx_ort_run_abs"], 1e-4)
onnx_filename = data["onnx_filename"]
output_path = f"{onnx_filename}.ortopt.onnx"
2 changes: 1 addition & 1 deletion onnx_diagnostic/ext_test_case.py
@@ -461,7 +461,7 @@ def requires_sklearn(version: str, msg: str = "") -> Callable:
return lambda x: x


def requires_experimental(version: str = "", msg: str = "") -> Callable:
def requires_experimental(version: str = "0.0.0", msg: str = "") -> Callable:
"""Skips a unit test if :epkg:`experimental-experiment` is not recent enough."""
import packaging.version as pv

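A likely reason for the new "0.0.0" default in requires_experimental (an assumption, the diff itself does not say): packaging cannot parse an empty string, so comparing a library version against the old default would raise. A quick check:

import packaging.version as pv

pv.Version("0.0.0")  # parses; any released version compares as >= 0.0.0
pv.Version("")       # raises packaging.version.InvalidVersion
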
4 changes: 4 additions & 0 deletions onnx_diagnostic/tasks/__init__.py
@@ -1,9 +1,11 @@
from typing import Any, Callable, Dict, List, Tuple
from . import (
automatic_speech_recognition,
feature_extraction,
fill_mask,
image_classification,
image_text_to_text,
mixture_of_expert,
sentence_similarity,
text_classification,
text_generation,
@@ -13,9 +15,11 @@

__TASKS__ = [
automatic_speech_recognition,
feature_extraction,
fill_mask,
image_classification,
image_text_to_text,
mixture_of_expert,
sentence_similarity,
text_classification,
text_generation,
2 changes: 1 addition & 1 deletion onnx_diagnostic/tasks/automatic_speech_recognition.py
@@ -36,7 +36,7 @@ def get_inputs(
**kwargs, # unused
):
"""
Generates inputs for task ``text2text-generation``.
Generates inputs for task ``automatic-speech-recognition``.
Example:

::
65 changes: 65 additions & 0 deletions onnx_diagnostic/tasks/feature_extraction.py
@@ -0,0 +1,65 @@
from typing import Any, Callable, Dict, Optional, Tuple
import torch
from ..helpers.config_helper import update_config, check_hasattr

__TASK__ = "feature-extraction"


def reduce_model_config(config: Any) -> Dict[str, Any]:
"""Reduces a model size."""
check_hasattr(config, "num_attention_heads", "num_hidden_layers")
kwargs = dict(
num_hidden_layers=min(config.num_hidden_layers, 2),
num_attention_heads=min(config.num_attention_heads, 4),
)
update_config(config, kwargs)
return kwargs


def get_inputs(
model: torch.nn.Module,
config: Optional[Any],
batch_size: int,
sequence_length: int,
dummy_max_token_id: int,
**kwargs, # unused
):
"""
Generates inputs for task ``feature-extraction``.
Example:

::

input_ids:T7s1x13[101,72654:A16789.23076923077],
token_type_ids:T7s1x13[0,0:A0.0],
attention_mask:T7s1x13[1,1:A1.0])
"""
batch = torch.export.Dim("batch", min=1, max=1024)
seq_length = "sequence_length"
shapes = {
"input_ids": {0: batch, 1: seq_length},
"attention_mask": {0: batch, 1: seq_length},
}
inputs = dict(
input_ids=torch.randint(0, dummy_max_token_id, (batch_size, sequence_length)).to(
torch.int64
),
attention_mask=torch.ones((batch_size, sequence_length)).to(torch.int64),
)
return dict(inputs=inputs, dynamic_shapes=shapes)


def random_input_kwargs(config: Any) -> Tuple[Dict[str, Any], Callable]:
"""
Inputs kwargs.

If the configuration is None, the function selects typical dimensions.
"""
if config is not None:
check_hasattr(config, "vocab_size")
kwargs = dict(
batch_size=2,
sequence_length=30,
dummy_max_token_id=31999 if config is None else (config.vocab_size - 1),
)
return kwargs, get_inputs
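
A minimal usage sketch (not part of the PR) for the new feature-extraction task module; choosing facebook/bart-base mirrors the tests above and is an assumption; any encoder whose config exposes vocab_size should work:

from transformers import BartModel
from onnx_diagnostic.tasks import feature_extraction

model = BartModel.from_pretrained("facebook/bart-base")
kwargs, fn = feature_extraction.random_input_kwargs(model.config)  # fn is get_inputs
data = fn(model, model.config, **kwargs)  # batch_size=2, sequence_length=30 by default
model(**data["inputs"])                   # dummy forward pass with the generated tensors
print(data["dynamic_shapes"])             # batch and sequence_length are marked dynamic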