4 changes: 2 additions & 2 deletions _doc/api/tasks/automatic_speech_recognition.rst
@@ -1,6 +1,6 @@

-onnx_diagnostic.export.automatic_speech_recognition
-===================================================
+onnx_diagnostic.tasks.automatic_speech_recognition
+==================================================

.. automodule:: onnx_diagnostic.tasks.automatic_speech_recognition
:members:
4 changes: 2 additions & 2 deletions _doc/api/tasks/fill_mask.rst
@@ -1,6 +1,6 @@

-onnx_diagnostic.export.fill_mask
-================================
+onnx_diagnostic.tasks.fill_mask
+===============================

.. automodule:: onnx_diagnostic.tasks.fill_mask
:members:
4 changes: 2 additions & 2 deletions _doc/api/tasks/image_classification.rst
@@ -1,6 +1,6 @@

-onnx_diagnostic.export.image_classification
-===========================================
+onnx_diagnostic.tasks.image_classification
+==========================================

.. automodule:: onnx_diagnostic.tasks.image_classification
:members:
1 change: 1 addition & 0 deletions _doc/api/tasks/index.rst
@@ -9,6 +9,7 @@ onnx_diagnostic.tasks
fill_mask
image_classification
image_text_to_text
+sentence_similarity
text_classification
text_generation
text2text_generation
7 changes: 7 additions & 0 deletions _doc/api/tasks/sentence_similarity.rst
@@ -0,0 +1,7 @@

+onnx_diagnostic.tasks.sentence_similarity
+=========================================
+
+.. automodule:: onnx_diagnostic.tasks.sentence_similarity
+   :members:
+   :no-undoc-members:
4 changes: 2 additions & 2 deletions _doc/api/tasks/text2text_generation.rst
@@ -1,6 +1,6 @@

-onnx_diagnostic.export.text2text_generation
-===========================================
+onnx_diagnostic.tasks.text2text_generation
+==========================================

.. automodule:: onnx_diagnostic.tasks.text2text_generation
:members:
4 changes: 2 additions & 2 deletions _doc/api/tasks/text_classification.rst
@@ -1,6 +1,6 @@

-onnx_diagnostic.export.text_classification
-==========================================
+onnx_diagnostic.tasks.text_classification
+=========================================

.. automodule:: onnx_diagnostic.tasks.text_classification
:members:
4 changes: 2 additions & 2 deletions _doc/api/tasks/text_generation.rst
@@ -1,6 +1,6 @@

-onnx_diagnostic.export.text_generation
-======================================
+onnx_diagnostic.tasks.text_generation
+=====================================

.. automodule:: onnx_diagnostic.tasks.text_generation
:members:
4 changes: 2 additions & 2 deletions _doc/api/tasks/zero_shot_image_classification.rst
@@ -1,6 +1,6 @@

-onnx_diagnostic.export.zero_shot_image_classification
-=====================================================
+onnx_diagnostic.tasks.zero_shot_image_classification
+====================================================

.. automodule:: onnx_diagnostic.tasks.zero_shot_image_classification
:members:
6 changes: 3 additions & 3 deletions _doc/examples/plot_export_tiny_llm.py
@@ -31,7 +31,7 @@
import transformers
from onnx_diagnostic import doc
from onnx_diagnostic.helpers import string_type
-from onnx_diagnostic.helpers.torch_test_helper import steel_forward
+from onnx_diagnostic.helpers.torch_test_helper import steal_forward
from onnx_diagnostic.torch_models.llms import get_tiny_llm


@@ -77,9 +77,9 @@ def _forward_(*args, _f=None, **kwargs):
model.forward = keep_model_forward

# %%
-# Another syntax with :func:`onnx_diagnostic.helpers.torch_test_helper.steel_forward`.
+# Another syntax with :func:`onnx_diagnostic.helpers.torch_test_helper.steal_forward`.

-with steel_forward(model):
+with steal_forward(model):
model.generate(inputs, max_length=50, temperature=1, top_k=50, top_p=0.95, do_sample=True)

# %%
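Editor's note: the hunks above and below correct a typo in the helper's name, `steel_forward` → `steal_forward`. For reviewers unfamiliar with it, the pattern is the one shown in the example's manual variant (`_forward_` / `keep_model_forward`): a context manager that temporarily replaces `model.forward` to log inputs and outputs, then restores it. A minimal illustrative sketch follows; it is not the library's actual implementation, and `steal_forward_sketch` is a hypothetical name.

```python
import contextlib
import torch


@contextlib.contextmanager
def steal_forward_sketch(model: torch.nn.Module):
    """Illustrative stand-in for steal_forward; the real helper lives in
    onnx_diagnostic.helpers.torch_test_helper and may differ."""
    kept_forward = model.forward  # keep the original forward to restore it later

    def _forward_(*args, **kwargs):
        print("-- inputs:", args, kwargs)  # observe what generate() feeds the model
        result = kept_forward(*args, **kwargs)
        print("-- outputs:", result)
        return result

    model.forward = _forward_
    try:
        yield
    finally:
        model.forward = kept_forward  # never leave the model patched
```

It is used exactly like the renamed helper: `with steal_forward_sketch(model): model.generate(...)`.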
6 changes: 3 additions & 3 deletions _unittests/ut_helpers/test_torch_test_helper.py
@@ -8,7 +8,7 @@
dummy_llm,
to_numpy,
is_torchdynamo_exporting,
-steel_forward,
+steal_forward,
replace_string_by_dynamic,
to_any,
torch_deepcopy,
@@ -43,14 +43,14 @@ def test_to_numpy(self):
self.assertEqual(a.dtype, ml_dtypes.bfloat16)

@hide_stdout()
-def test_steel_forward(self):
+def test_steal_forward(self):
class Model(torch.nn.Module):
def forward(self, x, y):
return x + y

inputs = torch.rand(3, 4), torch.rand(3, 4)
model = Model()
-with steel_forward(model):
+with steal_forward(model):
model(*inputs)

def test_replace_string_by_dynamic(self):
21 changes: 17 additions & 4 deletions _unittests/ut_tasks/test_tasks.py
@@ -9,7 +9,6 @@ class TestTasks(ExtTestCase):
@hide_stdout()
def test_text2text_generation(self):
mid = "sshleifer/tiny-marian-en-de"
-# mid = "Salesforce/codet5-small"
data = get_untrained_model_with_inputs(mid, verbose=1)
self.assertIn((data["size"], data["n_weights"]), [(473928, 118482)])
model, inputs = data["model"], data["inputs"]
@@ -85,7 +84,6 @@ def test_automatic_speech_recognition(self):
@hide_stdout()
def test_imagetext2text_generation(self):
mid = "HuggingFaceM4/tiny-random-idefics"
-# mid = "Salesforce/codet5-small"
data = get_untrained_model_with_inputs(mid, verbose=1)
self.assertIn((data["size"], data["n_weights"]), [(12742888, 3185722)])
model, inputs = data["model"], data["inputs"]
@@ -94,7 +92,6 @@ def test_imagetext2text_generation(self):
@hide_stdout()
def test_fill_mask(self):
mid = "google-bert/bert-base-multilingual-cased"
-# mid = "Salesforce/codet5-small"
data = get_untrained_model_with_inputs(mid, verbose=1)
self.assertIn((data["size"], data["n_weights"]), [(428383212, 107095803)])
model, inputs = data["model"], data["inputs"]
@@ -103,12 +100,28 @@ def test_text_classification(self):
@hide_stdout()
def test_text_classification(self):
mid = "Intel/bert-base-uncased-mrpc"
-# mid = "Salesforce/codet5-small"
data = get_untrained_model_with_inputs(mid, verbose=1)
self.assertIn((data["size"], data["n_weights"]), [(154420232, 38605058)])
model, inputs = data["model"], data["inputs"]
model(**inputs)

+    @hide_stdout()
+    def test_sentence_similarity(self):
+        mid = "sentence-transformers/all-MiniLM-L6-v1"
+        data = get_untrained_model_with_inputs(mid, verbose=1)
+        self.assertIn((data["size"], data["n_weights"]), [(62461440, 15615360)])
+        model, inputs = data["model"], data["inputs"]
+        model(**inputs)
+
+    @hide_stdout()
+    def test_falcon_mamba_dev(self):
+        mid = "tiiuae/falcon-mamba-tiny-dev"
+        data = get_untrained_model_with_inputs(mid, verbose=1)
+        model, inputs = data["model"], data["inputs"]
+        print(self.string_type(inputs, with_shape=True))
+        model(**inputs)
+        self.assertIn((data["size"], data["n_weights"]), [(138640384, 34660096)])
+

if __name__ == "__main__":
unittest.main(verbosity=2)
131 changes: 127 additions & 4 deletions _unittests/ut_tasks/try_tasks.py
@@ -1,7 +1,7 @@
import unittest
from onnx_diagnostic.ext_test_case import ExtTestCase, never_test
from onnx_diagnostic.helpers import string_type
-from onnx_diagnostic.helpers.torch_test_helper import steel_forward
+from onnx_diagnostic.helpers.torch_test_helper import steal_forward


class TestHuggingFaceHubModel(ExtTestCase):
@@ -92,7 +92,7 @@ def test_text2text_generation(self):

# simply generate a single sequence
print()
-with steel_forward(model):
+with steal_forward(model):
generated_ids = model.generate(
decoder_input_ids=input_ids, attention_mask=mask, max_length=100
)
@@ -121,7 +121,7 @@ def test_imagetext2text_generation(self):
["<image>", "<fake_token_around_image>"], add_special_tokens=False
).input_ids
print()
-with steel_forward(model):
+with steal_forward(model):
generated_ids = model.generate(
**inputs, max_new_tokens=10, bad_words_ids=bad_words_ids
)
@@ -184,7 +184,7 @@ def test_automatic_speech_recognition(self):

# generate token ids
print()
-with steel_forward(model):
+with steal_forward(model):
predicted_ids = model.generate(
input_features, forced_decoder_ids=forced_decoder_ids
)
@@ -236,6 +236,129 @@ def test_text_classification(self):
encoded_input["input_ids"][0]
tokenizer.convert_ids_to_tokens(encoded_input["input_ids"][0])

+    @never_test()
+    def test_sentence_similarity(self):
+        # clear&&NEVERTEST=1 python _unittests/ut_tasks/try_tasks.py -k ce_sim
+        # https://huggingface.co/sentence-transformers/all-MiniLM-L6-v1
+
+        from transformers import AutoTokenizer, AutoModel
+        import torch
+        import torch.nn.functional as F
+
+        # Mean Pooling - Take attention mask into account for correct averaging
+        def mean_pooling(model_output, attention_mask):
+            token_embeddings = model_output[
+                0
+            ]  # First element of model_output contains all token embeddings
+            input_mask_expanded = (
+                attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
+            )
+            return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(
+                input_mask_expanded.sum(1), min=1e-9
+            )
+
+        # Sentences we want sentence embeddings for
+        sentences = ["This is an example sentence", "Each sentence is converted"]
+
+        # Load model from HuggingFace Hub
+        tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v1")
+        model = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v1")
+
+        # Tokenize sentences
+        encoded_input = tokenizer(
+            sentences, padding=True, truncation=True, return_tensors="pt"
+        )
+
+        # Compute token embeddings
+        with torch.no_grad():
+            print()
+            print("-- inputs", string_type(encoded_input, with_shape=True, with_min_max=True))
+            model_output = model(**encoded_input)
+            print("-- outputs", string_type(model_output, with_shape=True, with_min_max=True))
+
+        # Perform pooling
+        sentence_embeddings = mean_pooling(model_output, encoded_input["attention_mask"])
+
+        # Normalize embeddings
+        sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1)
+
+        print("Sentence embeddings:")
+        print(sentence_embeddings)
+
+    @never_test()
+    def test_falcon_mamba_dev(self):
+        # clear&&NEVERTEST=1 python _unittests/ut_tasks/try_tasks.py -k falcon_mamba_dev
+        # https://huggingface.co/tiiuae/falcon-mamba-tiny-dev
+
+        from transformers import AutoTokenizer
+        import transformers
+        import torch
+
+        model = "tiiuae/falcon-mamba-tiny-dev"
+
+        tokenizer = AutoTokenizer.from_pretrained(model)
+        pipeline = transformers.pipeline(
+            "text-generation",
+            model=model,
+            tokenizer=tokenizer,
+            torch_dtype=torch.bfloat16,
+            trust_remote_code=True,
+            device_map="auto",
+        )
+        print()
+        with steal_forward(pipeline.model):
+            sequences = pipeline(
+                "Girafatron is obsessed with giraffes, "
+                "the most glorious animal on the face of this Earth. "
+                "Giraftron believes all other animals are irrelevant "
+                "when compared to the glorious majesty of the giraffe."
+                "\nDaniel: Hello, Girafatron!\nGirafatron:",
+                max_length=200,
+                do_sample=True,
+                top_k=10,
+                num_return_sequences=1,
+                eos_token_id=tokenizer.eos_token_id,
+            )
+        for seq in sequences:
+            print(f"Result: {seq['generated_text']}")
+
+    @never_test()
+    def test_falcon_mamba_7b(self):
+        # clear&&NEVERTEST=1 python _unittests/ut_tasks/try_tasks.py -k falcon_mamba_7b
+        # https://huggingface.co/tiiuae/falcon-mamba-7b
+
+        from transformers import AutoTokenizer
+        import transformers
+        import torch
+
+        model = "tiiuae/falcon-mamba-7b"
+
+        tokenizer = AutoTokenizer.from_pretrained(model)
+        pipeline = transformers.pipeline(
+            "text-generation",
+            model=model,
+            tokenizer=tokenizer,
+            torch_dtype=torch.bfloat16,
+            trust_remote_code=True,
+            device_map="auto",
+        )
+        print()
+        with steal_forward(pipeline.model):
+            sequences = pipeline(
+                "Girafatron is obsessed with giraffes, "
+                "the most glorious animal on the face of this Earth. "
+                "Giraftron believes all other animals are irrelevant "
+                "when compared to the glorious majesty of the giraffe."
+                "\nDaniel: Hello, Girafatron!\nGirafatron:",
+                max_length=200,
+                do_sample=True,
+                top_k=10,
+                num_return_sequences=1,
+                eos_token_id=tokenizer.eos_token_id,
+            )
+        for seq in sequences:
+            print(f"Result: {seq['generated_text']}")
+

if __name__ == "__main__":
unittest.main(verbosity=2)
5 changes: 4 additions & 1 deletion _unittests/ut_xrun_doc/test_documentation_examples.py
@@ -54,7 +54,10 @@ def run_test(self, fold: str, name: str, verbose=0) -> int:
# dot not installed, this part
# is tested in onnx framework
raise unittest.SkipTest(f"failed: {name!r} due to missing dot.")
if "We couldn't connect to 'https://huggingface.co'" in st:
if (
"We couldn't connect to 'https://huggingface.co'" in st
or "Cannot access content at: https://huggingface.co/" in st
):
raise unittest.SkipTest(f"Connectivity issues due to\n{err}")
raise AssertionError( # noqa: B904
"Example '{}' (cmd: {} - exec_prefix='{}') "
32 changes: 32 additions & 0 deletions onnx_diagnostic/helpers/cache_helper.py
Expand Up @@ -136,3 +136,35 @@ def make_encoder_decoder_cache(
return transformers.cache_utils.EncoderDecoderCache(
self_attention_cache=self_attention_cache, cross_attention_cache=cross_attention_cache
)


+def make_mamba_cache(
+    key_value_pairs: List[Tuple[torch.Tensor, torch.Tensor]],
+) -> transformers.cache_utils.MambaCache:
+    "Creates a :class:`transformers.cache_utils.MambaCache`."
+
+    class _config:
+        def __init__(self):
+            self.intermediate_size = key_value_pairs[0][0].shape[1]
+            self.conv_kernel = key_value_pairs[0][0].shape[-1]
+            self.state_size = key_value_pairs[0][1].shape[-1]
+            self.num_hidden_layers = len(key_value_pairs)
+            self.dtype = key_value_pairs[0][0].dtype
+
+    cache = transformers.cache_utils.MambaCache(
+        _config(),
+        max_batch_size=key_value_pairs[0][0].shape[0],
+        device=key_value_pairs[0][0].device,
+    )
+    for i in range(len(key_value_pairs)):
+        assert cache.conv_states[i].shape == key_value_pairs[i][0].shape, (
+            f"Shape mismatch, expected {cache.conv_states[i].shape}, "
+            f"got {key_value_pairs[i][0].shape}"
+        )
+        cache.conv_states[i][:, :, :] = key_value_pairs[i][0]
+        assert cache.ssm_states[i].shape == key_value_pairs[i][1].shape, (
+            f"Shape mismatch, expected {cache.ssm_states[i].shape}, "
+            f"got {key_value_pairs[i][1].shape}"
+        )
+        cache.ssm_states[i][:, :, :] = key_value_pairs[i][1]
+    return cache
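Editor's note: a minimal usage sketch of the new helper. It derives the cache layout from the pairs themselves: one `(conv_state, ssm_state)` tuple per layer, with shapes `(batch, intermediate_size, conv_kernel)` and `(batch, intermediate_size, state_size)`. The sizes below (batch=2, intermediate_size=16, conv_kernel=4, state_size=8, 3 layers) are illustrative assumptions, not values from this PR.

```python
import torch
from onnx_diagnostic.helpers.cache_helper import make_mamba_cache

# One (conv_state, ssm_state) pair per layer; sizes are hypothetical.
pairs = [
    (torch.randn(2, 16, 4), torch.randn(2, 16, 8))  # (conv_state, ssm_state)
    for _ in range(3)  # pretend the model has 3 hidden layers
]
cache = make_mamba_cache(pairs)
# The cache now holds one conv/ssm state per layer, copied from the pairs.
print(len(cache.conv_states), cache.conv_states[0].shape)  # 3, torch.Size([2, 16, 4])
```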