diff --git a/.github/workflows/check-urls.yml b/.github/workflows/check-urls.yml index a1521060..c4445b17 100644 --- a/.github/workflows/check-urls.yml +++ b/.github/workflows/check-urls.yml @@ -31,7 +31,7 @@ jobs: timeout: 2 retry_count# : 2 exclude_urls: https://github.com/pytorch/pytorch/pull/117009,https://github.com/huggingface/transformers/pull/29285,https://github.com/pytorch/pytorch/blob/a44f8894fa6d973693aab44a3dda079a168b05c1/torch/_decomp/decompositions.py#L1475 - exclude_patterns: https://dumps.wikimedia.org/,https://github.com/pytorch/pytorch/pull/,https://github.com/pytorch/pytorch/blob/a44f8894fa6d973693aab44a3dda079a168b05c1/torch/_decomp/decompositions.py#L1475,https://huggingface.co/ + exclude_patterns: https://dumps.wikimedia.org/,https://github.com/pytorch/pytorch/pull/,https://github.com/pytorch/pytorch/blob/a44f8894fa6d973693aab44a3dda079a168b05c1/torch/_decomp/decompositions.py#L1475,https://huggingface.co/,https://huggingface.co/ # force_pass : true - name: urls-checker-docs @@ -43,5 +43,5 @@ jobs: timeout: 2 retry_count# : 2 exclude_urls: https://hal.archives-,ouvertes.fr/hal-00990252/document,http://badge.fury.io/py/onnx-diagnostic,https://azure.microsoft.com/en-us/products/devops/pipelines,https://github.com/pytorch/pytorch/pull/83137#issuecomment-1211320670,https://github.com/NVIDIA/TransformerEngine.git@6a9edc38bf9b941b7d369af5103fa8fe0b121d61,https://medium.com/@msouza.os/llm-from-scratch-with-pytorch-9f21808c6319,https://github.com/pytorch/pytorch/blob/main/torch/fx/experimental/symbolic_shapes.py#L5965,https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-24-04.html,https://badge.fury.io/py/onnx-diagnostic.svg,https://github.com/huggingface/transformers/pull/36311 - exclude_patterns: https://www.data.gouv.fr/fr/datasets/r/e3d83ab3-dc52-4c99-abaf-8a38050cc68c,https://dev.azure.com/,https://azure.microsoft.com/en-us/products/devops/pipelines,https://github.com/pytorch/pytorch/pull/83137#issuecomment-1211320670,https://github.com/NVIDIA/TransformerEngine.git@6a9edc38bf9b941b7d369af5103fa8fe0b121d61,https://github.com/pytorch/pytorch/blob/main/torch/,https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-24-04.html,https://badge.fury.io/py/onnx-diagnostic.svg,https://github.com/huggingface/transformers/pull/36311,https://codecov.io/ + exclude_patterns: https://www.data.gouv.fr/fr/datasets/r/e3d83ab3-dc52-4c99-abaf-8a38050cc68c,https://dev.azure.com/,https://azure.microsoft.com/en-us/products/devops/pipelines,https://github.com/pytorch/pytorch/pull/83137#issuecomment-1211320670,https://github.com/NVIDIA/TransformerEngine.git@6a9edc38bf9b941b7d369af5103fa8fe0b121d61,https://github.com/pytorch/pytorch/blob/main/torch/,https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-24-04.html,https://badge.fury.io/py/onnx-diagnostic.svg,https://github.com/huggingface/transformers/pull/36311,https://codecov.io/,https://huggingface.co/ # force_pass : true diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml index b461325b..b7abaa86 100644 --- a/.github/workflows/documentation.yml +++ b/.github/workflows/documentation.yml @@ -62,9 +62,11 @@ jobs: - name: tiny-llm example run: PYTHONPATH=. python _doc/examples/plot_export_tiny_llm.py + continue-on-error: true # connectivity issues - name: tiny-llm bypass run: PYTHONPATH=. python _doc/examples/plot_export_tiny_llm_patched.py + continue-on-error: true # connectivity issues - name: run tests bypassed run: PYTHONPATH=. 
python _unittests/ut_torch_models/test_tiny_llms_bypassed.py diff --git a/_doc/api/helpers/config_helper.rst b/_doc/api/helpers/config_helper.rst new file mode 100644 index 00000000..6dd9be71 --- /dev/null +++ b/_doc/api/helpers/config_helper.rst @@ -0,0 +1,7 @@ + +onnx_diagnostic.helpers.config_helper +===================================== + +.. automodule:: onnx_diagnostic.helpers.config_helper + :members: + :no-undoc-members: diff --git a/_doc/api/helpers/index.rst b/_doc/api/helpers/index.rst index 79703adf..d3224eb6 100644 --- a/_doc/api/helpers/index.rst +++ b/_doc/api/helpers/index.rst @@ -9,6 +9,7 @@ onnx_diagnostic.helpers args_helper bench_run cache_helper + config_helper helper memory_peak onnx_helper diff --git a/_doc/api/index.rst b/_doc/api/index.rst index 2de84cf2..aa5e7b97 100644 --- a/_doc/api/index.rst +++ b/_doc/api/index.rst @@ -10,6 +10,7 @@ API of onnx_diagnostic export/index helpers/index reference/index + tasks/index torch_export_patches/index torch_models/index torch_onnx/index diff --git a/_doc/api/tasks/automatic_speech_recognition.rst b/_doc/api/tasks/automatic_speech_recognition.rst new file mode 100644 index 00000000..5c3f64ea --- /dev/null +++ b/_doc/api/tasks/automatic_speech_recognition.rst @@ -0,0 +1,7 @@ + +onnx_diagnostic.export.automatic_speech_recognition +=================================================== + +.. automodule:: onnx_diagnostic.tasks.automatic_speech_recognition + :members: + :no-undoc-members: diff --git a/_doc/api/tasks/fill_mask.rst b/_doc/api/tasks/fill_mask.rst new file mode 100644 index 00000000..58c6402a --- /dev/null +++ b/_doc/api/tasks/fill_mask.rst @@ -0,0 +1,7 @@ + +onnx_diagnostic.export.fill_mask +================================ + +.. automodule:: onnx_diagnostic.tasks.fill_mask + :members: + :no-undoc-members: diff --git a/_doc/api/tasks/image_classification.rst b/_doc/api/tasks/image_classification.rst new file mode 100644 index 00000000..3643b2f5 --- /dev/null +++ b/_doc/api/tasks/image_classification.rst @@ -0,0 +1,7 @@ + +onnx_diagnostic.export.image_classification +=========================================== + +.. automodule:: onnx_diagnostic.tasks.image_classification + :members: + :no-undoc-members: diff --git a/_doc/api/tasks/image_text_to_text.rst b/_doc/api/tasks/image_text_to_text.rst new file mode 100644 index 00000000..abe80bd8 --- /dev/null +++ b/_doc/api/tasks/image_text_to_text.rst @@ -0,0 +1,7 @@ + +onnx_diagnostic.export.image_text_to_text +========================================= + +.. automodule:: onnx_diagnostic.tasks.image_text_to_text + :members: + :no-undoc-members: diff --git a/_doc/api/tasks/index.rst b/_doc/api/tasks/index.rst new file mode 100644 index 00000000..ae7e14b3 --- /dev/null +++ b/_doc/api/tasks/index.rst @@ -0,0 +1,18 @@ +onnx_diagnostic.tasks +===================== + +.. toctree:: + :maxdepth: 1 + :caption: modules + + automatic_speech_recognition + fill_mask + image_classification + image_text_to_text + text_generation + text2text_generation + zero_shot_image_classification + +.. automodule:: onnx_diagnostic.tasks + :members: + :no-undoc-members: diff --git a/_doc/api/tasks/text2text_generation.rst b/_doc/api/tasks/text2text_generation.rst new file mode 100644 index 00000000..c148d174 --- /dev/null +++ b/_doc/api/tasks/text2text_generation.rst @@ -0,0 +1,7 @@ + +onnx_diagnostic.export.text2text_generation +=========================================== + +.. 
automodule:: onnx_diagnostic.tasks.text2text_generation + :members: + :no-undoc-members: diff --git a/_doc/api/tasks/text_generation.rst b/_doc/api/tasks/text_generation.rst new file mode 100644 index 00000000..3f125381 --- /dev/null +++ b/_doc/api/tasks/text_generation.rst @@ -0,0 +1,7 @@ + +onnx_diagnostic.export.text_generation +====================================== + +.. automodule:: onnx_diagnostic.tasks.text_generation + :members: + :no-undoc-members: diff --git a/_doc/api/tasks/zero_shot_image_classification.rst b/_doc/api/tasks/zero_shot_image_classification.rst new file mode 100644 index 00000000..74d9e619 --- /dev/null +++ b/_doc/api/tasks/zero_shot_image_classification.rst @@ -0,0 +1,7 @@ + +onnx_diagnostic.export.zero_shot_image_classification +===================================================== + +.. automodule:: onnx_diagnostic.tasks.zero_shot_image_classification + :members: + :no-undoc-members: diff --git a/_doc/conf.py b/_doc/conf.py index c1c3e749..0545e77e 100644 --- a/_doc/conf.py +++ b/_doc/conf.py @@ -170,7 +170,9 @@ } if int(os.environ.get("UNITTEST_GOING", "0")): - sphinx_gallery_conf["ignore_pattern"] = ".*((tiny_llm)|(dort)|(draft_mode)).*" + sphinx_gallery_conf["ignore_pattern"] = ( + ".*((tiny_llm)|(dort)|(draft_mode)|(hub_codellama.py)).*" + ) elif pv.Version(torch.__version__) < pv.Version("2.8"): sphinx_gallery_conf["ignore_pattern"] = ".*((_oe_)|(dort)|(draft_mode)).*" diff --git a/_unittests/ut_helpers/test_config_helper.py b/_unittests/ut_helpers/test_config_helper.py new file mode 100644 index 00000000..1ad7a9ad --- /dev/null +++ b/_unittests/ut_helpers/test_config_helper.py @@ -0,0 +1,20 @@ +import unittest +import transformers +from onnx_diagnostic.ext_test_case import ( + ExtTestCase, + requires_torch, + requires_transformers, +) +from onnx_diagnostic.helpers.config_helper import config_class_from_architecture + + +class TestConfigHelper(ExtTestCase): + @requires_transformers("4.50") # we limit to some versions of the CI + @requires_torch("2.7") + def test_config_class_from_architecture(self): + config = config_class_from_architecture("LlamaForCausalLM") + self.assertEqual(config, transformers.LlamaConfig) + + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/_unittests/ut_tasks/test_tasks.py b/_unittests/ut_tasks/test_tasks.py new file mode 100644 index 00000000..e52a0697 --- /dev/null +++ b/_unittests/ut_tasks/test_tasks.py @@ -0,0 +1,105 @@ +import unittest +import torch +from onnx_diagnostic.ext_test_case import ExtTestCase, hide_stdout +from onnx_diagnostic.torch_models.hghub.model_inputs import get_untrained_model_with_inputs +from onnx_diagnostic.torch_export_patches import bypass_export_some_errors + + +class TestTasks(ExtTestCase): + @hide_stdout() + def test_text2text_generation(self): + mid = "sshleifer/tiny-marian-en-de" + # mid = "Salesforce/codet5-small" + data = get_untrained_model_with_inputs(mid, verbose=1) + self.assertIn((data["size"], data["n_weights"]), [(473928, 118482)]) + model, inputs = data["model"], data["inputs"] + raise unittest.SkipTest(f"not working for {mid!r}") + model(**inputs) + + @hide_stdout() + def test_automatic_speech_recognition(self): + mid = "openai/whisper-tiny" + data = get_untrained_model_with_inputs(mid, verbose=1) + self.assertIn((data["size"], data["n_weights"]), [(132115968, 33028992)]) + model, inputs, ds = data["model"], data["inputs"], data["dynamic_shapes"] + Dim = torch.export.Dim + self.maxDiff = None + self.assertIn("{0:Dim(batch),1:Dim(seq_length)}", self.string_type(ds)) 
+ self.assertEqualAny( + { + "decoder_input_ids": { + 0: Dim("batch", min=1, max=1024), + 1: Dim("seq_length", min=1, max=4096), + }, + "cache_position": {0: Dim("seq_length", min=1, max=4096)}, + "encoder_outputs": [{0: Dim("batch", min=1, max=1024)}], + "past_key_values": [ + [ + [ + {0: Dim("batch", min=1, max=1024)}, + {0: Dim("batch", min=1, max=1024)}, + ], + [ + {0: Dim("batch", min=1, max=1024)}, + {0: Dim("batch", min=1, max=1024)}, + ], + ], + [ + [ + {0: Dim("batch", min=1, max=1024)}, + {0: Dim("batch", min=1, max=1024)}, + ], + [ + {0: Dim("batch", min=1, max=1024)}, + {0: Dim("batch", min=1, max=1024)}, + ], + ], + ], + }, + ds, + ) + model(**inputs) + self.assertEqual( + "#1[T1r3]", + self.string_type(torch.utils._pytree.tree_flatten(inputs["encoder_outputs"])[0]), + ) + with bypass_export_some_errors(patch_transformers=True, verbose=10): + flat = torch.utils._pytree.tree_flatten(inputs["past_key_values"])[0] + self.assertIsInstance(flat, list) + self.assertIsInstance(flat[0], torch.Tensor) + self.assertEqual( + "#8[T1r4,T1r4,T1r4,T1r4,T1r4,T1r4,T1r4,T1r4]", + self.string_type(flat), + ) + torch.export.export(model, (), kwargs=inputs, dynamic_shapes=ds, strict=False) + with bypass_export_some_errors(patch_transformers=True, verbose=10): + flat = torch.utils._pytree.tree_flatten(inputs["past_key_values"])[0] + self.assertIsInstance(flat, list) + self.assertIsInstance(flat[0], torch.Tensor) + self.assertEqual( + "#8[T1r4,T1r4,T1r4,T1r4,T1r4,T1r4,T1r4,T1r4]", + self.string_type(flat), + ) + torch.export.export(model, (), kwargs=inputs, dynamic_shapes=ds, strict=False) + + @hide_stdout() + def test_imagetext2text_generation(self): + mid = "HuggingFaceM4/tiny-random-idefics" + # mid = "Salesforce/codet5-small" + data = get_untrained_model_with_inputs(mid, verbose=1) + self.assertIn((data["size"], data["n_weights"]), [(12742888, 3185722)]) + model, inputs = data["model"], data["inputs"] + model(**inputs) + + @hide_stdout() + def test_fill_mask(self): + mid = "google-bert/bert-base-multilingual-cased" + # mid = "Salesforce/codet5-small" + data = get_untrained_model_with_inputs(mid, verbose=1) + self.assertIn((data["size"], data["n_weights"]), [(428383212, 107095803)]) + model, inputs = data["model"], data["inputs"] + model(**inputs) + + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/_unittests/ut_torch_models/try_tasks.py b/_unittests/ut_tasks/try_tasks.py similarity index 86% rename from _unittests/ut_torch_models/try_tasks.py rename to _unittests/ut_tasks/try_tasks.py index d0fdf22e..e05a2161 100644 --- a/_unittests/ut_torch_models/try_tasks.py +++ b/_unittests/ut_tasks/try_tasks.py @@ -7,7 +7,7 @@ class TestHuggingFaceHubModel(ExtTestCase): @never_test() def test_image_classification(self): - # clear&&NEVERTEST=1 python _unittests/ut_torch_models/try_tasks.py -k image_c + # clear&&NEVERTEST=1 python _unittests/ut_tasks/try_tasks.py -k image_c from transformers import ViTImageProcessor, ViTModel from PIL import Image @@ -27,7 +27,7 @@ def test_image_classification(self): @never_test() def test_image_classification_resnet(self): - # clear&&NEVERTEST=1 python _unittests/ut_torch_models/try_tasks.py -k resnet + # clear&&NEVERTEST=1 python _unittests/ut_tasks/try_tasks.py -k resnet from transformers import ViTImageProcessor, ViTModel from PIL import Image @@ -47,7 +47,7 @@ def test_image_classification_resnet(self): @never_test() def test_zero_shot_image_classification(self): - # clear&&NEVERTEST=1 python _unittests/ut_torch_models/try_tasks.py -k zero + # 
clear&&NEVERTEST=1 python _unittests/ut_tasks/try_tasks.py -k zero from PIL import Image import requests from transformers import CLIPProcessor, CLIPModel @@ -74,7 +74,7 @@ def test_zero_shot_image_classification(self): @never_test() def test_text2text_generation(self): - # clear&&NEVERTEST=1 python _unittests/ut_torch_models/try_tasks.py -k text2t + # clear&&NEVERTEST=1 python _unittests/ut_tasks/try_tasks.py -k text2t import torch from transformers import RobertaTokenizer, T5ForConditionalGeneration @@ -100,7 +100,7 @@ def test_text2text_generation(self): @never_test() def test_imagetext2text_generation(self): - # clear&&NEVERTEST=1 python _unittests/ut_torch_models/try_tasks.py -k etext2t + # clear&&NEVERTEST=1 python _unittests/ut_tasks/try_tasks.py -k etext2t # https://huggingface.co/docs/transformers/main/en/tasks/idefics import torch @@ -131,7 +131,7 @@ def test_imagetext2text_generation(self): @never_test() def test_automatic_speech_recognition(self): - # clear&&NEVERTEST=1 python _unittests/ut_torch_models/try_tasks.py -k automatic_speech + # clear&&NEVERTEST=1 python _unittests/ut_tasks/try_tasks.py -k automatic_speech # https://huggingface.co/openai/whisper-tiny from transformers import WhisperProcessor, WhisperForConditionalGeneration @@ -195,6 +195,22 @@ def test_automatic_speech_recognition(self): transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True) print("--", transcription) + @never_test() + def test_fill_mask(self): + # clear&&NEVERTEST=1 python _unittests/ut_tasks/try_tasks.py -k fill + # https://huggingface.co/google-bert/bert-base-multilingual-cased + + from transformers import BertTokenizer, BertModel + + tokenizer = BertTokenizer.from_pretrained("bert-base-multilingual-cased") + model = BertModel.from_pretrained("bert-base-multilingual-cased") + text = "Replace me by any text you'd like." 
+ encoded_input = tokenizer(text, return_tensors="pt") + print() + print("-- inputs", string_type(encoded_input, with_shape=True, with_min_max=True)) + output = model(**encoded_input) + print("-- outputs", string_type(output, with_shape=True, with_min_max=True)) + if __name__ == "__main__": unittest.main(verbosity=2) diff --git a/_unittests/ut_torch_models/test_hghub_api.py b/_unittests/ut_torch_models/test_hghub_api.py index bb418e95..3be120fb 100644 --- a/_unittests/ut_torch_models/test_hghub_api.py +++ b/_unittests/ut_torch_models/test_hghub_api.py @@ -100,7 +100,7 @@ def test_hf_all_models(self): def test_load_architecture_task(self): data = load_architecture_task() - print(set(data.values())) + self.assertNotEmpty(set(data.values())) def test_task_from_tags(self): _tags = [ @@ -132,10 +132,10 @@ def test_model_testings_and_architectures(self): def test__ccached_config_64(self): from onnx_diagnostic.torch_models.hghub.hub_data_cached_configs import ( - _cached_hf_internal_testing_tiny_random_beitforimageclassification, + _ccached_hf_internal_testing_tiny_random_beitforimageclassification, ) - conf = _cached_hf_internal_testing_tiny_random_beitforimageclassification() + conf = _ccached_hf_internal_testing_tiny_random_beitforimageclassification() self.assertEqual(conf.auxiliary_channels, 256) diff --git a/_unittests/ut_torch_models/test_hghub_model.py b/_unittests/ut_torch_models/test_hghub_model.py index 700948b4..6083812f 100644 --- a/_unittests/ut_torch_models/test_hghub_model.py +++ b/_unittests/ut_torch_models/test_hghub_model.py @@ -1,7 +1,5 @@ import pprint import unittest -import torch -import transformers from onnx_diagnostic.ext_test_case import ( ExtTestCase, hide_stdout, @@ -9,22 +7,13 @@ requires_transformers, ignore_errors, ) -from onnx_diagnostic.torch_models.hghub.model_inputs import ( - config_class_from_architecture, - get_untrained_model_with_inputs, -) +from onnx_diagnostic.torch_models.hghub.model_inputs import get_untrained_model_with_inputs from onnx_diagnostic.torch_models.hghub.hub_api import get_pretrained_config from onnx_diagnostic.torch_models.hghub.hub_data import load_models_testing from onnx_diagnostic.torch_export_patches import bypass_export_some_errors class TestHuggingFaceHubModel(ExtTestCase): - @requires_transformers("4.50") # we limit to some versions of the CI - @requires_torch("2.7") - def test_config_class_from_architecture(self): - config = config_class_from_architecture("LlamaForCausalLM") - self.assertEqual(config, transformers.LlamaConfig) - @hide_stdout() def test_get_untrained_model_with_inputs_tiny_llm(self): mid = "arnir0/Tiny-LLM" @@ -107,91 +96,6 @@ def test_get_untrained_model_with_inputs_clip_vit(self): # different expected value for different version of transformers self.assertIn((data["size"], data["n_weights"]), [(188872708, 47218177)]) - @hide_stdout() - def test_get_untrained_model_with_inputs_text2text_generation(self): - mid = "sshleifer/tiny-marian-en-de" - # mid = "Salesforce/codet5-small" - data = get_untrained_model_with_inputs(mid, verbose=1) - self.assertIn((data["size"], data["n_weights"]), [(473928, 118482)]) - model, inputs = data["model"], data["inputs"] - raise unittest.SkipTest(f"not working for {mid!r}") - model(**inputs) - - @hide_stdout() - def test_get_untrained_model_with_inputs_automatic_speech_recognition(self): - mid = "openai/whisper-tiny" - data = get_untrained_model_with_inputs(mid, verbose=1) - self.assertIn((data["size"], data["n_weights"]), [(132115968, 33028992)]) - model, inputs, ds = data["model"], 
data["inputs"], data["dynamic_shapes"] - Dim = torch.export.Dim - self.maxDiff = None - self.assertIn("{0:Dim(batch),1:Dim(seq_length)}", self.string_type(ds)) - self.assertEqualAny( - { - "decoder_input_ids": { - 0: Dim("batch", min=1, max=1024), - 1: Dim("seq_length", min=1, max=4096), - }, - "cache_position": {0: Dim("seq_length", min=1, max=4096)}, - "encoder_outputs": [{0: Dim("batch", min=1, max=1024)}], - "past_key_values": [ - [ - [ - {0: Dim("batch", min=1, max=1024)}, - {0: Dim("batch", min=1, max=1024)}, - ], - [ - {0: Dim("batch", min=1, max=1024)}, - {0: Dim("batch", min=1, max=1024)}, - ], - ], - [ - [ - {0: Dim("batch", min=1, max=1024)}, - {0: Dim("batch", min=1, max=1024)}, - ], - [ - {0: Dim("batch", min=1, max=1024)}, - {0: Dim("batch", min=1, max=1024)}, - ], - ], - ], - }, - ds, - ) - model(**inputs) - self.assertEqual( - "#1[T1r3]", - self.string_type(torch.utils._pytree.tree_flatten(inputs["encoder_outputs"])[0]), - ) - with bypass_export_some_errors(patch_transformers=True, verbose=10): - flat = torch.utils._pytree.tree_flatten(inputs["past_key_values"])[0] - self.assertIsInstance(flat, list) - self.assertIsInstance(flat[0], torch.Tensor) - self.assertEqual( - "#8[T1r4,T1r4,T1r4,T1r4,T1r4,T1r4,T1r4,T1r4]", - self.string_type(flat), - ) - torch.export.export(model, (), kwargs=inputs, dynamic_shapes=ds, strict=False) - with bypass_export_some_errors(patch_transformers=True, verbose=10): - flat = torch.utils._pytree.tree_flatten(inputs["past_key_values"])[0] - self.assertIsInstance(flat, list) - self.assertIsInstance(flat[0], torch.Tensor) - self.assertEqual( - "#8[T1r4,T1r4,T1r4,T1r4,T1r4,T1r4,T1r4,T1r4]", - self.string_type(flat), - ) - torch.export.export(model, (), kwargs=inputs, dynamic_shapes=ds, strict=False) - - @hide_stdout() - def test_get_untrained_model_with_inputs_imagetext2text_generation(self): - mid = "HuggingFaceM4/tiny-random-idefics" - # mid = "Salesforce/codet5-small" - data = get_untrained_model_with_inputs(mid, verbose=1) - self.assertIn((data["size"], data["n_weights"]), [(12742888, 3185722)]) - model, inputs = data["model"], data["inputs"] - model(**inputs) - @hide_stdout() @requires_torch("2.7", "reduce test time") @requires_transformers("4.50", "reduce test time") @@ -210,11 +114,9 @@ def _diff(c1, c2): for mid in load_models_testing(): with self.subTest(mid=mid): if mid in { - "hf-internal-testing/tiny-random-BeitForImageClassification", "hf-internal-testing/tiny-random-MaskFormerForInstanceSegmentation", "hf-internal-testing/tiny-random-MoonshineForConditionalGeneration", "fxmarty/pix2struct-tiny-random", - "hf-internal-testing/tiny-random-ViTMSNForImageClassification", "hf-internal-testing/tiny-random-YolosModel", }: print(f"-- not implemented yet for {mid!r}") diff --git a/_unittests/ut_torch_models/test_test_helpers.py b/_unittests/ut_torch_models/test_test_helpers.py index 6f846983..a1c99a89 100644 --- a/_unittests/ut_torch_models/test_test_helpers.py +++ b/_unittests/ut_torch_models/test_test_helpers.py @@ -9,12 +9,12 @@ filter_inputs, run_ort_fusion, ) -from onnx_diagnostic.torch_models.hghub.model_inputs import get_get_inputs_function_for_tasks +from onnx_diagnostic.tasks import supported_tasks class TestTestHelper(ExtTestCase): def test_get_inputs_for_task(self): - fcts = get_get_inputs_function_for_tasks() + fcts = supported_tasks() for task in self.subloop(sorted(fcts)): data = get_inputs_for_task(task) self.assertIsInstance(data, dict) diff --git a/_unittests/ut_xrun_doc/test_documentation_examples.py 
b/_unittests/ut_xrun_doc/test_documentation_examples.py index 9aedf4b1..77891e73 100644 --- a/_unittests/ut_xrun_doc/test_documentation_examples.py +++ b/_unittests/ut_xrun_doc/test_documentation_examples.py @@ -53,9 +53,9 @@ def run_test(self, fold: str, name: str, verbose=0) -> int: if '"dot" not found in path.' in st: # dot not installed, this part # is tested in onnx framework - if verbose: - print(f"failed: {name!r} due to missing dot.") - return 0 + raise unittest.SkipTest(f"failed: {name!r} due to missing dot.") + if "We couldn't connect to 'https://huggingface.co'" in st: + raise unittest.SkipTest(f"Connectivity issues due to\n{err}") raise AssertionError( # noqa: B904 "Example '{}' (cmd: {} - exec_prefix='{}') " "failed due to\n{}" diff --git a/_unittests/ut_xrun_doc/test_documentation_recipes.py b/_unittests/ut_xrun_doc/test_documentation_recipes.py index 4ea8c171..d057b744 100644 --- a/_unittests/ut_xrun_doc/test_documentation_recipes.py +++ b/_unittests/ut_xrun_doc/test_documentation_recipes.py @@ -52,9 +52,9 @@ def run_test(self, fold: str, name: str, verbose=0) -> int: if '"dot" not found in path.' in st: # dot not installed, this part # is tested in onnx framework - if verbose: - print(f"failed: {name!r} due to missing dot.") - return 0 + raise unittest.SkipTest(f"failed: {name!r} due to missing dot.") + if "We couldn't connect to 'https://huggingface.co'" in st: + raise unittest.SkipTest(f"Connectivity issues due to\n{err}") raise AssertionError( # noqa: B904 "Example '{}' (cmd: {} - exec_prefix='{}') " "failed due to\n{}" diff --git a/onnx_diagnostic/_command_lines_parser.py b/onnx_diagnostic/_command_lines_parser.py index 6d2456d1..b53e196b 100644 --- a/onnx_diagnostic/_command_lines_parser.py +++ b/onnx_diagnostic/_command_lines_parser.py @@ -303,13 +303,13 @@ def get_parser_validate() -> ArgumentParser: def _cmd_validate(argv: List[Any]): from .helpers import string_type from .torch_models.test_helper import get_inputs_for_task, validate_model, _ds_clean - from .torch_models.hghub.model_inputs import get_get_inputs_function_for_tasks + from .tasks import supported_tasks parser = get_parser_validate() args = parser.parse_args(argv[1:]) if not args.task and not args.mid: print("-- list of supported tasks:") - print("\n".join(sorted(get_get_inputs_function_for_tasks()))) + print("\n".join(supported_tasks())) elif not args.mid: data = get_inputs_for_task(args.task) if args.verbose: diff --git a/onnx_diagnostic/helpers/config_helper.py b/onnx_diagnostic/helpers/config_helper.py new file mode 100644 index 00000000..38d353b6 --- /dev/null +++ b/onnx_diagnostic/helpers/config_helper.py @@ -0,0 +1,80 @@ +import functools +import importlib +import inspect +import re +from typing import Any, Dict, Optional, Tuple, Union +import transformers + + +def check_hasattr(config: Any, *args: Union[str, Tuple[Any, ...]]): + """ + Checks the configuration has all the attributes in ``args``. + Raises an exception otherwise.
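A minimal usage sketch for ``check_hasattr``, assuming a plain ``types.SimpleNamespace`` in place of a real ``transformers`` configuration: a bare string requires that attribute, while a tuple lists alternatives::

    import types
    from onnx_diagnostic.helpers.config_helper import check_hasattr

    cfg = types.SimpleNamespace(hidden_size=16, num_attention_heads=4)
    # every plain string must be present
    check_hasattr(cfg, "hidden_size", "num_attention_heads")
    # a tuple passes if any alternative holds: head_dim, or both
    # hidden_size and num_attention_heads
    check_hasattr(cfg, ("head_dim", ("hidden_size", "num_attention_heads")))
    # a missing attribute raises an AssertionError
    # check_hasattr(cfg, "vocab_size")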
+ """ + for a in args: + assert isinstance(a, (str, tuple)), f"unexpected type {type(a)} in {args!r}" + if isinstance(a, str): + assert (isinstance(config, dict) and a in config) or hasattr( + config, a + ), f"Missing attribute {a!r} in\n{config}" + elif isinstance(a, tuple): + assert any( + (isinstance(name, str) and hasattr(config, name)) + or all(hasattr(config, _) for _ in name) + for name in a + ), f"All attributes in {a!r} are missing from\n{config}" + + +def update_config(config: Any, mkwargs: Dict[str, Any]): + """Updates a configuration with different values.""" + for k, v in mkwargs.items(): + if isinstance(v, dict): + assert hasattr( + config, k + ), f"missing attribute {k!r} in config={config}, cannot update it with {v}" + update_config(getattr(config, k), v) + else: + setattr(config, k, v) + + +def _pick(config, *atts): + """Returns the first value found in the configuration.""" + for a in atts: + if isinstance(a, str): + if hasattr(config, a): + return getattr(config, a) + elif isinstance(a, tuple): + if all(hasattr(config, _) for _ in a[1:]): + return a[0]([getattr(config, _) for _ in a[1:]]) + raise AssertionError(f"Unable to find any of these {atts!r} in {config}") + + +@functools.cache +def config_class_from_architecture(arch: str, exc: bool = False) -> Optional[type]: + """ + Retrieves the configuration class for a given architecture. + + :param arch: architecture (clas name) + :param exc: raise an exception if not found + :return: type + """ + cls = getattr(transformers, arch) + mod_name = cls.__module__ + mod = importlib.import_module(mod_name) + source = inspect.getsource(mod) + reg = re.compile("config: ([A-Za-z0-9]+)") + fall = reg.findall(source) + if len(fall) == 0: + assert not exc, ( + f"Unable to guess Configuration class name for arch={arch!r}, " + f"module={mod_name!r}, no candidate, source is\n{source}" + ) + return None + unique = set(fall) + assert len(unique) == 1, ( + f"Unable to guess Configuration class name for arch={arch!r}, " + f"module={mod_name!r}, found={unique} (#{len(unique)}), " + f"source is\n{source}" + ) + cls_name = unique.pop() + return getattr(transformers, cls_name) diff --git a/onnx_diagnostic/tasks/__init__.py b/onnx_diagnostic/tasks/__init__.py new file mode 100644 index 00000000..b2d01d36 --- /dev/null +++ b/onnx_diagnostic/tasks/__init__.py @@ -0,0 +1,44 @@ +from typing import Any, Callable, Dict, List, Tuple +from . import ( + automatic_speech_recognition, + fill_mask, + image_classification, + image_text_to_text, + text_generation, + text2text_generation, + zero_shot_image_classification, +) + +__TASKS__ = [ + automatic_speech_recognition, + fill_mask, + image_classification, + image_text_to_text, + text_generation, + text2text_generation, + zero_shot_image_classification, +] + + +def supported_tasks() -> List[str]: + "Returns the list of supported tasks." + return sorted(mod.__TASK__ for mod in __TASKS__) + + +def reduce_model_config(config: Any, task: str) -> Dict[str, Any]: + """Reduces a model size.""" + tasks = {mod.__TASK__: mod.reduce_model_config for mod in __TASKS__} + assert task in tasks, f"Task {task!r} not found in {sorted(tasks)}" + return tasks[task](config, task) + + +def random_input_kwargs(config: Any, task: str) -> Tuple[Dict[str, Any], Callable]: + """ + Inputs kwargs. + If the configuration is None, the function selects typical dimensions. + It returns parameters and a function. The function creates dummy inputs + if it receives the parameters returned as a first result. 
+ """ + tasks = {mod.__TASK__: mod.random_input_kwargs for mod in __TASKS__} + assert task in tasks, f"Task {task!r} not found in {sorted(tasks)}" + return tasks[task](config, task) diff --git a/onnx_diagnostic/tasks/automatic_speech_recognition.py b/onnx_diagnostic/tasks/automatic_speech_recognition.py new file mode 100644 index 00000000..e9ab82bc --- /dev/null +++ b/onnx_diagnostic/tasks/automatic_speech_recognition.py @@ -0,0 +1,165 @@ +from typing import Any, Callable, Dict, Optional, Tuple +import torch +import transformers +from ..helpers.cache_helper import make_dynamic_cache, make_encoder_decoder_cache +from ..helpers.config_helper import update_config, check_hasattr + +__TASK__ = "automatic-speech-recognition" + + +def reduce_model_config(config: Any, task: str) -> Dict[str, Any]: + """Reduces a model size.""" + kwargs: Dict[str, Any] = {} + if hasattr(config, "num_decoder_layers"): + config.num_decoder_layers = min(config.num_decoder_layers, 2) + if hasattr(config, "decoder_layers"): + config.decoder_layers = min(config.decoder_layers, 2) + if hasattr(config, "num_hidden_layers"): + config.num_hidden_layers = min(config.num_hidden_layers, 2) + update_config(config, kwargs) + return kwargs + + +def get_inputs( + model: torch.nn.Module, + config: Optional[Any], + dummy_max_token_id: int, + max_source_positions: int, + d_model: int, + num_hidden_layers: int, + encoder_attention_heads: int, + encoder_layers: int, + decoder_layers: int, + head_dim: int, + batch_size: int = 2, + sequence_length: int = 30, + **kwargs, # unused +): + """ + Generates inputs for task ``text2text-generation``. + Example: + + :: + + dict( + cache_position:T7s4, + past_key_values:EncoderDecoderCache( + self_attention_cache=DynamicCache[serialized](#2[#0[],#0[]]), + cross_attention_cache=DynamicCache[serialized](#2[#0[],#0[]]) + ), + decoder_input_ids:T7s1x4, + encoder_outputs:BaseModelOutput(last_hidden_state:T1s1x1500x384), + use_cache:bool,return_dict:bool + ) + dict( + cache_position:T7s1, + past_key_values:EncoderDecoderCache( + self_attention_cache=DynamicCache[serialized](#2[ + #4[T1s1x6x4x64,T1s1x6x4x64,T1s1x6x4x64,T1s1x6x4x64], + #4[T1s1x6x4x64,T1s1x6x4x64,T1s1x6x4x64,T1s1x6x4x64] + ]), + cross_attention_cache=DynamicCache[serialized](#2[ + #4[T1s1x6x1500x64,T1s1x6x1500x64,T1s1x6x1500x64,T1s1x6x1500x64], + #4[T1s1x6x1500x64,T1s1x6x1500x64,T1s1x6x1500x64,T1s1x6x1500x64] + ]), + ), + decoder_input_ids:T7s1x1, + encoder_outputs:BaseModelOutput(last_hidden_state:T1s1x1500x384), + use_cache:bool,return_dict:bool + ) + """ + batch = torch.export.Dim("batch", min=1, max=1024) + seq_length = torch.export.Dim("seq_length", min=1, max=4096) + + shapes = { + "decoder_input_ids": {0: batch, 1: seq_length}, + "cache_position": {0: seq_length}, + "encoder_outputs": [{0: batch}], + "past_key_values": [ + [ + [{0: batch} for _ in range(num_hidden_layers)], + [{0: batch} for _ in range(num_hidden_layers)], + ], + [ + [{0: batch} for _ in range(num_hidden_layers)], + [{0: batch} for _ in range(num_hidden_layers)], + ], + ], + } + inputs = dict( + decoder_input_ids=torch.randint( + 0, dummy_max_token_id, (batch_size, sequence_length) + ).to(torch.int64), + cache_position=(torch.arange(sequence_length) + 5).to(torch.int64), + encoder_outputs=transformers.modeling_outputs.BaseModelOutput( + last_hidden_state=torch.randn(batch_size, max_source_positions, d_model) + ), + past_key_values=make_encoder_decoder_cache( + make_dynamic_cache( + [ + ( + torch.randn( + batch_size, encoder_attention_heads, encoder_layers, head_dim + 
), + torch.randn( + batch_size, encoder_attention_heads, encoder_layers, head_dim + ), + ) + for i in range(num_hidden_layers) + ] + ), + make_dynamic_cache( + [ + ( + torch.randn( + batch_size, encoder_attention_heads, max_source_positions, head_dim + ), + torch.randn( + batch_size, encoder_attention_heads, max_source_positions, head_dim + ), + ) + for i in range(num_hidden_layers) + ] + ), + ), + # one these is selected based on the forward method signature + # encoder_last_hidden_state=torch.randn(batch_size, sequence_length2, encoder_dim), + # encoder_outputs=torch.randn(batch_size, sequence_length2, encoder_dim), + ) + return dict(inputs=inputs, dynamic_shapes=shapes) + + +def random_input_kwargs(config: Any, task: str) -> Tuple[Dict[str, Any], Callable]: + """ + Inputs kwargs. + + If the configuration is None, the function selects typical dimensions. + """ + if config is not None: + check_hasattr( + config, + "d_model", + "decoder_attention_heads", + "decoder_layers", + "encoder_attention_heads", + "encoder_layers", + "max_source_positions", + "num_hidden_layers", + "vocab_size", + ) + kwargs = dict( + batch_size=2, + sequence_length=30, + dummy_max_token_id=31000 if config is None else config.vocab_size, + max_source_positions=1500 if config is None else config.max_source_positions, + d_model=384 if config is None else config.d_model, + num_hidden_layers=4 if config is None else config.num_hidden_layers, + encoder_attention_heads=6 if config is None else config.encoder_attention_heads, + encoder_layers=4 if config is None else config.encoder_layers, + decoder_attention_heads=6 if config is None else config.decoder_attention_heads, + decoder_layers=4 if config is None else config.decoder_layers, + head_dim=( + 64 if config is None else (config.d_model // config.encoder_attention_heads) + ), + ) + return kwargs, get_inputs diff --git a/onnx_diagnostic/tasks/fill_mask.py b/onnx_diagnostic/tasks/fill_mask.py new file mode 100644 index 00000000..ef427b91 --- /dev/null +++ b/onnx_diagnostic/tasks/fill_mask.py @@ -0,0 +1,67 @@ +from typing import Any, Callable, Dict, Optional, Tuple +import torch +from ..helpers.config_helper import update_config, check_hasattr + +__TASK__ = "fill-mask" + + +def reduce_model_config(config: Any, task: str) -> Dict[str, Any]: + """Reduces a model size.""" + check_hasattr(config, "num_attention_heads", "num_hidden_layers") + kwargs = dict( + num_hidden_layers=min(config.num_hidden_layers, 2), + num_attention_heads=min(config.num_attention_heads, 4), + ) + update_config(config, kwargs) + return kwargs + + +def get_inputs( + model: torch.nn.Module, + config: Optional[Any], + batch_size: int, + sequence_length: int, + dummy_max_token_id: int, + **kwargs, # unused +): + """ + Generates inputs for task ``fill-mask``. 
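A sketch of producing such inputs without any checkpoint, going through the task dispatcher with its ``config=None`` defaults::

    from onnx_diagnostic.tasks import random_input_kwargs

    kwargs, fct = random_input_kwargs(None, "fill-mask")
    data = fct(model=None, config=None, **kwargs)
    print({k: tuple(v.shape) for k, v in data["inputs"].items()})
    # {'input_ids': (2, 30), 'token_type_ids': (2, 30), 'attention_mask': (2, 30)}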
+ Example: + + :: + + input_ids:T7s1x13[101,72654:A16789.23076923077], + token_type_ids:T7s1x13[0,0:A0.0], + attention_mask:T7s1x13[1,1:A1.0]) + """ + batch = torch.export.Dim("batch", min=1, max=1024) + seq_length = torch.export.Dim("sequence_length", min=1, max=1024) + shapes = { + "input_ids": {0: batch, 1: seq_length}, + "token_type_ids": {0: batch, 1: seq_length}, + "attention_mask": {0: batch, 1: seq_length}, + } + inputs = dict( + input_ids=torch.randint(0, dummy_max_token_id, (batch_size, sequence_length)).to( + torch.int64 + ), + token_type_ids=torch.zeros((batch_size, sequence_length)).to(torch.int64), + attention_mask=torch.ones((batch_size, sequence_length)).to(torch.int64), + ) + return dict(inputs=inputs, dynamic_shapes=shapes) + + +def random_input_kwargs(config: Any, task: str) -> Tuple[Dict[str, Any], Callable]: + """ + Inputs kwargs. + + If the configuration is None, the function selects typical dimensions. + """ + if config is not None: + check_hasattr(config, "vocab_size") + kwargs = dict( + batch_size=2, + sequence_length=30, + dummy_max_token_id=31999 if config is None else (config.vocab_size - 1), + ) + return kwargs, get_inputs diff --git a/onnx_diagnostic/tasks/image_classification.py b/onnx_diagnostic/tasks/image_classification.py new file mode 100644 index 00000000..4b0c9757 --- /dev/null +++ b/onnx_diagnostic/tasks/image_classification.py @@ -0,0 +1,96 @@ +from typing import Any, Callable, Dict, Optional, Tuple +import torch +from ..helpers.config_helper import update_config, check_hasattr + +__TASK__ = "image-classification" + + +def reduce_model_config(config: Any, task: str) -> Dict[str, Any]: + """Reduces a model size.""" + check_hasattr(config, ("num_hidden_layers", "hidden_sizes")) + kwargs = dict( + num_hidden_layers=( + min(config.num_hidden_layers, 2) + if hasattr(config, "num_hidden_layers") + else len(config.hidden_sizes) + ) + ) + update_config(config, kwargs) + return kwargs + + +def get_inputs( + model: torch.nn.Module, + config: Optional[Any], + input_width: int, + input_height: int, + input_channels: int, + batch_size: int = 2, + dynamic_rope: bool = False, + **kwargs, # unused +): + """ + Generates inputs for task ``image-classification``. + + :param model: model to get the missing information + :param config: configuration used to generate the model + :param batch_size: batch size + :param input_channels: input channel + :param input_width: input width + :param input_height: input height + :return: dictionary + """ + assert isinstance( + input_width, int + ), f"Unexpected type for input_width {type(input_width)}{config}" + assert isinstance( + input_width, int + ), f"Unexpected type for input_height {type(input_height)}{config}" + + shapes = { + "pixel_values": { + 0: torch.export.Dim("batch", min=1, max=1024), + 2: torch.export.Dim("width", min=1, max=4096), + 3: torch.export.Dim("height", min=1, max=4096), + }, + } + inputs = dict( + pixel_values=torch.randn(batch_size, input_channels, input_width, input_height).clamp( + -1, 1 + ), + ) + return dict(inputs=inputs, dynamic_shapes=shapes) + + +def random_input_kwargs(config: Any, task: str) -> Tuple[Dict[str, Any], Callable]: + """ + Inputs kwargs. + + If the configuration is None, the function selects typical dimensions. 
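For instance, with no configuration the defaults below produce a ``[2, 3, 224, 224]`` pixel tensor and mark batch, width and height as dynamic; a minimal sketch::

    from onnx_diagnostic.tasks import random_input_kwargs

    kwargs, fct = random_input_kwargs(None, "image-classification")
    data = fct(model=None, config=None, **kwargs)
    print(data["inputs"]["pixel_values"].shape)             # torch.Size([2, 3, 224, 224])
    print(sorted(data["dynamic_shapes"]["pixel_values"]))   # [0, 2, 3]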
+ """ + if config is not None: + check_hasattr(config, ("image_size", "architectures"), "num_channels") + if config is not None: + if hasattr(config, "image_size"): + image_size = config.image_size + else: + assert config.architectures, f"empty architecture in {config}" + from ..torch_models.hghub.hub_api import get_architecture_default_values + + default_values = get_architecture_default_values(config.architectures[0]) + image_size = default_values["image_size"] + if config is None or isinstance(image_size, int): + kwargs = dict( + batch_size=2, + input_width=224 if config is None else image_size, + input_height=224 if config is None else image_size, + input_channels=3 if config is None else config.num_channels, + ) + else: + kwargs = dict( + batch_size=2, + input_width=config.image_size[0], + input_height=config.image_size[1], + input_channels=config.num_channels, + ) + return kwargs, get_inputs diff --git a/onnx_diagnostic/tasks/image_text_to_text.py b/onnx_diagnostic/tasks/image_text_to_text.py new file mode 100644 index 00000000..60d7cfd0 --- /dev/null +++ b/onnx_diagnostic/tasks/image_text_to_text.py @@ -0,0 +1,145 @@ +from typing import Any, Callable, Dict, Optional, Tuple +import torch +from ..helpers.cache_helper import make_dynamic_cache +from ..helpers.config_helper import update_config, check_hasattr, _pick + +__TASK__ = "image-text-to-text" + + +def reduce_model_config(config: Any, task: str) -> Dict[str, Any]: + """Reduces a model size.""" + kwargs: Dict[str, Any] = {} + if hasattr(config, "num_hidden_layers"): + config.num_hidden_layers = min(config.num_hidden_layers, 2) + if hasattr(config, "vision_config") and hasattr(config.vision_config, "num_hidden_layers"): + config.vision_config.num_hidden_layers = min(config.vision_config.num_hidden_layers, 2) + update_config(config, kwargs) + return kwargs + + +def get_inputs( + model: torch.nn.Module, + config: Optional[Any], + dummy_max_token_id: int, + num_key_value_heads: int, + num_hidden_layers: int, + head_dim: int, + width: int, + height: int, + num_channels: int, + batch_size: int = 2, + sequence_length: int = 30, + sequence_length2: int = 3, + n_images: int = 2, + dynamic_rope: bool = False, + **kwargs, # unused +): + """ + Generates input for task ``text-generation``. 
+ + :param model: model to get the missing information + :param config: configuration used to generate the model + :param head_dim: last dimension of the cache + :param dummy_max_token_id: dummy max token id + :param batch_size: batch size + :param sequence_length: sequence length + :param sequence_length2: new sequence length + :param n_images: number of images + :param width: width of the image + :param height: height of the image + :param num_channels: number of channels + :param dynamic_rope: use dynamic rope (see :class:`transformers.LlamaConfig`) + :return: dictionary + """ + batch = torch.export.Dim("batch", min=1, max=1024) + seq_length = torch.export.Dim("seq_length", min=1, max=4096) + cache_length = torch.export.Dim("cache_length", min=1, max=4096) + images = torch.export.Dim("images", min=1, max=4096) + + shapes = { + "input_ids": {0: batch, 1: seq_length}, + "attention_mask": { + 0: batch, + 1: torch.export.Dim.DYNAMIC, # cache_length + seq_length + }, + "position_ids": { + 0: batch, + 1: torch.export.Dim.DYNAMIC, # cache_length + seq_length + }, + "past_key_values": [ + [{0: batch, 2: cache_length} for _ in range(num_hidden_layers)], + [{0: batch, 2: cache_length} for _ in range(num_hidden_layers)], + ], + "pixel_values": {0: batch, 1: images}, + "image_attention_mask": {0: batch, 1: seq_length, 2: images}, + } + inputs = dict( + input_ids=torch.randint(0, dummy_max_token_id, (batch_size, sequence_length2)).to( + torch.int64 + ), + attention_mask=torch.ones((batch_size, sequence_length + sequence_length2)).to( + torch.int64 + ), + position_ids=torch.arange(sequence_length, sequence_length + sequence_length2) + .to(torch.int64) + .expand((batch_size, -1)), + past_key_values=make_dynamic_cache( + [ + ( + torch.randn(batch_size, num_key_value_heads, sequence_length, head_dim), + torch.randn(batch_size, num_key_value_heads, sequence_length, head_dim), + ) + for i in range(num_hidden_layers) + ] + ), + image_attention_mask=torch.ones((batch_size, sequence_length2, n_images)).to( + torch.int64 + ), + pixel_values=torch.ones((batch_size, n_images, num_channels, width, height)).to( + torch.int64 + ), + ) + return dict(inputs=inputs, dynamic_shapes=shapes) + + +def random_input_kwargs(config: Any, task: str) -> Tuple[Dict[str, Any], Callable]: + """ + Inputs kwargs. + + If the configuration is None, the function selects typical dimensions. 
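A sketch of the dummy batch obtained with the ``config=None`` defaults: two 3-channel 224x224 images per sample plus a ``DynamicCache`` for the text side::

    from onnx_diagnostic.tasks import random_input_kwargs

    kwargs, fct = random_input_kwargs(None, "image-text-to-text")
    data = fct(model=None, config=None, **kwargs)
    print(sorted(data["inputs"]))
    # ['attention_mask', 'image_attention_mask', 'input_ids',
    #  'past_key_values', 'pixel_values', 'position_ids']
    print(data["inputs"]["pixel_values"].shape)   # torch.Size([2, 2, 3, 224, 224])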
+ """ + if config is not None: + check_hasattr( + config, + "vocab_size", + "hidden_size", + "num_attention_heads", + ("num_key_value_heads", "num_attention_heads"), + "intermediate_size", + "hidden_size", + "vision_config", + ) + check_hasattr(config.vision_config, "image_size", "num_channels") + kwargs = dict( + batch_size=2, + sequence_length=30, + sequence_length2=3, + head_dim=( + 16 + if config is None + else getattr(config, "head_dim", config.hidden_size // config.num_attention_heads) + ), + dummy_max_token_id=31999 if config is None else config.vocab_size - 1, + num_hidden_layers=4 if config is None else config.num_hidden_layers, + num_key_value_heads=( + 8 + if config is None + else _pick(config, "num_key_value_heads", "num_attention_heads") + ), + intermediate_size=1024 if config is None else config.intermediate_size, + hidden_size=512 if config is None else config.hidden_size, + width=224 if config is None else config.vision_config.image_size, + height=224 if config is None else config.vision_config.image_size, + num_channels=3 if config is None else config.vision_config.num_channels, + ) + return kwargs, get_inputs diff --git a/onnx_diagnostic/tasks/text2text_generation.py b/onnx_diagnostic/tasks/text2text_generation.py new file mode 100644 index 00000000..abce3714 --- /dev/null +++ b/onnx_diagnostic/tasks/text2text_generation.py @@ -0,0 +1,172 @@ +from typing import Any, Callable, Dict, Optional, Tuple +import torch +from ..helpers.cache_helper import make_dynamic_cache, make_encoder_decoder_cache +from ..helpers.config_helper import update_config, check_hasattr, _pick + +__TASK__ = "text2text-generation" + + +def reduce_model_config(config: Any, task: str) -> Dict[str, Any]: + """Reduces a model size.""" + kwargs: Dict[str, Any] = {} + if hasattr(config, "num_decoder_layers"): + config.num_decoder_layers = min(config.num_decoder_layers, 2) + if hasattr(config, "num_hidden_layers"): + config.num_hidden_layers = min(config.num_hidden_layers, 2) + update_config(config, kwargs) + return kwargs + + +def get_inputs( + model: torch.nn.Module, + config: Optional[Any], + dummy_max_token_id: int, + num_key_value_heads: int, + num_hidden_layers: int, + head_dim: int, + encoder_dim: int, + batch_size: int = 2, + sequence_length: int = 30, + sequence_length2: int = 3, + **kwargs, # unused +): + """ + Generates input for task ``text2text-generation``. + + :param model: model to get the missing information + :param config: configuration used to generate the model + :param head_dim: last dimension of the cache + :param dummy_max_token_id: dummy max token id + :param batch_size: batch size + :param encoder_dim: last dimension of encoder_last_hidden_state + :param sequence_length: sequence length + :param sequence_length2: new sequence length + :return: dictionary + + Stolen inputs for one model. 
+ + :: + + cache_position:T7s1 + past_key_values:EncoderDecoderCache( + self_attention_cache=DynamicCache( + key_cache=#6[T1s1x8x1x64,...], + value_cache=#6[T1s1x8x1x64,...]), + cross_attention_cache=DynamicCache( + key_cache=#6[T1s1x8x16x64,...], + value_cache=#6[T1s1x8x16x64,...])), + decoder_input_ids:T7s1x1, + encoder_outputs:dict(last_hidden_state:T1s1x16x512) + """ + batch = torch.export.Dim("batch", min=1, max=1024) + seq_length = torch.export.Dim("seq_length", min=1, max=4096) + cache_length = torch.export.Dim("cache_length", min=1, max=4096) + cache_length2 = torch.export.Dim("cache_length2", min=1, max=4096) + + shapes = { + "input_ids": {0: batch, 1: seq_length}, + "decoder_input_ids": {0: batch, 1: torch.export.Dim.DYNAMIC}, + "attention_mask": {0: batch, 1: torch.export.Dim.DYNAMIC}, + # "cache_position": {0: batch, 1: torch.export.Dim.DYNAMIC}, + "past_key_values": [ + [ + [{0: batch, 2: cache_length} for _ in range(num_hidden_layers)], + [{0: batch, 2: cache_length} for _ in range(num_hidden_layers)], + ], + [ + [{0: batch, 2: cache_length2} for _ in range(num_hidden_layers)], + [{0: batch, 2: cache_length2} for _ in range(num_hidden_layers)], + ], + ], + # one these is selected based on the forward method signature + # "encoder_last_hidden_state": {0: batch, 1: torch.export.Dim.DYNAMIC}, + # "encoder_outputs": {0: batch, 1: torch.export.Dim.DYNAMIC}, + } + inputs = dict( + input_ids=torch.randint(0, dummy_max_token_id, (batch_size, sequence_length)).to( + torch.int64 + ), + decoder_input_ids=torch.randint( + 0, dummy_max_token_id, (batch_size, sequence_length2) + ).to(torch.int64), + attention_mask=torch.ones((batch_size, sequence_length)).to(torch.int64), + # cache_position=torch.arange(sequence_length, sequence_length + sequence_length2) + # .to(torch.int64) + # .expand((batch_size, -1)), + past_key_values=make_encoder_decoder_cache( + make_dynamic_cache( + [ + ( + torch.randn( + batch_size, num_key_value_heads, sequence_length, head_dim + ), + torch.randn( + batch_size, num_key_value_heads, sequence_length, head_dim + ), + ) + for i in range(num_hidden_layers) + ] + ), + make_dynamic_cache( + [ + ( + torch.randn( + batch_size, num_key_value_heads, sequence_length2, head_dim + ), + torch.randn( + batch_size, num_key_value_heads, sequence_length2, head_dim + ), + ) + for i in range(num_hidden_layers) + ] + ), + ), + # one these is selected based on the forward method signature + # encoder_last_hidden_state=torch.randn(batch_size, sequence_length2, encoder_dim), + # encoder_outputs=torch.randn(batch_size, sequence_length2, encoder_dim), + ) + return dict(inputs=inputs, dynamic_shapes=shapes) + + +def random_input_kwargs(config: Any, task: str) -> Tuple[Dict[str, Any], Callable]: + """ + Inputs kwargs. + + If the configuration is None, the function selects typical dimensions. 
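A sketch of the cache built with the ``config=None`` defaults, assuming ``make_encoder_decoder_cache`` returns the ``transformers`` ``EncoderDecoderCache`` shown above::

    from onnx_diagnostic.tasks import random_input_kwargs

    kwargs, fct = random_input_kwargs(None, "text2text-generation")
    data = fct(model=None, config=None, **kwargs)
    cache = data["inputs"]["past_key_values"]
    print(cache.self_attention_cache.key_cache[0].shape)    # torch.Size([2, 16, 30, 16])
    print(cache.cross_attention_cache.key_cache[0].shape)   # torch.Size([2, 16, 3, 16])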
+ """ + if config is not None: + check_hasattr( + config, + "vocab_size", + "hidden_size", + "num_attention_heads", + ("num_hidden_layers", "num_layers"), + ("n_positions", "d_model"), + ( + "num_key_value_heads", + "num_heads", + ("decoder_attention_heads", "encoder_attention_heads"), + ), + ) + kwargs = dict( + batch_size=2, + sequence_length=30, + sequence_length2=3, + head_dim=16 if config is None else (config.d_kv if hasattr(config, "d_kv") else 1), + dummy_max_token_id=31999 if config is None else config.vocab_size - 1, + num_hidden_layers=( + 8 if config is None else _pick(config, "num_hidden_layers", "num_layers") + ), + num_key_value_heads=( + 16 + if config is None + else _pick( + config, + "num_key_value_heads", + "num_heads", + (sum, "encoder_attention_heads", "decoder_attention_heads"), + ) + ), + encoder_dim=512 if config is None else _pick(config, "n_positions", "d_model"), + ) + return kwargs, get_inputs diff --git a/onnx_diagnostic/tasks/text_generation.py b/onnx_diagnostic/tasks/text_generation.py new file mode 100644 index 00000000..7cb249a5 --- /dev/null +++ b/onnx_diagnostic/tasks/text_generation.py @@ -0,0 +1,148 @@ +from typing import Any, Callable, Dict, Optional, Tuple +import torch +from ..helpers.cache_helper import make_dynamic_cache +from ..helpers.config_helper import update_config, check_hasattr, _pick + +__TASK__ = "text-generation" + + +def reduce_model_config(config: Any, task: str) -> Dict[str, Any]: + """Reduces a model size.""" + check_hasattr( + config, + ("head_dim", ("hidden_size", "num_attention_heads")), + "num_hidden_layers", + ("num_key_value_heads", "num_attention_heads"), + "intermediate_size", + "hidden_size", + ) + kwargs = dict( + head_dim=getattr(config, "head_dim", config.hidden_size // config.num_attention_heads), + num_hidden_layers=min(config.num_hidden_layers, 2), + num_key_value_heads=( + config.num_key_value_heads + if hasattr(config, "num_key_value_heads") + else config.num_attention_heads + ), + intermediate_size=( + min(config.intermediate_size, 24576 // 4) + if config.intermediate_size % 4 == 0 + else config.intermediate_size + ), + hidden_size=( + min(config.hidden_size, 3072 // 4) + if config.hidden_size % 4 == 0 + else config.hidden_size + ), + ) + update_config(config, kwargs) + return kwargs + + +def get_inputs( + model: torch.nn.Module, + config: Optional[Any], + dummy_max_token_id: int, + num_key_value_heads: int, + num_hidden_layers: int, + head_dim: int, + batch_size: int = 2, + sequence_length: int = 30, + sequence_length2: int = 3, + dynamic_rope: bool = False, + **kwargs, # unused +): + """ + Generates input for task ``text-generation``. 
+ + :param model: model to get the missing information + :param config: configuration used to generate the model + :param head_dim: last dimension of the cache + :param dummy_max_token_id: dummy max token id + :param batch_size: batch size + :param sequence_length: sequence length + :param sequence_length2: new sequence length + :param dynamic_rope: use dynamic rope (see :class:`transformers.LlamaConfig`) + :return: dictionary + """ + if head_dim is None: + assert config, "head_dim is None, the value cannot be set without a configuration" + head_dim = config.hidden_size // config.num_attention_heads + batch = torch.export.Dim("batch", min=1, max=1024) + seq_length = torch.export.Dim("seq_length", min=1, max=4096) + cache_length = torch.export.Dim("cache_length", min=1, max=4096) + + shapes = { + "input_ids": {0: batch, 1: seq_length}, + "attention_mask": { + 0: batch, + 1: torch.export.Dim.DYNAMIC, # cache_length + seq_length + }, + "position_ids": { + 0: batch, + 1: torch.export.Dim.DYNAMIC, # cache_length + seq_length + }, + "past_key_values": [ + [{0: batch, 2: cache_length} for _ in range(num_hidden_layers)], + [{0: batch, 2: cache_length} for _ in range(num_hidden_layers)], + ], + } + inputs = dict( + input_ids=torch.randint(0, dummy_max_token_id, (batch_size, sequence_length2)).to( + torch.int64 + ), + attention_mask=torch.ones((batch_size, sequence_length + sequence_length2)).to( + torch.int64 + ), + position_ids=torch.arange(sequence_length, sequence_length + sequence_length2) + .to(torch.int64) + .expand((batch_size, -1)), + past_key_values=make_dynamic_cache( + [ + ( + torch.randn(batch_size, num_key_value_heads, sequence_length, head_dim), + torch.randn(batch_size, num_key_value_heads, sequence_length, head_dim), + ) + for i in range(num_hidden_layers) + ] + ), + ) + return dict(inputs=inputs, dynamic_shapes=shapes) + + +def random_input_kwargs(config: Any, task: str) -> Tuple[Dict[str, Any], Callable]: + """ + Inputs kwargs. + + If the configuration is None, the function selects typical dimensions. 
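Putting the pieces together for a decoder-only model, a sketch that mirrors what the unit tests in this diff do (``arnir0/Tiny-LLM``, ``get_untrained_model_with_inputs`` and ``bypass_export_some_errors`` all appear there)::

    import torch
    from onnx_diagnostic.tasks import random_input_kwargs
    from onnx_diagnostic.torch_models.hghub.model_inputs import get_untrained_model_with_inputs
    from onnx_diagnostic.torch_export_patches import bypass_export_some_errors

    model = get_untrained_model_with_inputs("arnir0/Tiny-LLM")["model"]
    kwargs, fct = random_input_kwargs(model.config, "text-generation")
    data = fct(model, model.config, **kwargs)
    with bypass_export_some_errors(patch_transformers=True):
        ep = torch.export.export(
            model, (), kwargs=data["inputs"],
            dynamic_shapes=data["dynamic_shapes"], strict=False,
        )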
+ """ + if config is not None: + check_hasattr( + config, + "vocab_size", + "hidden_size", + "num_attention_heads", + ("num_key_value_heads", "num_attention_heads"), + "intermediate_size", + "hidden_size", + ) + kwargs = dict( + batch_size=2, + sequence_length=30, + sequence_length2=3, + head_dim=( + 16 + if config is None + else getattr(config, "head_dim", config.hidden_size // config.num_attention_heads) + ), + dummy_max_token_id=31999 if config is None else (config.vocab_size - 1), + num_hidden_layers=4 if config is None else config.num_hidden_layers, + num_key_value_heads=( + 24 + if config is None + else _pick(config, "num_key_value_heads", "num_attention_heads") + ), + intermediate_size=1024 if config is None else config.intermediate_size, + hidden_size=512 if config is None else config.hidden_size, + ) + return kwargs, get_inputs diff --git a/onnx_diagnostic/tasks/zero_shot_image_classification.py b/onnx_diagnostic/tasks/zero_shot_image_classification.py new file mode 100644 index 00000000..87f774e5 --- /dev/null +++ b/onnx_diagnostic/tasks/zero_shot_image_classification.py @@ -0,0 +1,106 @@ +from typing import Any, Callable, Dict, Optional, Tuple +import torch +from ..helpers.config_helper import update_config, check_hasattr + +__TASK__ = "zero-shot-image-classification" + + +def reduce_model_config(config: Any, task: str) -> Dict[str, Any]: + """Reduces a model size.""" + check_hasattr(config, "vision_config", "text_config") + check_hasattr(config.vision_config, "num_hidden_layers", "num_attention_heads") + check_hasattr(config.text_config, "num_hidden_layers", "num_attention_heads") + kwargs = dict( + vision_config=dict( + num_hidden_layers=min(2, config.vision_config.num_hidden_layers), + num_attention_heads=min(2, config.vision_config.num_attention_heads), + ), + text_config=dict( + num_hidden_layers=min(2, config.text_config.num_hidden_layers), + num_attention_heads=min(2, config.text_config.num_attention_heads), + ), + ) + update_config(config, kwargs) + return kwargs + + +def get_inputs( + model: torch.nn.Module, + config: Optional[Any], + dummy_max_token_id: int, + batch_size: int = 2, + sequence_length: int = 30, + input_width: int = 224, + input_height: int = 224, + input_channels: int = 3, + batch_size_image=3, + **kwargs, # unused +): + """ + Generates inputs for task ``zero-short-image-classification``. 
+ + :param model: model to get the missing information + :param config: configuration used to generate the model + :param dummy_max_token_id: vocabulary size + :param batch_size: batch size + :param sequence_length: sequence length + :param batch_size_image: number of images + :param input_channels: input channel + :param input_width: input width + :param input_height: input height + :return: dictionary + + # input_ids:T7s2x7 + # attention_mask:T7s2x7 + # pixel_values:T1s2x3x224x224 + """ + assert isinstance( + input_width, int + ), f"Unexpected type for input_width {type(input_width)}{config}" + assert isinstance( + input_width, int + ), f"Unexpected type for input_height {type(input_height)}{config}" + + batch = torch.export.Dim("batch", min=1, max=1024) + seq_length = torch.export.Dim("seq_length", min=1, max=4096) + shapes = { + "inputs_ids": {0: batch, 1: seq_length}, + "attention_mask": {0: batch, 1: seq_length}, + "pixel_values": { + 0: torch.export.Dim("batch_img", min=1, max=1024), + # 2: torch.export.Dim("width", min=1, max=4096), + # 3: torch.export.Dim("height", min=1, max=4096), + }, + } + inputs = dict( + input_ids=torch.randint(0, dummy_max_token_id, (batch_size, sequence_length)).to( + torch.int64 + ), + attention_mask=torch.ones((batch_size, sequence_length)).to(torch.int64), + pixel_values=torch.randn( + batch_size_image, input_channels, input_width, input_height + ).clamp(-1, 1), + ) + return dict(inputs=inputs, dynamic_shapes=shapes) + + +def random_input_kwargs(config: Any, task: str) -> Tuple[Dict[str, Any], Callable]: + """ + Inputs kwargs. + + If the configuration is None, the function selects typical dimensions. + """ + if config is not None: + check_hasattr(config, "vision_config", "text_config") + check_hasattr(config.vision_config, "image_size", "num_channels") + check_hasattr(config.text_config, "vocab_size") + kwargs = dict( + batch_size=2, + batch_size_image=3, + sequence_length=30, + dummy_max_token_id=(49408 if config is None else (config.text_config.vocab_size - 1)), + input_width=224 if config is None else config.vision_config.image_size, + input_height=224 if config is None else config.vision_config.image_size, + input_channels=3 if config is None else config.vision_config.num_channels, + ) + return kwargs, get_inputs diff --git a/onnx_diagnostic/torch_models/hghub/hub_data.py b/onnx_diagnostic/torch_models/hghub/hub_data.py index 6acda29d..4da7f968 100644 --- a/onnx_diagnostic/torch_models/hghub/hub_data.py +++ b/onnx_diagnostic/torch_models/hghub/hub_data.py @@ -13,6 +13,7 @@ ASTModel,feature-extraction AlbertModel,feature-extraction BeitForImageClassification,image-classification + BertForMaskedLM,fill-mask BigBirdModel,feature-extraction BlenderbotModel,feature-extraction BloomModel,feature-extraction diff --git a/onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py b/onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py index 3949c1c3..89786821 100644 --- a/onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py +++ b/onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py @@ -34,13 +34,13 @@ "Config {", "Config (**{", ) - rows = [f"def _cached_{name}():", f' "{c}"', f" return transformers.{sconf})"] + rows = [f"def _ccached_{name}():", f' "{c}"', f" return transformers.{sconf})"] srows = "\\n".join(rows) if len(srows) < 2048: print(srows) else: rows = [ - f"def _cached_{name}():", + f"def _ccached_{name}():", f' "{c}"', f' t64 = textwrap.dedent(\"\"\"', *w64, @@ -130,7 +130,7 @@ def _ccached_microsoft_phi2(): ) -def 
_cached_hf_internal_testing_tiny_random_beitforimageclassification(): +def _ccached_hf_internal_testing_tiny_random_beitforimageclassification(): "hf-internal-testing/tiny-random-BeitForImageClassification" return transformers.BeitConfig( **{ @@ -3439,3 +3439,32 @@ def _ccached_openai_clip_vit_base_patch16(): }, } ) + + +def _ccached_google_bert_bert_base_multilingual_cased(): + "google-bert/bert-base-multilingual-cased" + return transformers.BertConfig( + **{ + "architectures": ["BertForMaskedLM"], + "attention_probs_dropout_prob": 0.1, + "directionality": "bidi", + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 512, + "model_type": "bert", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "pad_token_id": 0, + "pooler_fc_size": 768, + "pooler_num_attention_heads": 12, + "pooler_num_fc_layers": 3, + "pooler_size_per_head": 128, + "pooler_type": "first_token_transform", + "type_vocab_size": 2, + "vocab_size": 119547, + } + ) diff --git a/onnx_diagnostic/torch_models/hghub/model_inputs.py b/onnx_diagnostic/torch_models/hghub/model_inputs.py index 969b4e95..0625b0f6 100644 --- a/onnx_diagnostic/torch_models/hghub/model_inputs.py +++ b/onnx_diagnostic/torch_models/hghub/model_inputs.py @@ -1,370 +1,10 @@ -import functools -import importlib import inspect -import re -from typing import Any, Callable, Dict, Optional, Tuple, Union +from typing import Any, Dict, Optional, Tuple import torch import transformers -from ...helpers.cache_helper import make_dynamic_cache, make_encoder_decoder_cache -from .hub_api import task_from_arch, get_pretrained_config, get_architecture_default_values - - -@functools.cache -def config_class_from_architecture(arch: str, exc: bool = False) -> Optional[type]: - """ - Retrieves the configuration class for a given architecture. 
- - :param arch: architecture (clas name) - :param exc: raise an exception if not found - :return: type - """ - cls = getattr(transformers, arch) - mod_name = cls.__module__ - mod = importlib.import_module(mod_name) - source = inspect.getsource(mod) - reg = re.compile("config: ([A-Za-z0-9]+)") - fall = reg.findall(source) - if len(fall) == 0: - assert not exc, ( - f"Unable to guess Configuration class name for arch={arch!r}, " - f"module={mod_name!r}, no candidate, source is\n{source}" - ) - return None - unique = set(fall) - assert len(unique) == 1, ( - f"Unable to guess Configuration class name for arch={arch!r}, " - f"module={mod_name!r}, found={unique} (#{len(unique)}), " - f"source is\n{source}" - ) - cls_name = unique.pop() - return getattr(transformers, cls_name) - - -def _update_config(config: Any, kwargs: Dict[str, Any]): - for k, v in kwargs.items(): - if hasattr(config, k): - setattr(config, k, v) - - -def reduce_model_config(config: Any, task: str) -> Dict[str, Any]: - """Reduces a model size.""" - if task == "text-generation": - check_hasattr( - config, - ("head_dim", ("hidden_size", "num_attention_heads")), - "num_hidden_layers", - ("num_key_value_heads", "num_attention_heads"), - "intermediate_size", - "hidden_size", - ) - kwargs = dict( - head_dim=getattr( - config, "head_dim", config.hidden_size // config.num_attention_heads - ), - num_hidden_layers=min(config.num_hidden_layers, 2), - num_key_value_heads=( - config.num_key_value_heads - if hasattr(config, "num_key_value_heads") - else config.num_attention_heads - ), - intermediate_size=( - min(config.intermediate_size, 24576 // 4) - if config.intermediate_size % 4 == 0 - else config.intermediate_size - ), - hidden_size=( - min(config.hidden_size, 3072 // 4) - if config.hidden_size % 4 == 0 - else config.hidden_size - ), - ) - elif task == "image-classification": - check_hasattr(config, ("num_hidden_layers", "hidden_sizes")) - kwargs = dict( - num_hidden_layers=( - min(config.num_hidden_layers, 2) - if hasattr(config, "num_hidden_layers") - else len(config.hidden_sizes) - ) - ) - elif task == "zero-shot-image-classification": - check_hasattr(config, "vision_config", "text_config") - check_hasattr(config.vision_config, "num_hidden_layers", "num_attention_heads") - check_hasattr(config.text_config, "num_hidden_layers", "num_attention_heads") - kwargs = dict( - vision_config=dict( - num_hidden_layers=min(2, config.vision_config.num_hidden_layers), - num_attention_heads=min(2, config.vision_config.num_attention_heads), - ), - text_config=dict( - num_hidden_layers=min(2, config.text_config.num_hidden_layers), - num_attention_heads=min(2, config.text_config.num_attention_heads), - ), - ) - elif task == "text2text-generation": - kwargs = {} - if hasattr(config, "num_decoder_layers"): - config.num_decoder_layers = min(config.num_decoder_layers, 2) - if hasattr(config, "num_hidden_layers"): - config.num_hidden_layers = min(config.num_hidden_layers, 2) - elif task == "image-text-to-text": - kwargs = {} - if hasattr(config, "num_hidden_layers"): - config.num_hidden_layers = min(config.num_hidden_layers, 2) - if hasattr(config, "vision_config") and hasattr( - config.vision_config, "num_hidden_layers" - ): - config.vision_config.num_hidden_layers = min( - config.vision_config.num_hidden_layers, 2 - ) - elif task == "automatic-speech-recognition": - kwargs = {} - if hasattr(config, "num_decoder_layers"): - config.num_decoder_layers = min(config.num_decoder_layers, 2) - if hasattr(config, "decoder_layers"): - config.decoder_layers = 
min(config.decoder_layers, 2) - if hasattr(config, "num_hidden_layers"): - config.num_hidden_layers = min(config.num_hidden_layers, 2) - else: - raise NotImplementedError(f"Input generation for task {task!r} not implemented yet.") - - update_config(config, kwargs) - return kwargs - - -def update_config(config: Any, mkwargs: Dict[str, Any]): - """Updates a configuration with different values.""" - for k, v in mkwargs.items(): - if isinstance(v, dict): - assert hasattr( - config, k - ), f"missing attribute {k!r} in config={config}, cannot update it with {v}" - update_config(getattr(config, k), v) - else: - setattr(config, k, v) - - -def check_hasattr(config: Any, *args: Union[str, Tuple[Any, ...]]): - """ - Checks the confiugation has all the attributes in ``args``. - Raises an exception otherwise. - """ - for a in args: - assert isinstance(a, (str, tuple)), f"unexpected type {type(a)} in {args!r}" - if isinstance(a, str): - assert (isinstance(config, dict) and a in config) or hasattr( - config, a - ), f"Missing attribute {a!r} in\n{config}" - elif isinstance(a, tuple): - assert any( - (isinstance(name, str) and hasattr(config, name)) - or all(hasattr(config, _) for _ in name) - for name in a - ), f"All attributes in {a!r} are missing from\n{config}" - - -def _pick(config, *atts): - """Returns the first value found in the configuration.""" - for a in atts: - if isinstance(a, str): - if hasattr(config, a): - return getattr(config, a) - elif isinstance(a, tuple): - if all(hasattr(config, _) for _ in a[1:]): - return a[0]([getattr(config, _) for _ in a[1:]]) - raise AssertionError(f"Unable to find any of these {atts!r} in {config}") - - -def random_input_kwargs(config: Any, task: str) -> Tuple[Dict[str, Any], Callable]: - """ - Inputs kwargs. - - If the configuration is None, the function selects typical dimensions. 
- """ - fcts = get_get_inputs_function_for_tasks() - assert task in fcts, f"Unsupported task {task!r}, supported are {sorted(fcts)}" - if task == "text-generation": - if config is not None: - check_hasattr( - config, - "vocab_size", - "hidden_size", - "num_attention_heads", - ("num_key_value_heads", "num_attention_heads"), - "intermediate_size", - "hidden_size", - ) - kwargs = dict( - batch_size=2, - sequence_length=30, - sequence_length2=3, - head_dim=( - 16 - if config is None - else getattr( - config, "head_dim", config.hidden_size // config.num_attention_heads - ) - ), - dummy_max_token_id=31999 if config is None else (config.vocab_size - 1), - num_hidden_layers=4 if config is None else config.num_hidden_layers, - num_key_value_heads=( - 24 - if config is None - else _pick(config, "num_key_value_heads", "num_attention_heads") - ), - intermediate_size=1024 if config is None else config.intermediate_size, - hidden_size=512 if config is None else config.hidden_size, - ) - fct = get_inputs_for_text_generation - elif task == "text2text-generation": - if config is not None: - check_hasattr( - config, - "vocab_size", - "hidden_size", - "num_attention_heads", - ("num_hidden_layers", "num_layers"), - ("n_positions", "d_model"), - ( - "num_key_value_heads", - "num_heads", - ("decoder_attention_heads", "encoder_attention_heads"), - ), - ) - kwargs = dict( - batch_size=2, - sequence_length=30, - sequence_length2=3, - head_dim=16 if config is None else (config.d_kv if hasattr(config, "d_kv") else 1), - dummy_max_token_id=31999 if config is None else config.vocab_size - 1, - num_hidden_layers=( - 8 if config is None else _pick(config, "num_hidden_layers", "num_layers") - ), - num_key_value_heads=( - 16 - if config is None - else _pick( - config, - "num_key_value_heads", - "num_heads", - (sum, "encoder_attention_heads", "decoder_attention_heads"), - ) - ), - encoder_dim=512 if config is None else _pick(config, "n_positions", "d_model"), - ) - fct = get_inputs_for_text2text_generation # type: ignore - elif task == "image-classification": - if config is not None: - check_hasattr(config, ("image_size", "architectures"), "num_channels") - if config is not None: - if hasattr(config, "image_size"): - image_size = config.image_size - else: - assert config.architectures, f"empty architecture in {config}" - default_values = get_architecture_default_values(config.architectures[0]) - image_size = default_values["image_size"] - if config is None or isinstance(image_size, int): - kwargs = dict( - batch_size=2, - input_width=224 if config is None else image_size, - input_height=224 if config is None else image_size, - input_channels=3 if config is None else config.num_channels, - ) - else: - kwargs = dict( - batch_size=2, - input_width=config.image_size[0], - input_height=config.image_size[1], - input_channels=config.num_channels, - ) - fct = get_inputs_for_image_classification # type: ignore - elif task == "zero-shot-image-classification": - if config is not None: - check_hasattr(config, "vision_config", "text_config") - check_hasattr(config.vision_config, "image_size", "num_channels") - check_hasattr(config.text_config, "vocab_size") - kwargs = dict( - batch_size=2, - batch_size_image=3, - sequence_length=30, - dummy_max_token_id=( - 49408 if config is None else (config.text_config.vocab_size - 1) - ), - input_width=224 if config is None else config.vision_config.image_size, - input_height=224 if config is None else config.vision_config.image_size, - input_channels=3 if config is None else 
config.vision_config.num_channels, - ) - fct = get_inputs_for_zero_shot_image_classification # type: ignore - elif task == "image-text-to-text": - if config is not None: - check_hasattr( - config, - "vocab_size", - "hidden_size", - "num_attention_heads", - ("num_key_value_heads", "num_attention_heads"), - "intermediate_size", - "hidden_size", - "vision_config", - ) - check_hasattr(config.vision_config, "image_size", "num_channels") - kwargs = dict( - batch_size=2, - sequence_length=30, - sequence_length2=3, - head_dim=( - 16 - if config is None - else getattr( - config, "head_dim", config.hidden_size // config.num_attention_heads - ) - ), - dummy_max_token_id=31999 if config is None else config.vocab_size - 1, - num_hidden_layers=4 if config is None else config.num_hidden_layers, - num_key_value_heads=( - 8 - if config is None - else _pick(config, "num_key_value_heads", "num_attention_heads") - ), - intermediate_size=1024 if config is None else config.intermediate_size, - hidden_size=512 if config is None else config.hidden_size, - width=224 if config is None else config.vision_config.image_size, - height=224 if config is None else config.vision_config.image_size, - num_channels=3 if config is None else config.vision_config.num_channels, - ) - fct = get_inputs_for_image_text_to_text # type: ignore - elif task == "automatic-speech-recognition": - if config is not None: - check_hasattr( - config, - "d_model", - "decoder_attention_heads", - "decoder_layers", - "encoder_attention_heads", - "encoder_layers", - "max_source_positions", - "num_hidden_layers", - "vocab_size", - ) - kwargs = dict( - batch_size=2, - sequence_length=30, - dummy_max_token_id=31000 if config is None else config.vocab_size, - max_source_positions=1500 if config is None else config.max_source_positions, - d_model=384 if config is None else config.d_model, - num_hidden_layers=4 if config is None else config.num_hidden_layers, - encoder_attention_heads=6 if config is None else config.encoder_attention_heads, - encoder_layers=4 if config is None else config.encoder_layers, - decoder_attention_heads=6 if config is None else config.decoder_attention_heads, - decoder_layers=4 if config is None else config.decoder_layers, - head_dim=( - 64 if config is None else (config.d_model // config.encoder_attention_heads) - ), - ) - fct = get_inputs_for_speech_automatic_recognition # type: ignore - else: - raise NotImplementedError(f"Input generation for task {task!r} not implemented yet.") - return kwargs, fct +from ...helpers.config_helper import update_config +from ...tasks import reduce_model_config, random_input_kwargs +from .hub_api import task_from_arch, get_pretrained_config def get_untrained_model_with_inputs( @@ -501,506 +141,3 @@ def compute_model_size(model: torch.nn.Module) -> Tuple[int, int]: param_size += param.nelement() * param.element_size() nparams += param.nelement() return param_size, nparams - - -def get_inputs_for_image_classification( - model: torch.nn.Module, - config: Optional[Any], - input_width: int, - input_height: int, - input_channels: int, - batch_size: int = 2, - dynamic_rope: bool = False, - **kwargs, -): - """ - Generates inputs for task ``image-classification``. 
- - :param model: model to get the missing information - :param config: configuration used to generate the model - :param batch_size: batch size - :param input_channels: input channel - :param input_width: input width - :param input_height: input height - :param kwargs: to overwrite the configuration, example ``num_hidden_layers=1`` - :return: dictionary - """ - assert isinstance( - input_width, int - ), f"Unexpected type for input_width {type(input_width)}{config}" - assert isinstance( - input_width, int - ), f"Unexpected type for input_height {type(input_height)}{config}" - - shapes = { - "pixel_values": { - 0: torch.export.Dim("batch", min=1, max=1024), - 2: torch.export.Dim("width", min=1, max=4096), - 3: torch.export.Dim("height", min=1, max=4096), - }, - } - inputs = dict( - pixel_values=torch.randn(batch_size, input_channels, input_width, input_height).clamp( - -1, 1 - ), - ) - return dict(inputs=inputs, dynamic_shapes=shapes) - - -def get_inputs_for_zero_shot_image_classification( - model: torch.nn.Module, - config: Optional[Any], - dummy_max_token_id: int, - batch_size: int = 2, - sequence_length: int = 30, - input_width: int = 224, - input_height: int = 224, - input_channels: int = 3, - batch_size_image=3, - **kwargs, -): - """ - Generates inputs for task ``zero-short-image-classification``. - - :param model: model to get the missing information - :param config: configuration used to generate the model - :param dummy_max_token_id: vocabulary size - :param batch_size: batch size - :param sequence_length: sequence length - :param batch_size_image: number of images - :param input_channels: input channel - :param input_width: input width - :param input_height: input height - :param kwargs: to overwrite the configuration, example ``num_hidden_layers=1`` - :return: dictionary - - # input_ids:T7s2x7 - # attention_mask:T7s2x7 - # pixel_values:T1s2x3x224x224 - """ - assert isinstance( - input_width, int - ), f"Unexpected type for input_width {type(input_width)}{config}" - assert isinstance( - input_width, int - ), f"Unexpected type for input_height {type(input_height)}{config}" - - batch = torch.export.Dim("batch", min=1, max=1024) - seq_length = torch.export.Dim("seq_length", min=1, max=4096) - shapes = { - "inputs_ids": {0: batch, 1: seq_length}, - "attention_mask": {0: batch, 1: seq_length}, - "pixel_values": { - 0: torch.export.Dim("batch_img", min=1, max=1024), - # 2: torch.export.Dim("width", min=1, max=4096), - # 3: torch.export.Dim("height", min=1, max=4096), - }, - } - inputs = dict( - input_ids=torch.randint(0, dummy_max_token_id, (batch_size, sequence_length)).to( - torch.int64 - ), - attention_mask=torch.ones((batch_size, sequence_length)).to(torch.int64), - pixel_values=torch.randn( - batch_size_image, input_channels, input_width, input_height - ).clamp(-1, 1), - ) - return dict(inputs=inputs, dynamic_shapes=shapes) - - -def get_inputs_for_text_generation( - model: torch.nn.Module, - config: Optional[Any], - dummy_max_token_id: int, - num_key_value_heads: int, - num_hidden_layers: int, - head_dim: int, - batch_size: int = 2, - sequence_length: int = 30, - sequence_length2: int = 3, - dynamic_rope: bool = False, - **kwargs, -): - """ - Generates input for task ``text-generation``. 
- - :param model: model to get the missing information - :param config: configuration used to generate the model - :param head_dim: last dimension of the cache - :param dummy_max_token_id: dummy max token id - :param batch_size: batch size - :param sequence_length: sequence length - :param sequence_length2: new sequence length - :param dynamic_rope: use dynamic rope (see :class:`transformers.LlamaConfig`) - :param kwargs: to overwrite the configuration, example ``num_hidden_layers=1`` - :return: dictionary - """ - if head_dim is None: - assert config, "head_dim is None, the value cannot be set without a configuration" - head_dim = config.hidden_size // config.num_attention_heads - batch = torch.export.Dim("batch", min=1, max=1024) - seq_length = torch.export.Dim("seq_length", min=1, max=4096) - cache_length = torch.export.Dim("cache_length", min=1, max=4096) - - shapes = { - "input_ids": {0: batch, 1: seq_length}, - "attention_mask": { - 0: batch, - 1: torch.export.Dim.DYNAMIC, # cache_length + seq_length - }, - "position_ids": { - 0: batch, - 1: torch.export.Dim.DYNAMIC, # cache_length + seq_length - }, - "past_key_values": [ - [{0: batch, 2: cache_length} for _ in range(num_hidden_layers)], - [{0: batch, 2: cache_length} for _ in range(num_hidden_layers)], - ], - } - inputs = dict( - input_ids=torch.randint(0, dummy_max_token_id, (batch_size, sequence_length2)).to( - torch.int64 - ), - attention_mask=torch.ones((batch_size, sequence_length + sequence_length2)).to( - torch.int64 - ), - position_ids=torch.arange(sequence_length, sequence_length + sequence_length2) - .to(torch.int64) - .expand((batch_size, -1)), - past_key_values=make_dynamic_cache( - [ - ( - torch.randn(batch_size, num_key_value_heads, sequence_length, head_dim), - torch.randn(batch_size, num_key_value_heads, sequence_length, head_dim), - ) - for i in range(num_hidden_layers) - ] - ), - ) - return dict(inputs=inputs, dynamic_shapes=shapes) - - -def get_inputs_for_image_text_to_text( - model: torch.nn.Module, - config: Optional[Any], - dummy_max_token_id: int, - num_key_value_heads: int, - num_hidden_layers: int, - head_dim: int, - width: int, - height: int, - num_channels: int, - batch_size: int = 2, - sequence_length: int = 30, - sequence_length2: int = 3, - n_images: int = 2, - dynamic_rope: bool = False, - **kwargs, -): - """ - Generates input for task ``text-generation``. 
- - :param model: model to get the missing information - :param config: configuration used to generate the model - :param head_dim: last dimension of the cache - :param dummy_max_token_id: dummy max token id - :param batch_size: batch size - :param sequence_length: sequence length - :param sequence_length2: new sequence length - :param n_images: number of images - :param width: width of the image - :param height: height of the image - :param num_channels: number of channels - :param dynamic_rope: use dynamic rope (see :class:`transformers.LlamaConfig`) - :param kwargs: to overwrite the configuration, example ``num_hidden_layers=1`` - :return: dictionary - """ - batch = torch.export.Dim("batch", min=1, max=1024) - seq_length = torch.export.Dim("seq_length", min=1, max=4096) - cache_length = torch.export.Dim("cache_length", min=1, max=4096) - images = torch.export.Dim("images", min=1, max=4096) - - shapes = { - "input_ids": {0: batch, 1: seq_length}, - "attention_mask": { - 0: batch, - 1: torch.export.Dim.DYNAMIC, # cache_length + seq_length - }, - "position_ids": { - 0: batch, - 1: torch.export.Dim.DYNAMIC, # cache_length + seq_length - }, - "past_key_values": [ - [{0: batch, 2: cache_length} for _ in range(num_hidden_layers)], - [{0: batch, 2: cache_length} for _ in range(num_hidden_layers)], - ], - "pixel_values": {0: batch, 1: images}, - "image_attention_mask": {0: batch, 1: seq_length, 2: images}, - } - inputs = dict( - input_ids=torch.randint(0, dummy_max_token_id, (batch_size, sequence_length2)).to( - torch.int64 - ), - attention_mask=torch.ones((batch_size, sequence_length + sequence_length2)).to( - torch.int64 - ), - position_ids=torch.arange(sequence_length, sequence_length + sequence_length2) - .to(torch.int64) - .expand((batch_size, -1)), - past_key_values=make_dynamic_cache( - [ - ( - torch.randn(batch_size, num_key_value_heads, sequence_length, head_dim), - torch.randn(batch_size, num_key_value_heads, sequence_length, head_dim), - ) - for i in range(num_hidden_layers) - ] - ), - image_attention_mask=torch.ones((batch_size, sequence_length2, n_images)).to( - torch.int64 - ), - pixel_values=torch.ones((batch_size, n_images, num_channels, width, height)).to( - torch.int64 - ), - ) - return dict(inputs=inputs, dynamic_shapes=shapes) - - -def get_inputs_for_text2text_generation( - model: torch.nn.Module, - config: Optional[Any], - dummy_max_token_id: int, - num_key_value_heads: int, - num_hidden_layers: int, - head_dim: int, - encoder_dim: int, - batch_size: int = 2, - sequence_length: int = 30, - sequence_length2: int = 3, - **kwargs, -): - """ - Generates input for task ``text2text-generation``. - - :param model: model to get the missing information - :param config: configuration used to generate the model - :param head_dim: last dimension of the cache - :param dummy_max_token_id: dummy max token id - :param batch_size: batch size - :param encoder_dim: last dimension of encoder_last_hidden_state - :param sequence_length: sequence length - :param sequence_length2: new sequence length - :param kwargs: to overwrite the configuration, example ``num_hidden_layers=1`` - :return: dictionary - - Stolen inputs for one model. 
- - :: - - cache_position:T7s1 - past_key_values:EncoderDecoderCache( - self_attention_cache=DynamicCache( - key_cache=#6[T1s1x8x1x64,...], - value_cache=#6[T1s1x8x1x64,...]), - cross_attention_cache=DynamicCache( - key_cache=#6[T1s1x8x16x64,...], - value_cache=#6[T1s1x8x16x64,...])), - decoder_input_ids:T7s1x1, - encoder_outputs:dict(last_hidden_state:T1s1x16x512) - """ - batch = torch.export.Dim("batch", min=1, max=1024) - seq_length = torch.export.Dim("seq_length", min=1, max=4096) - cache_length = torch.export.Dim("cache_length", min=1, max=4096) - cache_length2 = torch.export.Dim("cache_length2", min=1, max=4096) - - shapes = { - "input_ids": {0: batch, 1: seq_length}, - "decoder_input_ids": {0: batch, 1: torch.export.Dim.DYNAMIC}, - "attention_mask": {0: batch, 1: torch.export.Dim.DYNAMIC}, - # "cache_position": {0: batch, 1: torch.export.Dim.DYNAMIC}, - "past_key_values": [ - [ - [{0: batch, 2: cache_length} for _ in range(num_hidden_layers)], - [{0: batch, 2: cache_length} for _ in range(num_hidden_layers)], - ], - [ - [{0: batch, 2: cache_length2} for _ in range(num_hidden_layers)], - [{0: batch, 2: cache_length2} for _ in range(num_hidden_layers)], - ], - ], - # one these is selected based on the forward method signature - # "encoder_last_hidden_state": {0: batch, 1: torch.export.Dim.DYNAMIC}, - # "encoder_outputs": {0: batch, 1: torch.export.Dim.DYNAMIC}, - } - inputs = dict( - input_ids=torch.randint(0, dummy_max_token_id, (batch_size, sequence_length)).to( - torch.int64 - ), - decoder_input_ids=torch.randint( - 0, dummy_max_token_id, (batch_size, sequence_length2) - ).to(torch.int64), - attention_mask=torch.ones((batch_size, sequence_length)).to(torch.int64), - # cache_position=torch.arange(sequence_length, sequence_length + sequence_length2) - # .to(torch.int64) - # .expand((batch_size, -1)), - past_key_values=make_encoder_decoder_cache( - make_dynamic_cache( - [ - ( - torch.randn( - batch_size, num_key_value_heads, sequence_length, head_dim - ), - torch.randn( - batch_size, num_key_value_heads, sequence_length, head_dim - ), - ) - for i in range(num_hidden_layers) - ] - ), - make_dynamic_cache( - [ - ( - torch.randn( - batch_size, num_key_value_heads, sequence_length2, head_dim - ), - torch.randn( - batch_size, num_key_value_heads, sequence_length2, head_dim - ), - ) - for i in range(num_hidden_layers) - ] - ), - ), - # one these is selected based on the forward method signature - # encoder_last_hidden_state=torch.randn(batch_size, sequence_length2, encoder_dim), - # encoder_outputs=torch.randn(batch_size, sequence_length2, encoder_dim), - ) - return dict(inputs=inputs, dynamic_shapes=shapes) - - -def get_inputs_for_speech_automatic_recognition( - model: torch.nn.Module, - config: Optional[Any], - dummy_max_token_id: int, - max_source_positions: int, - d_model: int, - num_hidden_layers: int, - encoder_attention_heads: int, - encoder_layers: int, - decoder_layers: int, - head_dim: int, - batch_size: int = 2, - sequence_length: int = 30, - **kwargs, -): - """ - Generates input for task ``text2text-generation``. - - :param model: model to get the missing information - :param config: configuration used to generate the model - :param batch_size: batch size - :param kwargs: to overwrite the configuration, example ``num_hidden_layers=1`` - :return: dictionary - - Stolen inputs for one model. 
- - :: - - dict( - cache_position:T7s4, - past_key_values:EncoderDecoderCache( - self_attention_cache=DynamicCache[serialized](#2[#0[],#0[]]), - cross_attention_cache=DynamicCache[serialized](#2[#0[],#0[]]) - ), - decoder_input_ids:T7s1x4, - encoder_outputs:BaseModelOutput(last_hidden_state:T1s1x1500x384), - use_cache:bool,return_dict:bool - ) - dict( - cache_position:T7s1, - past_key_values:EncoderDecoderCache( - self_attention_cache=DynamicCache[serialized](#2[ - #4[T1s1x6x4x64,T1s1x6x4x64,T1s1x6x4x64,T1s1x6x4x64], - #4[T1s1x6x4x64,T1s1x6x4x64,T1s1x6x4x64,T1s1x6x4x64] - ]), - cross_attention_cache=DynamicCache[serialized](#2[ - #4[T1s1x6x1500x64,T1s1x6x1500x64,T1s1x6x1500x64,T1s1x6x1500x64], - #4[T1s1x6x1500x64,T1s1x6x1500x64,T1s1x6x1500x64,T1s1x6x1500x64] - ]), - ), - decoder_input_ids:T7s1x1, - encoder_outputs:BaseModelOutput(last_hidden_state:T1s1x1500x384), - use_cache:bool,return_dict:bool - ) - """ - batch = torch.export.Dim("batch", min=1, max=1024) - seq_length = torch.export.Dim("seq_length", min=1, max=4096) - - shapes = { - "decoder_input_ids": {0: batch, 1: seq_length}, - "cache_position": {0: seq_length}, - "encoder_outputs": [{0: batch}], - "past_key_values": [ - [ - [{0: batch} for _ in range(num_hidden_layers)], - [{0: batch} for _ in range(num_hidden_layers)], - ], - [ - [{0: batch} for _ in range(num_hidden_layers)], - [{0: batch} for _ in range(num_hidden_layers)], - ], - ], - } - inputs = dict( - decoder_input_ids=torch.randint( - 0, dummy_max_token_id, (batch_size, sequence_length) - ).to(torch.int64), - cache_position=(torch.arange(sequence_length) + 5).to(torch.int64), - encoder_outputs=transformers.modeling_outputs.BaseModelOutput( - last_hidden_state=torch.randn(batch_size, max_source_positions, d_model) - ), - past_key_values=make_encoder_decoder_cache( - make_dynamic_cache( - [ - ( - torch.randn( - batch_size, encoder_attention_heads, encoder_layers, head_dim - ), - torch.randn( - batch_size, encoder_attention_heads, encoder_layers, head_dim - ), - ) - for i in range(num_hidden_layers) - ] - ), - make_dynamic_cache( - [ - ( - torch.randn( - batch_size, encoder_attention_heads, max_source_positions, head_dim - ), - torch.randn( - batch_size, encoder_attention_heads, max_source_positions, head_dim - ), - ) - for i in range(num_hidden_layers) - ] - ), - ), - # one these is selected based on the forward method signature - # encoder_last_hidden_state=torch.randn(batch_size, sequence_length2, encoder_dim), - # encoder_outputs=torch.randn(batch_size, sequence_length2, encoder_dim), - ) - return dict(inputs=inputs, dynamic_shapes=shapes) - - -def get_get_inputs_function_for_tasks() -> Dict[str, Callable]: - """Returns all the function producing dummy inputs for every task.""" - return { - "automatic-speech-recognition": get_inputs_for_speech_automatic_recognition, - "image-classification": get_inputs_for_image_classification, - "image-text-to-text": get_inputs_for_image_text_to_text, - "text-generation": get_inputs_for_text_generation, - "text2text-generation": get_inputs_for_text2text_generation, - "zero-shot-image-classification": get_inputs_for_zero_shot_image_classification, - }
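
A minimal usage sketch (not part of the patch) of the per-task API moved out of ``model_inputs.py`` above; it relies only on code added in this diff and assumes the package layout the patch creates. Per the docstring of ``random_input_kwargs``, passing ``config=None`` selects the typical dimensions hard-coded in the task module::

    # Hypothetical example based on onnx_diagnostic/tasks/zero_shot_image_classification.py
    from onnx_diagnostic.tasks import zero_shot_image_classification as task

    # config=None -> typical dimensions (dummy_max_token_id=49408, 3 channels, 224x224 images)
    kwargs, fct = task.random_input_kwargs(None, task.__TASK__)

    # model/config are only used in assertion messages by this task's get_inputs
    data = fct(None, None, **kwargs)
    inputs, dynamic_shapes = data["inputs"], data["dynamic_shapes"]
    # inputs["input_ids"]:    int64 tensor of shape (2, 30)
    # inputs["pixel_values"]: float tensor of shape (3, 3, 224, 224)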