|
| 1 | +""" |
| 2 | +.. _l-plot-tiny-llm-export-patched: |
| 3 | +
|
| 4 | +Export Tiny-LLM with patches |
| 5 | +============================ |
| 6 | +
|
| 7 | +Many models from :epkg:`transformers` cannot be converted because |
| 8 | +the implementation uses cache classes. Let's see how to get around that. |
| 9 | +We focus on the model |
| 10 | +`Tiny-LLM <https://huggingface.co/arnir0/Tiny-LLM>`_. |
| 11 | +To avoid downloading any weights, we write a function creating a |
| 12 | +random model based on the same architecture. |
| 13 | +This continues example :ref:`l-plot-tiny-llm-export`. |
| 14 | +
|
| 15 | +Errors |
| 16 | +++++++ |
| 17 | +
|
| 18 | +They depend on transformers version. |
| 19 | +
|
| 20 | +``transformers>=4.40,<4.50`` cannot serialize DynamicCache and cannot |
| 21 | +map dynamic shapes to instances of DynamicCache. The following errors |
| 22 | +would appear: |
| 23 | +
|
| 24 | +:: |
| 25 | +
|
| 26 | + torch._dynamo.exc.UserError: Cannot associate shape |
| 27 | + [[{0: <class '....batch'>, 2: <class '....cache_length'>}], |
| 28 | + [{0: <class '....batch'>, 2: <class '....cache_length'>}]] |
| 29 | + specified at `dynamic_shapes['past_key_values']` |
| 30 | + to non-tensor type <class 'transformers.cache_utils.DynamicCache'> |
| 31 | + at `inputs['past_key_values']` (expected None) |
| 32 | + For more information about this error, see: https://pytorch.org/docs/main/generated/exportdb/index.html#dynamic-shapes-validation |
| 33 | +
|
| 34 | +With ``transformers==4.50``, it shows the following: |
| 35 | +
|
| 36 | +:: |
| 37 | +
|
| 38 | + torch._dynamo.exc.UserError: Constraints violated (batch)! |
| 39 | + For more information, run with TORCH_LOGS="+dynamic". |
| 40 | + - Not all values of batch = L['args'][1]['input_ids'].size()[0] |
| 41 | + in the specified range batch <= 1024 are valid |
| 42 | + because batch was inferred to be a constant (2). |
| 43 | + - Not all values of batch = L['args'][1]['attention_mask'].size()[0] |
| 44 | + in the specified range batch <= 1024 are valid |
| 45 | + because batch was inferred to be a constant (2). |
| 46 | + - Not all values of batch = L['args'][1]['past_key_values']['key_cache'][0].size()[0] |
| 47 | + in the specified range batch <= 1024 are valid |
| 48 | + because batch was inferred to be a constant (2). |
| 49 | + - Not all values of batch = L['args'][1]['past_key_values']['value_cache'][0].size()[0] |
| 50 | + in the specified range batch <= 1024 are valid |
| 51 | + because batch was inferred to be a constant (2). |
| 52 | + Suggested fixes: |
| 53 | + batch = 2 |
| 54 | +
|
| 55 | +However, this package implements a patch mechanism |
| 56 | +which replaces the part causing these issues. |
| 57 | +
|
| 58 | +.. note:: restart after an export failure |
| 59 | +
|
| 60 | + If the export fails, it is better to start executing again, |
| 61 | + or restart the kernel if you are in the notebook. |
| 62 | + The export may leave :epkg:`torch` in an unstable state. |
| 63 | +""" |
| 64 | + |
| 65 | +import copy |
| 66 | +import torch |
| 67 | +import transformers |
| 68 | +from onnx_diagnostic.torch_export_patches.onnx_export_errors import bypass_export_some_errors |
| 69 | +from onnx_diagnostic.torch_models.llms import get_tiny_llm |
| 70 | + |
| 71 | + |
# Build a randomly initialized Tiny-LLM clone together with example
# inputs and the matching dynamic-shape specification — no weights are
# downloaded for this first export.
experiment = get_tiny_llm()
untrained_model = experiment["model"]
inputs = experiment["inputs"]
dynamic_shapes = experiment["dynamic_shapes"]

# Work on a deep copy so the original example inputs stay pristine for
# the second export below.
cloned_inputs = copy.deepcopy(inputs)
| 80 | + |
| 81 | + |
# Run the export inside the patch context: transformers' cache-related
# code is temporarily replaced so torch.export can trace it, and
# ``modificator`` rewrites the inputs accordingly.
with bypass_export_some_errors(patch_transformers=True) as modificator:
    patched_kwargs = modificator(cloned_inputs)
    ep = torch.export.export(
        untrained_model,
        (),
        kwargs=patched_kwargs,
        dynamic_shapes=dynamic_shapes,
    )
    print("It worked:")
    print(ep)
| 91 | + |
# %%
# With the original model
# +++++++++++++++++++++++

# Download the real checkpoint and repeat the same export. The tokenizer
# is loaded for completeness; only the model itself is exported.
MODEL_NAME = "arnir0/Tiny-LLM"
tokenizer = transformers.AutoTokenizer.from_pretrained(MODEL_NAME)
model = transformers.AutoModelForCausalLM.from_pretrained(MODEL_NAME)

# Fresh deep copy again: the previous export may have consumed or
# mutated its argument dictionary.
pretrained_inputs = copy.deepcopy(inputs)

with bypass_export_some_errors(patch_transformers=True) as modificator:
    ep = torch.export.export(
        model,
        (),
        kwargs=modificator(pretrained_inputs),
        dynamic_shapes=dynamic_shapes,
    )
    print("It worked:")
    print(ep)
0 commit comments