"""
.. _l-plot-export_tiny_phi2:

Untrained microsoft/phi-2
=========================

:epkg:`microsoft/phi-2` is not a big model but still quite big
when it comes to writing unit tests. Function
:func:`onnx_diagnostic.torch_models.hghub.get_untrained_model_with_inputs`
can be used to create a reduced untrained version of a model coming from
:epkg:`HuggingFace`. It downloads the configuration from the website
but creates a dummy model with 1 or 2 hidden layers in order to reduce
the size and get a fast execution. The goal is usually to test
the export or to compare performance. The relevance does not matter.

Create the dummy model
++++++++++++++++++++++
"""

import copy
import pprint
import warnings
import torch
import onnxruntime
from onnx_diagnostic import doc
from onnx_diagnostic.helpers import max_diff, string_diff, string_type
from onnx_diagnostic.helpers.cache_helper import is_cache_dynamic_registered
from onnx_diagnostic.helpers.ort_session import make_feeds
from onnx_diagnostic.torch_export_patches import bypass_export_some_errors
from onnx_diagnostic.torch_models.hghub import (
    get_untrained_model_with_inputs,
)

warnings.simplefilter("ignore")


data = get_untrained_model_with_inputs("microsoft/phi-2")
untrained_model, inputs, dynamic_shapes, config, size, n_weights = (
    data["model"],
    data["inputs"],
    data["dynamic_shapes"],
    data["configuration"],
    data["size"],
    data["n_weights"],
)

print(f"model {size / 2**10:1.3f} Kb with {n_weights} parameters.")
# %%
# The original model has 2.7 billion parameters. It was divided by more than 10.
# Let's see the configuration.
print(config)


# %%
# Inputs:

print(string_type(inputs, with_shape=True))

# %%
# With min/max values.
print(string_type(inputs, with_shape=True, with_min_max=True))

# %%
# And the dynamic shapes.
pprint.pprint(dynamic_shapes)

# %%
# We execute the model to produce expected outputs.
expected = untrained_model(**copy.deepcopy(inputs))
print(f"expected: {string_type(expected, with_shape=True, with_min_max=True)}")


# %%
# Export
# ++++++


with bypass_export_some_errors(patch_transformers=True) as modificator:

    # Unnecessary steps but useful in case of an error.
    # We check the cache is registered.
    assert is_cache_dynamic_registered()

    # We check there are no discrepancies when the patches are applied.
    d = max_diff(expected, untrained_model(**copy.deepcopy(inputs)))
    assert (
        d["abs"] < 1e-5
    ), f"The model with patches produces different outputs: {string_diff(d)}"

    # Then we export.
    ep = torch.export.export(
        untrained_model,
        (),
        kwargs=modificator(copy.deepcopy(inputs)),
        dynamic_shapes=dynamic_shapes,
        strict=False,  # mandatory for torch==2.6
    )

    # We check the exported program produces the same results as well.
    d = max_diff(expected, ep.module()(**copy.deepcopy(inputs)))
    assert (
        d["abs"] < 1e-5
    ), f"The exported model produces different outputs: {string_diff(d)}"

# %%
# Export to ONNX
# ++++++++++++++
#
# The export works. We can export to ONNX now.
# Patches are still needed because the export
# applies :meth:`torch.export.ExportedProgram.run_decompositions`,
# which may export local pieces of the model again.

with bypass_export_some_errors(patch_transformers=True):
    epo = torch.onnx.export(
        ep, (), kwargs=copy.deepcopy(inputs), dynamic_shapes=dynamic_shapes, dynamo=True
    )

# %%
# We can save it.
epo.save("plot_export_tiny_phi2.onnx", external_data=True)

# Or directly get the :class:`onnx.ModelProto`.
onx = epo.model_proto


# %%
# Discrepancies
# +++++++++++++
#
# Then we check the conversion to ONNX.
# Let's make sure the ONNX model produces the same outputs.
# It takes flattened inputs.

feeds = make_feeds(onx, copy.deepcopy(inputs), use_numpy=True)

print(f"torch inputs: {string_type(inputs)}")
print(f"onxrt inputs: {string_type(feeds)}")

# %%
# We then create an :class:`onnxruntime.InferenceSession`.

sess = onnxruntime.InferenceSession(
    onx.SerializeToString(), providers=["CPUExecutionProvider"]
)

# %%
# Let's run.
got = sess.run(None, feeds)

# %%
# And finally the discrepancies.

diff = max_diff(expected, got, flatten=True)
print(f"onnx discrepancies: {string_diff(diff)}")

# %%
# It looks good.

# %%
doc.plot_legend("untrained smaller\nmicrosoft/phi-2", "torch.onnx.export", "green")