@@ -48,10 +48,9 @@ def generate_text(
4848 generated_text = tokenizer .decode (outputs [0 ], skip_special_tokens = True )
4949 return generated_text
5050
51- # Define your prompt
5251
53-
54- prompt = "Continue: it rains... "
52+ # Define your prompt
53+ prompt = "Continue: it rains, what should I do? "
5554generated_text = generate_text (prompt , model , tokenizer )
5655print ("-----------------" )
5756print (generated_text )
@@ -69,7 +68,7 @@ def generate_text(
6968# If the default settings do not work, ``skip_kwargs_names`` and ``dynamic_shapes``
7069# can be changed to remove some undesired inputs or add more dynamic dimensions.
7170
72- filename = "plot_export_tiny_llm_method_generate.onnx"
71+ filename = "plot_export_tiny_llm_method_generate.custom.onnx"
7372forward_replacement = method_to_onnx (
7473 model ,
7574 method_name = "forward" , # default value
@@ -87,8 +86,12 @@ def generate_text(
8786 # The input used in the example has a batch size equal to 1, all
8887 # inputs going through method forward will have the same batch size.
8988 # To force the dynamism of this dimension, we need to indicate
90- # which inputs has a batch size.
89+ # which inputs have a batch size.
9190 dynamic_batch_for = {"input_ids" , "attention_mask" , "past_key_values" },
91+ # Earlier versions of pytorch did not accept a dynamic batch size equal to 1,
92+ # so this last parameter can be added to expand some inputs if the batch size is 1.
93+ # The exporter should work without it.
94+ expand_batch_for = {"input_ids" , "attention_mask" , "past_key_values" },
9295)
9396
9497# %%
@@ -139,6 +142,51 @@ def generate_text(
139142df = pandas .DataFrame (data )
140143print (df )
141144
145+ # %%
146+ # Minimal script to export a LLM
147+ # ++++++++++++++++++++++++++++++
148+ #
149+ # The following lines are a condensed copy with fewer comments.
150+
151+ # from HuggingFace
152+ print ("----------------" )
153+ MODEL_NAME = "arnir0/Tiny-LLM"
154+ tokenizer = AutoTokenizer .from_pretrained (MODEL_NAME )
155+ model = AutoModelForCausalLM .from_pretrained (MODEL_NAME )
156+
157+ # to export into onnx
158+ forward_replacement = method_to_onnx (
159+ model ,
160+ method_name = "forward" ,
161+ exporter = "onnx-dynamo" ,
162+ filename = "plot_export_tiny_llm_method_generate.dynamo.onnx" ,
163+ patch_kwargs = dict (patch_transformers = True ),
164+ verbose = 0 ,
165+ convert_after_n_calls = 3 ,
166+ dynamic_batch_for = {"input_ids" , "attention_mask" , "past_key_values" },
167+ )
168+ model .forward = lambda * args , ** kwargs : forward_replacement (* args , ** kwargs )
169+
170+ # from HuggingFace again
171+ prompt = "Continue: it rains, what should I do?"
172+ inputs = tokenizer (prompt , return_tensors = "pt" )
173+ outputs = model .generate (
174+ input_ids = inputs ["input_ids" ],
175+ attention_mask = inputs ["attention_mask" ],
176+ max_length = 100 ,
177+ temperature = 1 ,
178+ top_k = 50 ,
179+ top_p = 0.95 ,
180+ do_sample = True ,
181+ )
182+ generated_text = tokenizer .decode (outputs [0 ], skip_special_tokens = True )
183+ print ("prompt answer:" , generated_text )
184+
185+ # to check discrepancies
186+ data = forward_replacement .check_discrepancies ()
187+ df = pandas .DataFrame (data )
188+ print (df )
189+
142190
143191# %%
144192doc .save_fig (doc .plot_dot (filename ), f"{ filename } .png" , dpi = 400 )
0 commit comments