
Commit 97a3e2c

simplify the example
1 parent 6772a10 commit 97a3e2c

File tree

1 file changed: +8 -36 lines changed


examples/gemma3.py

Lines changed: 8 additions & 36 deletions
@@ -1,9 +1,7 @@
-"""Simple example script for Gemma3 270M text generation using ONNX.
-
-Installation:
-    uv pip install onnxruntime
+"""Simple example: Export Gemma3 270M to ONNX and generate text.
 
 Usage:
+    uv pip install onnxruntime
     uv run examples/gemma3.py
 """
 

@@ -14,40 +12,14 @@
 
 model_id = "google/gemma-3-270m-it"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
+model = ORTModelForCausalLM.from_pretrained(model_id, export=True)
 
-# Export to ONNX
-model = ORTModelForCausalLM.from_pretrained(
-    model_id,
-    export=True,
-    use_cache=True,
-)
-
-# Inference
-conversation = [
-    {"role": "user", "content": "Hello! How are you?"}
-]
-
-# Apply chat template
-prompt = tokenizer.apply_chat_template(
-    conversation,
-    tokenize=False,
-    add_generation_prompt=True
-)
-
+# Chat with instruction-tuned model
+conversation = [{"role": "user", "content": "Hello! How are you?"}]
+prompt = tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
 inputs = tokenizer(prompt, return_tensors="pt")
 
-outputs = model.generate(
-    **inputs,
-    max_new_tokens=100,
-    do_sample=True,
-    temperature=0.7,
-    top_p=0.9,
-    pad_token_id=tokenizer.eos_token_id,
-)
-
-# Decode
+outputs = model.generate(**inputs, max_new_tokens=50, pad_token_id=tokenizer.eos_token_id)
 response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-if prompt in response:
-    response = response[len(prompt):].strip()
 
-print(f"Response: {response}\n")
+print(response)
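
For reference, here is a sketch of the complete example as it stands after this commit, reconstructed from the diff above. The hunks skip the file's unchanged lines, so the import block is not visible; the two imports below are assumptions inferred from the APIs the script calls:

"""Simple example: Export Gemma3 270M to ONNX and generate text.

Usage:
    uv pip install onnxruntime
    uv run examples/gemma3.py
"""

# Assumed imports: not shown in the diff, inferred from the calls below.
from optimum.onnxruntime import ORTModelForCausalLM
from transformers import AutoTokenizer

model_id = "google/gemma-3-270m-it"
tokenizer = AutoTokenizer.from_pretrained(model_id)
# export=True converts the PyTorch checkpoint to ONNX on load
model = ORTModelForCausalLM.from_pretrained(model_id, export=True)

# Chat with instruction-tuned model
conversation = [{"role": "user", "content": "Hello! How are you?"}]
prompt = tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
inputs = tokenizer(prompt, return_tensors="pt")

outputs = model.generate(**inputs, max_new_tokens=50, pad_token_id=tokenizer.eos_token_id)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(response)

Two behavioral notes on the simplification: use_cache=True is the default for ORTModelForCausalLM.from_pretrained, so dropping it should not change behavior, while removing do_sample, temperature, and top_p switches generation from sampling to the greedy default.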
