|
1 | | -"""Simple example script for Gemma3 270M text generation using ONNX. |
2 | | -
|
3 | | -Installation: |
4 | | - uv pip install onnxruntime |
| 1 | +"""Simple example: Export Gemma3 270M to ONNX and generate text. |
5 | 2 |
|
6 | 3 | Usage: |
| 4 | + uv pip install onnxruntime |
7 | 5 | uv run examples/gemma3.py |
8 | 6 | """ |
9 | 7 |
|
|
14 | 12 |
|
15 | 13 | model_id = "google/gemma-3-270m-it" |
16 | 14 | tokenizer = AutoTokenizer.from_pretrained(model_id) |
| 15 | +model = ORTModelForCausalLM.from_pretrained(model_id, export=True) |
17 | 16 |
|
18 | | -# Export to ONNX |
19 | | -model = ORTModelForCausalLM.from_pretrained( |
20 | | - model_id, |
21 | | - export=True, |
22 | | - use_cache=True, |
23 | | -) |
24 | | - |
25 | | -# Inference |
26 | | -conversation = [ |
27 | | - {"role": "user", "content": "Hello! How are you?"} |
28 | | -] |
29 | | - |
30 | | -# Apply chat template |
31 | | -prompt = tokenizer.apply_chat_template( |
32 | | - conversation, |
33 | | - tokenize=False, |
34 | | - add_generation_prompt=True |
35 | | -) |
36 | | - |
| 17 | +# Chat with instruction-tuned model |
| 18 | +conversation = [{"role": "user", "content": "Hello! How are you?"}] |
| 19 | +prompt = tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True) |
37 | 20 | inputs = tokenizer(prompt, return_tensors="pt") |
38 | 21 |
|
39 | | -outputs = model.generate( |
40 | | - **inputs, |
41 | | - max_new_tokens=100, |
42 | | - do_sample=True, |
43 | | - temperature=0.7, |
44 | | - top_p=0.9, |
45 | | - pad_token_id=tokenizer.eos_token_id, |
46 | | -) |
47 | | - |
48 | | -# Decode |
| 22 | +outputs = model.generate(**inputs, max_new_tokens=50, pad_token_id=tokenizer.eos_token_id) |
49 | 23 | response = tokenizer.decode(outputs[0], skip_special_tokens=True) |
50 | | -if prompt in response: |
51 | | - response = response[len(prompt):].strip() |
52 | 24 |
|
53 | | -print(f"Response: {response}\n") |
| 25 | +print(response) |
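Note: `outputs[0]` still contains the prompt tokens, so the simplified `print(response)` echoes the chat-template text before the model's reply. If the example should print only the reply, slicing the generated ids by the prompt length is a more robust alternative to the removed string check — a minimal sketch, reusing the script's `inputs`, `outputs`, and `tokenizer`:

    # Decode only the tokens generated after the prompt, so the
    # chat-template text is not echoed back in the output.
    prompt_len = inputs["input_ids"].shape[1]
    response = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
    print(response)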
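Note: with `export=True`, the checkpoint is re-converted to ONNX on every run. If the export step becomes a bottleneck, an Optimum model can be saved after the first export and loaded directly afterwards — a minimal sketch, where `gemma3_onnx/` is an arbitrary local directory:

    # First run: export once, then persist the ONNX model and tokenizer.
    model.save_pretrained("gemma3_onnx")
    tokenizer.save_pretrained("gemma3_onnx")

    # Later runs: load the already-exported model, skipping the conversion.
    model = ORTModelForCausalLM.from_pretrained("gemma3_onnx")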