Skip to content

Commit 97c8d30

Browse files
committed
simplify example
Signed-off-by: Kyle Sayers <[email protected]>
1 parent cf1f87d commit 97c8d30

File tree

1 file changed

+1
-7
lines changed

1 file changed

+1
-7
lines changed

examples/quantization_w4a16/llama3_example.py

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
-import torch
 from datasets import load_dataset
 from transformers import AutoModelForCausalLM, AutoTokenizer

@@ -8,11 +7,7 @@
 # Select model and load it.
 MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"

-model = AutoModelForCausalLM.from_pretrained(
-    MODEL_ID,
-    device_map="cpu",
-    torch_dtype="auto",
-)
+model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto")
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

 # Select calibration dataset.
@@ -65,7 +60,6 @@ def tokenize(sample):
     recipe=recipe,
     max_seq_length=MAX_SEQUENCE_LENGTH,
     num_calibration_samples=NUM_CALIBRATION_SAMPLES,
-    oneshot_device=torch.device("cuda") if torch.cuda.is_available() else None,
 )

 # Confirm generations of the quantized model look sane.

0 commit comments

Comments
 (0)