File tree Expand file tree Collapse file tree 1 file changed +1
-7
lines changed
examples/quantization_w4a16 Expand file tree Collapse file tree 1 file changed +1
-7
lines changed Original file line number Diff line number Diff line change 1
- import torch
2
1
from datasets import load_dataset
3
2
from transformers import AutoModelForCausalLM , AutoTokenizer
4
3
8
7
# Select model and load it.
9
8
MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"
10
9
11
- model = AutoModelForCausalLM .from_pretrained (
12
- MODEL_ID ,
13
- device_map = "cpu" ,
14
- torch_dtype = "auto" ,
15
- )
10
+ model = AutoModelForCausalLM .from_pretrained (MODEL_ID , torch_dtype = "auto" )
16
11
tokenizer = AutoTokenizer .from_pretrained (MODEL_ID )
17
12
18
13
# Select calibration dataset.
@@ -65,7 +60,6 @@ def tokenize(sample):
65
60
recipe = recipe ,
66
61
max_seq_length = MAX_SEQUENCE_LENGTH ,
67
62
num_calibration_samples = NUM_CALIBRATION_SAMPLES ,
68
- oneshot_device = torch .device ("cuda" ) if torch .cuda .is_available () else None ,
69
63
)
70
64
71
65
# Confirm generations of the quantized model look sane.
You can’t perform that action at this time.
0 commit comments