2 files changed: +11 −9 lines changed

@@ -75,7 +75,7 @@ def skip(*args, **kwargs):
         model_init_kwargs["device_map"] = "auto"
 
         merged_kwargs = {**model_init_kwargs, **cached_file_kwargs}
-        print(merged_kwargs)
+        print("Loading model with the following kwargs:", merged_kwargs)
         model = AutoModelForCausalLM.from_pretrained(
             pretrained_model_name_or_path, **merged_kwargs
         )
@@ -102,10 +102,10 @@ def _prepare_calibration_data(calibration_tokens):
                 return calibration_tokens.input_ids
             return calibration_tokens
 
-        if self.quantize_config.activation_scheme == "dynamic":
-            quantize_weights(self.model)
-        else:
-            quantize_weights(self.model)
+        # Always quantize the weights as they do not require calibration data
+        quantize_weights(self.model)
+
+        if self.quantize_config.activation_scheme == "static":
             quantize_activations(
                 self.model, _prepare_calibration_data(calibration_tokens)
             )
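Note on the change above: weight quantization needs no calibration pass because the FP8 scale can be derived directly from each weight tensor, so the old if/else (which called quantize_weights on both branches anyway) collapses to a single unconditional call. A minimal per-tensor sketch of that idea, assuming PyTorch 2.1+ float8 support; quantize_weight_per_tensor is a hypothetical helper, not the library's actual quantize_weights:

import torch

FP8_E4M3_MAX = 448.0  # largest magnitude representable in torch.float8_e4m3fn

def quantize_weight_per_tensor(weight: torch.Tensor):
    # Pick a scale that maps the largest weight magnitude onto the FP8 range.
    scale = weight.abs().max().clamp(min=1e-12) / FP8_E4M3_MAX
    qweight = (weight / scale).clamp(-FP8_E4M3_MAX, FP8_E4M3_MAX).to(torch.float8_e4m3fn)
    return qweight, scale  # dequantize as qweight.to(weight.dtype) * scale

Activations are different: under the "static" scheme their scales must be estimated from calibration data, which is why quantize_activations now runs only in that branch.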
@@ -1,14 +1,16 @@
 from transformers import AutoTokenizer
 from auto_fp8 import AutoFP8ForCausalLM, BaseQuantizeConfig
 
-pretrained_model_dir = "facebook/opt-125m"
-quantized_model_dir = "opt-125m-fp8"
+pretrained_model_dir = "meta-llama/Meta-Llama-3-8B-Instruct"
+quantized_model_dir = "Meta-Llama-3-8B-Instruct-FP8"
 
 tokenizer = AutoTokenizer.from_pretrained(pretrained_model_dir, use_fast=True)
-examples = ["auto-fp8 is an easy-to-use model quantization library"]
+examples = ["auto_fp8 is an easy-to-use model quantization library"]
 examples = tokenizer(examples, return_tensors="pt").to("cuda")
 
-quantize_config = BaseQuantizeConfig(quant_method="fp8", activation_scheme="static")
+quantize_config = BaseQuantizeConfig(
+    quant_method="fp8", activation_scheme="dynamic"
+)  # or "static"
 
 model = AutoFP8ForCausalLM.from_pretrained(
     pretrained_model_dir, quantize_config=quantize_config
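The diff cuts off at the from_pretrained call; the example typically continues by quantizing and saving the model. A sketch of that tail, assuming AutoFP8's quantize and save_quantized methods:

model.quantize(examples)  # calibration examples are only used for activation scales when activation_scheme="static"
model.save_quantized(quantized_model_dir)

With "dynamic", activation scales are computed on the fly at inference time, so no representative calibration data is required; "static" fixes the scales ahead of time from the calibration pass, trading a data requirement for cheaper inference.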