2 files changed: +10 −3

- src/llmcompressor/modifiers/transform/quip
- tests/llmcompressor/modifiers/transform

src/llmcompressor/modifiers/transform/quip:

```diff
@@ -45,6 +45,7 @@ class QuIPModifier(Modifier):
     )
     randomize: bool = Field(default=False, exclude=True)
     learnable: bool = Field(default=False, exclude=True)
+    precision:
     ignore: Union[str, List[str]] = Field(default="lm_head", exclude=True)
 
     # optional override for more fine-grained control
```
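The added `precision` field is truncated in this capture; only `precision:` survives. Judging from the neighboring field declarations, it presumably selects the dtype used for the transform arithmetic, which would also explain the tightened MSE thresholds in the test change below. A minimal sketch of what such a field could look like, where the `torch.dtype` annotation and the `torch.float64` default are assumptions rather than text read from the diff:

```python
from typing import List, Union

import torch
from pydantic import BaseModel, Field


class QuIPModifier(BaseModel):
    """Stand-in for the real modifier (which extends llm-compressor's
    Modifier base class); only the fields visible in the diff are shown."""

    # torch.dtype is not a pydantic-native type, so arbitrary types
    # must be allowed for the annotation below to validate
    model_config = {"arbitrary_types_allowed": True}

    randomize: bool = Field(default=False, exclude=True)
    learnable: bool = Field(default=False, exclude=True)
    # Hypothetical completion of the truncated "+ precision:" line:
    # the dtype used when constructing/applying the Hadamard transforms.
    # Annotation and default are assumptions, not confirmed by the diff.
    precision: torch.dtype = Field(default=torch.float64, exclude=True)
    ignore: Union[str, List[str]] = Field(default="lm_head", exclude=True)


print(QuIPModifier().precision)  # torch.float64 under the assumed default
```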
tests/llmcompressor/modifiers/transform:

```diff
@@ -1,3 +1,4 @@
+import os
 import pytest
 import torch
 from transformers import AutoModelForCausalLM
@@ -8,16 +9,20 @@
 
 
 @requires_gpu
+# @pytest.mark.skipif(
+#     (not os.getenv("HF_TOKEN")),
+#     reason="Skipping tracing tests requiring gated model access",
+# )
 @pytest.mark.parametrize(
     "dtype,exp_mse",
     [
-        (torch.bfloat16, 1e-2),
-        (torch.float32, 1e-9),
+        (torch.bfloat16, 5e-3),
+        (torch.float32, 5e-11),
     ],
 )
 def test_apply_correctness(dtype, exp_mse):
     model = AutoModelForCausalLM.from_pretrained(
-        "meta-llama/Meta-Llama-3-8B-Instruct", device_map="cuda", torch_dtype=dtype
+        "meta-llama/Llama-3.2-1B-Instruct", device_map="cuda", torch_dtype=dtype
     )
     state = State(model=model)
     modifier = QuIPModifier(transform_type="random-hadamard")
@@ -32,4 +37,5 @@ def test_apply_correctness(dtype, exp_mse):
     with torch.no_grad():
         output = model(**input)
 
+    print(torch.nn.MSELoss()(output.logits, true_output.logits))
     assert torch.nn.MSELoss()(output.logits, true_output.logits) <= exp_mse
```
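The stretch of the test between constructing the `QuIPModifier` and the final comparison (old lines 24–31) is collapsed out of the diff. For orientation, here is a sketch of the overall shape of the correctness check; the import paths are inferred from the file tree above, and the lifecycle calls (`on_initialize`/`on_start`) and the `dummy_inputs` usage are assumptions about the hidden lines, not part of the diff:

```python
import torch
from transformers import AutoModelForCausalLM

# Import paths inferred from the file tree above; not shown in the diff
from llmcompressor.core import State
from llmcompressor.modifiers.transform import QuIPModifier

# The PR swaps the 8B model for this much smaller one
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.2-1B-Instruct", device_map="cuda", torch_dtype=torch.float32
)
state = State(model=model)
modifier = QuIPModifier(transform_type="random-hadamard")

# Reference logits before any transforms are applied (dummy_inputs usage
# is an assumption about the elided middle of the test)
input = {k: v.to("cuda") for k, v in model.dummy_inputs.items()}
with torch.no_grad():
    true_output = model(**input)

# Assumed lifecycle calls: apply the rotation transforms in place
modifier.on_initialize(state)
modifier.on_start(state, None)

with torch.no_grad():
    output = model(**input)

# Paired Hadamard rotations cancel analytically, so the transformed model
# should reproduce the original logits up to floating-point error; the PR
# tightens this bound to 5e-11 for fp32 (and 5e-3 for bf16), consistent
# with the transform math running at higher precision.
mse = torch.nn.MSELoss()(output.logits, true_output.logits)
assert mse <= 5e-11
```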