 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import os
+
 import pytest
 import torch
 from compressed_tensors.transform import (
     apply_transform_config,
 )
 from compressed_tensors.utils import offloaded_dispatch
+from safetensors import safe_open
 from tests.testing_utils import requires_accelerate, requires_gpu
+from transformers import AutoModelForCausalLM, AutoTokenizer
 
 
 @pytest.mark.parametrize("type", ("hadamard", "random-hadamard"))
@@ -38,15 +42,57 @@ def test_serialization(type, randomize, model_apply, tmp_path, offload=False):
     apply_transform_config(model, config)
 
     # save model
-    model.save_pretrained(tmp_path)
+    model_path = os.path.join(tmp_path, "test_model_path")
+    model.save_pretrained(model_path)
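+    # (save_pretrained writes model.safetensors by default in recent transformers,
+    # which is why the hard-coded filename below is expected to exist)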
+
+    # check that saved values match model values
+    # note that shared weights are only serialized once
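+    # (hence the subset check "<=" below, rather than exact key equality)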
+    safetensors_path = os.path.join(model_path, "model.safetensors")
+    with safe_open(safetensors_path, framework="pt", device="cpu") as file:
+        saved_keys = set(file.keys())
+        assert {
+            "fcs.0.weight",
+            "fcs.1.weight",
+            "fcs.2.weight",
+            "fcs.3.weight",
+            "fcs.4.weight",
+        } <= saved_keys
+        for key in saved_keys:
+            param = model.get_parameter(key)
+            saved_param = file.get_tensor(key)
 
-    # TODO: reload model
+            if param.device.type != "meta":  # skip testing values in offload case
+                assert torch.equal(param, saved_param)
 
 
-@pytest.mark.skip(reason="Requires changes in upstream transformers")
 @requires_gpu
 @requires_accelerate()
 @pytest.mark.parametrize("type", ("hadamard", "random-hadamard"))
 @pytest.mark.parametrize("randomize", (True, False))
 def test_serialization_offload(type, randomize, model_apply, tmp_path):
     test_serialization(type, randomize, model_apply, tmp_path, offload=True)
+
+
+@pytest.mark.skip("Requires transformers#40673")
+@requires_gpu
+@pytest.mark.parametrize(
+    "model_stub,exp_perplexity",
+    [
+        ("nm-testing/Llama-3.2-1B-Instruct-spinquantR1R2R4-w4a16", 10.0),
+        ("nm-testing/Llama-3.2-1B-Instruct-quip-w4a16", 10.0),
+    ],
+)
+def test_load_perplexity(model_stub, exp_perplexity):
+    model = AutoModelForCausalLM.from_pretrained(model_stub, device_map="cuda")
+    tokenizer = AutoTokenizer.from_pretrained(model_stub)
+
+    prompt = "The capital of France is Paris, the capital of Germany is Berlin"
+    inputs = tokenizer(prompt, return_tensors="pt")
+    inputs = {key: value.to(model.device) for key, value in inputs.items()}
+    labels = inputs["input_ids"]
+
+    with torch.no_grad():
+        outputs = model(**inputs, labels=labels)
+
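+    # with labels passed, outputs.loss is the mean next-token cross-entropy
+    # (the model shifts labels internally), so exp(loss) is the perplexity;
+    # the parametrized 10.0 serves as a coarse upper bound for these checkpoints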
+    perplexity = torch.exp(outputs.loss)
+    assert perplexity <= exp_perplexity