Fix proj linear count: match `k_proj.weight`/`v_proj.weight` tensor names in the KV cache test

mgoin · mgoin · commit 0eac98363dfe · 2024-07-18T17:07:34.000-04:00
diff --git a/tests/test_auto_fp8.py b/tests/test_auto_fp8.py
@@ -35,8 +35,12 @@ def test_dynamic_quantization():
     "Qwen/Qwen2-0.5B-Instruct",
 =======
     ("facebook/opt-125m", 160),
+<<<<<<< HEAD
     ("Qwen/Qwen2-0.5B-Instruct", 600),
 >>>>>>> 415c0b7 (Add fixed target sizes)
+=======
+    ("Qwen/Qwen2-0.5B-Instruct", 620),
+>>>>>>> 93c0d54 (Fix proj linear count)
 ]
 
 @pytest.mark.parametrize("model_id,target_size", MODELS)
@@ -180,7 +184,7 @@ def test_kv_cache_static_quantization(model_id, target_size):
     proj_linear_count = 0
     output_scale_count = 0
     for name, _ in tensors.items():
-        if name.endswith("k_proj") or name.endswith("v_proj"):
+        if name.endswith("k_proj.weight") or name.endswith("v_proj.weight"):
             proj_linear_count += 1
         if name.endswith("k_proj.output_scale") or name.endswith("v_proj.output_scale"):
             output_scale_count += 1