 from auto_fp8 import AutoFP8ForCausalLM, BaseQuantizeConfig

 MODELS = [
+<<<<<<< HEAD
 <<<<<<< HEAD
     ("facebook/opt-125m", 160),
     ("Qwen/Qwen2-0.5B-Instruct", 620),
@@ -32,10 +33,14 @@ def test_dynamic_quantization():
 =======
     "facebook/opt-125m",
     "Qwen/Qwen2-0.5B-Instruct",
+=======
+    ("facebook/opt-125m", 160),
+    ("Qwen/Qwen2-0.5B-Instruct", 600),
+>>>>>>> 415c0b7 (Add fixed target sizes)
 ]

-@pytest.mark.parametrize("model_id", MODELS)
-def test_dynamic_quantization(model_id):
+@pytest.mark.parametrize("model_id,target_size", MODELS)
+def test_dynamic_quantization(model_id, target_size):
     quantized_model_dir = model_id.split("/")[-1] + "-fp8-dynamic"
 >>>>>>> 2739d61 (Add Qwen test)

@@ -53,6 +58,7 @@ def test_dynamic_quantization(model_id):
     model_size = os.path.getsize(f"{quantized_model_dir}/model.safetensors")
     shutil.rmtree(quantized_model_dir)

+<<<<<<< HEAD
 <<<<<<< HEAD
     # We expect the quantized model to be a certain size
     target_size = target_size * (1024 * 1024)
@@ -76,6 +82,15 @@ def test_static_quantization():
 =======
 @pytest.mark.parametrize("model_id", MODELS)
 def test_static_quantization(model_id):
+=======
+    # We expect the model to be a certain size
+    target_size = target_size * (1024 * 1024)
+    assert model_size < target_size
+
+
+@pytest.mark.parametrize("model_id,target_size", MODELS)
+def test_static_quantization(model_id, target_size):
+>>>>>>> 415c0b7 (Add fixed target sizes)
     quantized_model_dir = model_id.split("/")[-1] + "-fp8-static"
 >>>>>>> 2739d61 (Add Qwen test)

@@ -95,6 +110,7 @@ def test_static_quantization(model_id):
     model_size = os.path.getsize(f"{quantized_model_dir}/model.safetensors")
     shutil.rmtree(quantized_model_dir)

+<<<<<<< HEAD
     # We expect the quantized model to be a certain size
     target_size = target_size * (1024 * 1024)
     assert model_size < target_size
@@ -134,11 +150,14 @@ def test_kv_cache_static_quantization(model_id, target_size):
     shutil.rmtree(quantized_model_dir)

     # We expect the quantized model to be a certain size
+=======
+    # We expect the model to be < 160MB
+>>>>>>> 415c0b7 (Add fixed target sizes)
     target_size = target_size * (1024 * 1024)
     assert model_size < target_size

-@pytest.mark.parametrize("model_id", MODELS)
-def test_kv_cache_static_quantization(model_id):
+@pytest.mark.parametrize("model_id,target_size", MODELS)
+def test_kv_cache_static_quantization(model_id, target_size):
     quantized_model_dir = model_id.split("/")[-1] + "-fp8-static-kv"

     tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
@@ -172,5 +191,5 @@ def test_kv_cache_static_quantization(model_id):
     shutil.rmtree(quantized_model_dir)

     # We expect the model to be < 160MB
-    target_size = 160 * (1024 * 1024)
+    target_size = target_size * (1024 * 1024)
     assert model_size < target_size
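
For reference, below is a minimal sketch of what test_dynamic_quantization looks like once both conflict layers are resolved in favor of the tuple-based MODELS list (taking HEAD's 620 MB Qwen budget; the 415c0b7 side uses 600). The diff elides the quantize-and-save middle of the test, so the BaseQuantizeConfig(...), AutoFP8ForCausalLM.from_pretrained(...), model.quantize(...), and model.save_quantized(...) calls here follow AutoFP8's documented usage and are assumptions, not lines from this diff:

import os
import shutil

import pytest
from auto_fp8 import AutoFP8ForCausalLM, BaseQuantizeConfig

# (model_id, size budget in MB) pairs, matching the resolved MODELS list.
MODELS = [
    ("facebook/opt-125m", 160),
    ("Qwen/Qwen2-0.5B-Instruct", 620),
]


@pytest.mark.parametrize("model_id,target_size", MODELS)
def test_dynamic_quantization(model_id, target_size):
    quantized_model_dir = model_id.split("/")[-1] + "-fp8-dynamic"

    # Assumed middle section (elided by the diff): quantize with dynamic
    # activation scales and write model.safetensors to disk.
    quantize_config = BaseQuantizeConfig(
        quant_method="fp8", activation_scheme="dynamic"
    )
    model = AutoFP8ForCausalLM.from_pretrained(model_id, quantize_config)
    model.quantize([])  # dynamic scheme needs no calibration examples
    model.save_quantized(quantized_model_dir)

    # Shown in the diff: measure the checkpoint, clean up, and compare
    # against the per-model budget, converting MB to bytes.
    model_size = os.path.getsize(f"{quantized_model_dir}/model.safetensors")
    shutil.rmtree(quantized_model_dir)

    target_size = target_size * (1024 * 1024)
    assert model_size < target_size

Carrying the budget in the parametrize tuple keeps the assertion identical across all three tests and removes the hard-coded 160 * (1024 * 1024) that the last hunk deletes.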