Unit tests for launch script

tharapalanivel · tharapalanivel · commit de548abae046 · 2025-01-07T15:57:12.000-08:00
Signed-off-by: Thara Palanivel <130496890+tharapalanivel@users.noreply.github.com>
diff --git a/fms_mo/run_quant.py b/fms_mo/run_quant.py
@@ -151,6 +151,8 @@ def run_gptq(model_args, data_args, opt_args, gptq_args):
         quantize_config=quantize_config,
         torch_dtype=model_args.torch_dtype,
     )
+    if model_args.device:
+        model = model.to(model_args.device)
 
     logger.info(f"Loading data from {data_args.training_data_path}")
     tokenizer = AutoTokenizer.from_pretrained(
diff --git a/fms_mo/training_args.py b/fms_mo/training_args.py
@@ -60,6 +60,12 @@ class ModelArguments:
             )
         },
     )
+    device: str = field(
+        default=None,
+        metadata={
+            "help": ("`torch.device`: The device on which the module is (assuming that all the module parameters are on the same device).")
+        }
+    )
 
 
 @dataclass
diff --git a/tests/build/test_launch_script.py b/tests/build/test_launch_script.py
@@ -34,19 +34,22 @@
 from fms_mo.utils.import_utils import available_packages
 
 
-SCRIPT = "fms_mo/run_quant.py"
+SCRIPT = os.path.join(os.path.dirname(__file__), "../..", "fms_mo/run_quant.py")
 MODEL_NAME = "Maykeye/TinyLLama-v0"
 BASE_KWARGS = {
+    "accelerate_launch_args":{
+        "num_processes": 1
+    },
     "model_name_or_path": MODEL_NAME,
-    "output_dir": "tmp",
 }
 BASE_GPTQ_KWARGS = {
     **BASE_KWARGS,
     **{
         "quant_method": "gptq",
         "bits": 4,
-        "group_size": 128,
+        "group_size": 64,
         "training_data_path": WIKITEXT_TOKENIZED_DATA_JSON,
+        "device": "cuda"
     },
 }
 BASE_FP8_KWARGS = {
@@ -55,27 +58,192 @@
         "quant_method": "fp8",
     },
 }
-
+BASE_DQ_KWARGS = {
+    **BASE_KWARGS,
+    **{
+        "quant_method": "dq",
+        "nbits_w": 8,
+        "nbits_a": 8, 
+        "nbits_kvcache": 32,
+        "qa_mode": "fp8_e4m3_scale",
+        "qw_mode": "fp8_e4m3_scale",
+        "qmodel_calibration_new": 0,
+        "training_data_path": WIKITEXT_TOKENIZED_DATA_JSON,
+    },
+}
 
 def setup_env(tempdir):
-    os.environ["TRAINING_SCRIPT"] = SCRIPT
+    os.environ["OPTIMIZER_SCRIPT"] = SCRIPT
     os.environ["PYTHONPATH"] = "./:$PYTHONPATH"
     os.environ["TERMINATION_LOG_FILE"] = tempdir + "/termination-log"
+    os.environ["SET_NUM_PROCESSES_TO_NUM_GPUS"] = "False"
 
 
 def cleanup_env():
     os.environ.pop("OPTIMIZER_SCRIPT", None)
     os.environ.pop("PYTHONPATH", None)
     os.environ.pop("TERMINATION_LOG_FILE", None)
 
-### Tests for model dtype edge cases
+
 @pytest.mark.skipif(not available_packages["auto_gptq"], reason="Only runs if auto-gptq package is installed")
 def test_successful_gptq():
     """Check if we can gptq models"""
+    with tempfile.TemporaryDirectory() as tempdir:
+        setup_env(tempdir)
+        GPTQ_KWARGS = {**BASE_GPTQ_KWARGS, **{"output_dir": tempdir}}
+        serialized_args = serialize_args(GPTQ_KWARGS)
+        os.environ["FMS_MO_CONFIG_JSON_ENV_VAR"] = serialized_args
+
+        assert main() == 0
+
+        _validate_termination_files_when_quantization_succeeds(tempdir)
+        _validate_quantization_output(tempdir, "gptq")
+
+
+@pytest.mark.skipif(not available_packages["llmcompressor"], reason="Only runs if llm-compressor package is installed")
+def test_successful_fp8():
+    """Check if we can fp8 quantize models"""
+    with tempfile.TemporaryDirectory() as tempdir:
+        setup_env(tempdir)
+        FP8_KWARGS = {**BASE_FP8_KWARGS, **{"output_dir": tempdir}}
+        serialized_args = serialize_args(FP8_KWARGS)
+        os.environ["FMS_MO_CONFIG_JSON_ENV_VAR"] = serialized_args
+
+        assert main() == 0
+
+        _validate_termination_files_when_quantization_succeeds(tempdir)
+        _validate_quantization_output(tempdir, "fp8")
+
+
+def test_successful_dq():
+    """Check if we can dq models"""
+    with tempfile.TemporaryDirectory() as tempdir:
+        setup_env(tempdir)
+        DQ_KWARGS = {**BASE_DQ_KWARGS, **{"output_dir": tempdir}}
+        serialized_args = serialize_args(DQ_KWARGS)
+        os.environ["FMS_MO_CONFIG_JSON_ENV_VAR"] = serialized_args
+
+        assert main() == 0
+
+        _validate_termination_files_when_quantization_succeeds(tempdir)
+        _validate_quantization_output(tempdir, "dq")
+
+
+def test_bad_script_path():
+    """Check for appropriate error for an invalid optimization script location"""
     with tempfile.TemporaryDirectory() as tempdir:
         setup_env(tempdir)
         QUANT_KWARGS = {**BASE_KWARGS, **{"output_dir": tempdir}}
         serialized_args = serialize_args(QUANT_KWARGS)
         os.environ["FMS_MO_CONFIG_JSON_ENV_VAR"] = serialized_args
+        os.environ["OPTIMIZER_SCRIPT"] = "/not/here"
+
+        with pytest.raises(SystemExit) as pytest_wrapped_e:
+            main()
+        assert pytest_wrapped_e.type == SystemExit
+        assert pytest_wrapped_e.value.code == INTERNAL_ERROR_EXIT_CODE
+        assert os.stat(tempdir + "/termination-log").st_size > 0
+
+
+def test_blank_config_json_env_var():
+    with tempfile.TemporaryDirectory() as tempdir:
+        setup_env(tempdir)
+        os.environ["FMS_MO_CONFIG_JSON_ENV_VAR"] = ""
+        with pytest.raises(SystemExit) as pytest_wrapped_e:
+            main()
+        assert pytest_wrapped_e.type == SystemExit
+        assert pytest_wrapped_e.value.code == USER_ERROR_EXIT_CODE
+        assert os.stat(tempdir + "/termination-log").st_size > 0
+
+def test_blank_config_json_path():
+    with tempfile.TemporaryDirectory() as tempdir:
+        setup_env(tempdir)
+        os.environ["FMS_MO_CONFIG_JSON_PATH"] = ""
+        with pytest.raises(SystemExit) as pytest_wrapped_e:
+            main()
+        assert pytest_wrapped_e.type == SystemExit
+        assert pytest_wrapped_e.value.code == USER_ERROR_EXIT_CODE
+        assert os.stat(tempdir + "/termination-log").st_size > 0
+
+def test_faulty_file_path():
+    with tempfile.TemporaryDirectory() as tempdir:
+        setup_env(tempdir)
+        faulty_path = os.path.join(tempdir, "non_existent_file.pkl")
+        QUANT_KWARGS = {
+            **BASE_KWARGS,
+            **{"training_data_path": faulty_path, "output_dir": tempdir},
+        }
+        serialized_args = serialize_args(QUANT_KWARGS)
+        os.environ["FMS_MO_CONFIG_JSON_ENV_VAR"] = serialized_args
+        with pytest.raises(SystemExit) as pytest_wrapped_e:
+            main()
+        assert pytest_wrapped_e.type == SystemExit
+        assert pytest_wrapped_e.value.code == USER_ERROR_EXIT_CODE
+        assert os.stat(tempdir + "/termination-log").st_size > 0
+
+
+def test_bad_base_model_path():
+    with tempfile.TemporaryDirectory() as tempdir:
+        setup_env(tempdir)
+        DQ_KWARGS = {
+            **BASE_DQ_KWARGS,
+            **{"model_name_or_path": "/wrong/path", "output_dir": tempdir},
+        }
+        serialized_args = serialize_args(DQ_KWARGS)
+        os.environ["FMS_MO_CONFIG_JSON_ENV_VAR"] = serialized_args
+        with pytest.raises(SystemExit) as pytest_wrapped_e:
+            main()
+        assert pytest_wrapped_e.type == SystemExit
+        assert pytest_wrapped_e.value.code == USER_ERROR_EXIT_CODE
+        assert os.stat(tempdir + "/termination-log").st_size > 0
+
+
+def test_config_parsing_error():
+    with tempfile.TemporaryDirectory() as tempdir:
+        setup_env(tempdir)
+        DQ_KWARGS = {**BASE_DQ_KWARGS, **{"nbits_w": "eight", "output_dir": tempdir}}  # Intentional type error
+        serialized_args = serialize_args(DQ_KWARGS)
+        os.environ["FMS_MO_CONFIG_JSON_ENV_VAR"] = serialized_args
+        with pytest.raises(SystemExit) as pytest_wrapped_e:
+            main()
+        assert pytest_wrapped_e.type == SystemExit
+        assert pytest_wrapped_e.value.code == USER_ERROR_EXIT_CODE
+        assert os.stat(tempdir + "/termination-log").st_size > 0
+
+
+def _validate_termination_files_when_quantization_succeeds(base_dir):
+    # On success: no termination log under base_dir (relative name, so join keeps base_dir) and a .complete file exists
+    assert os.path.exists(os.path.join(base_dir, "termination-log")) is False
+    assert os.path.exists(os.path.join(base_dir, ".complete")) is True
+    # assert os.path.exists(os.path.join(base_dir, training_logs_filename)) is True
+
+
+def _validate_quantization_output(base_dir, quant_method):
+    # Check tokenizer files exist
+    assert os.path.exists(os.path.join(base_dir, "tokenizer.json")) is True
+    assert os.path.exists(os.path.join(base_dir, "special_tokens_map.json")) is True
+    assert os.path.exists(os.path.join(base_dir, "tokenizer_config.json")) is True
+    assert os.path.exists(os.path.join(base_dir, "tokenizer.model")) is True
+
+    # Check quantized model files exist
+    if quant_method == "gptq":
+        assert len(glob.glob(os.path.join(base_dir, "gptq_model-*.safetensors"))) > 0
+        assert os.path.exists(os.path.join(base_dir, "quantize_config.json")) is True
+        assert os.path.exists(os.path.join(base_dir, "config.json")) is True
+
+    elif quant_method ==  "fp8":
+        assert len(glob.glob(os.path.join(base_dir, "model*.safetensors"))) > 0
+        assert os.path.exists(os.path.join(base_dir, "generation_config.json")) is True
+        assert os.path.exists(os.path.join(base_dir, "config.json")) is True
+        assert os.path.exists(os.path.join(base_dir, "recipe.yaml")) is True
+
+    elif quant_method ==  "dq":
+        assert len(glob.glob(os.path.join(base_dir, "model*.safetensors"))) > 0
+        assert os.path.exists(os.path.join(base_dir, "generation_config.json")) is True
+        assert os.path.exists(os.path.join(base_dir, "config.json")) is True
+
 
-        assert main() == 0
+def test_cleanup():
+    # Runs to unset env variables that could disrupt other tests
+    cleanup_env()
+    assert True

Original file line number	Diff line number	Diff line change
`@@ -151,6 +151,8 @@ def run_gptq(model_args, data_args, opt_args, gptq_args):`
`151`	`151`	`quantize_config=quantize_config,`
`152`	`152`	`torch_dtype=model_args.torch_dtype,`
`153`	`153`	`)`
	`154`	`+ if model_args.device:`
	`155`	`+ model = model.to(model_args.device)`
`154`	`156`
`155`	`157`	`logger.info(f"Loading data from {data_args.training_data_path}")`
`156`	`158`	`tokenizer = AutoTokenizer.from_pretrained(`
Original file line number	Diff line number	Diff line change
`@@ -60,6 +60,12 @@ class ModelArguments:`
`60`	`60`	`)`
`61`	`61`	`},`
`62`	`62`	`)`
	`63`	`+ device: str = field(`
	`64`	`+ default=None,`
	`65`	`+ metadata={`
	`66`	+ "help": ("`torch.device`: The device on which the module is (assuming that all the module parameters are on the same device).")
	`67`	`+ }`
	`68`	`+ )`
`63`	`69`
`64`	`70`
`65`	`71`	`@dataclass`