Commit 79ff313
[Tests] Remove the compress entrypoint (#1317)
Summary
- We no longer support the `compress` entrypoint, an alias of `apply`, since the removal of the StageRunner landed.
- Update the test case to reflect this.
- Also update the recipe so that the constant pruning modifier targets the same layers as the SparseGPT modifier; `down_proj` was missing, resulting in different sparsities.
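Since the hunks below only show the changed lines, here is a minimal, self-contained sketch of the new two-entrypoint flow, trimmed to the arguments visible in the updated test. The model id, dataset id, and `run_stages=True` value are placeholders, not taken from this commit.

from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer

from llmcompressor import oneshot, train

MODEL_ID = "<model-id>"      # placeholder; the test's model id lives outside these hunks
DATASET_ID = "<dataset-id>"  # placeholder

model = AutoModelForCausalLM.from_pretrained(MODEL_ID, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
dataset = load_dataset(DATASET_ID)

# Arguments shared by both entrypoints, mirroring the test's model_and_data_kwargs
shared_kwargs = dict(
    dataset=dataset,
    recipe="tests/llmcompressor/transformers/finetune/test_alternate_recipe.yaml",
    splits={"train": "train[:50%]", "calibration": "train[50%:60%]"},
    tokenizer=tokenizer,
    output_dir="./sparsity_finetune_output",
)

# oneshot runs the calibration-time stage the old compress alias used to reach...
sparse_model = oneshot(model=model, stage="test_oneshot_stage", **shared_kwargs)

# ...and train finetunes the returned model with the recipe's train stage.
finetuned_model = train(
    model=sparse_model,
    run_stages=True,  # assumed value; the test reads this from a class attribute
    max_steps=50,
    stage="test_train_stage",
    **shared_kwargs,
)

Factoring the shared kwargs into one dict keeps the two calls in sync, which is the same pattern the updated test adopts below.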
Parent: eb7c61c

2 files changed, +19 -8 lines

tests/llmcompressor/transformers/finetune/test_alternate_recipe.yaml

Lines changed: 1 addition & 0 deletions
@@ -15,6 +15,7 @@ test_train_stage:
         "re:.*self_attn.k_proj",
         "re:.*self_attn.v_proj",
         "re:.*self_attn.o_proj",
+        "re:.*mlp.down_proj",
         "re:.*mlp.gate_proj",
         "re:.*mlp.up_proj"
       ]
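For orientation, a sketch of the recipe stage this hunk edits, assuming llmcompressor's usual stage/modifier YAML layout; the `pruning_modifiers` group name is an assumption, and only the targets visible in the hunk are listed (entries above line 15 are elided in the diff).

test_train_stage:
  pruning_modifiers:          # group name assumed, not shown in the hunk
    ConstantPruningModifier:
      targets: [
        # ...entries above the hunk are elided in the diff...
        "re:.*self_attn.k_proj",
        "re:.*self_attn.v_proj",
        "re:.*self_attn.o_proj",
        "re:.*mlp.down_proj",   # newly added so the list matches the SparseGPT targets
        "re:.*mlp.gate_proj",
        "re:.*mlp.up_proj"
      ]

Keeping this list identical to the SparseGPT modifier's targets is what holds the pruned layers' sparsity constant through finetuning.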

tests/llmcompressor/transformers/finetune/test_oneshot_and_finetune_with_tokenizer.py

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,15 @@ class TestOneshotAndFinetuneWithTokenizer(unittest.TestCase):
1818
dataset_config_name = None
1919

2020
def setUp(self):
21-
self.output = "./finetune_output"
21+
self.output = "./sparsity_finetune_output"
2222
# finetune workflows in general seem to have trouble with multi-gpus
2323
# use just one atm
2424

2525
def test_oneshot_and_finetune_with_tokenizer(self):
2626
from datasets import load_dataset
2727
from transformers import AutoModelForCausalLM, AutoTokenizer
2828

29-
from llmcompressor.transformers import compress
29+
from llmcompressor import oneshot, train
3030

3131
recipe_str = (
3232
"tests/llmcompressor/transformers/finetune/test_alternate_recipe.yaml"
@@ -47,23 +47,33 @@ def test_oneshot_and_finetune_with_tokenizer(self):
4747
max_steps = 50
4848
splits = {"train": "train[:50%]", "calibration": "train[50%:60%]"}
4949

50-
compress(
51-
model=model_loaded,
50+
model_and_data_kwargs = dict(
5251
dataset=dataset_loaded,
5352
dataset_config_name=self.dataset_config_name,
54-
run_stages=run_stages,
55-
output_dir=self.output,
5653
recipe=recipe_str,
57-
max_steps=max_steps,
5854
concatenate_data=concatenate_data,
5955
splits=splits,
6056
tokenizer=tokenizer,
57+
output_dir=self.output,
58+
)
59+
60+
oneshot_model = oneshot(
61+
model=model_loaded,
62+
**model_and_data_kwargs,
63+
stage="test_oneshot_stage",
64+
)
65+
finetune_model = train(
66+
run_stages=run_stages,
67+
model=oneshot_model,
68+
max_steps=max_steps,
69+
stage="test_train_stage",
70+
**model_and_data_kwargs,
6171
)
6272

6373
input_ids = tokenizer("Hello my name is", return_tensors="pt").input_ids.to(
6474
"cuda"
6575
)
66-
output = model_loaded.generate(input_ids, max_new_tokens=100)
76+
output = finetune_model.generate(input_ids, max_new_tokens=20)
6777
print(tokenizer.decode(output[0]))
6878

6979
def tearDown(self):
