remove device arg from e2e

kylesayrs · kylesayrs · commit 7d7b00d09e1d · 2025-06-16T14:57:39.000-04:00
Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>
diff --git a/src/llmcompressor/entrypoints/README.md b/src/llmcompressor/entrypoints/README.md
@@ -112,7 +112,6 @@ output_dir = "./oneshot_model"
 # The model to train
 model = AutoModelForCausalLM.from_pretrained(
     output_dir,
-    device_map="auto",
     quantization_config=CompressedTensorsConfig(run_compressed=False),
 )
 
@@ -146,7 +145,6 @@ Comparisons are defined in `/src/llmcompressor/modifiers/distillation/utils/pyto
 # Define the teacher model
 distill_teacher = AutoModelForCausalLM.from_pretrained(
     "meta-llama/Meta-Llama-3-8B-Instruct",  
-    device_map="auto",
 )
 
 # Define the recipe, use knowledge distillation modifier and target the `model.layers` using a regex with
@@ -236,7 +234,6 @@ num_calibration_samples = 8  # The number of workers processing datasets in para
 # Define teacher model
 distill_teacher = AutoModelForCausalLM.from_pretrained(
     "meta-llama/Meta-Llama-3-8B-Instruct",
-    device_map="auto",
 )
 
 # Define the recipe, use knowledge distillation modifier and target the `model.layers` using a regex with
diff --git a/src/llmcompressor/entrypoints/utils.py b/src/llmcompressor/entrypoints/utils.py
@@ -86,7 +86,8 @@ def post_process(
         ValueError: If saving fails due to an invalid `output_dir` or other issues.
     """
     # remove any existing dispatches
-    remove_dispatch(model_args.model)
+    if model_args is not None and model_args.model is not None:
+        remove_dispatch(model_args.model)
 
     if model_args is not None and output_dir is not None:
         if recipe_args is not None and getattr(recipe_args, "stage", None) is not None:
diff --git a/tests/e2e/e2e_utils.py b/tests/e2e/e2e_utils.py
@@ -14,28 +14,21 @@
 def _load_model_and_processor(
     model: str,
     model_class: str,
-    device: str,
 ):
     pretrained_model_class = getattr(transformers, model_class)
-    loaded_model = pretrained_model_class.from_pretrained(
-        model, device_map=device, torch_dtype="auto"
-    )
+    loaded_model = pretrained_model_class.from_pretrained(model, torch_dtype="auto")
     processor = AutoProcessor.from_pretrained(model)
     return loaded_model, processor
 
 
 @log_time
-def _run_oneshot(device: str, **oneshot_kwargs):
-    oneshot(
-        **oneshot_kwargs,
-        oneshot_device=device,
-    )
+def _run_oneshot(**oneshot_kwargs):
+    oneshot(**oneshot_kwargs)
 
 
 def run_oneshot_for_e2e_testing(
     model: str,
     model_class: str,
-    device: str,
     num_calibration_samples: int,
     max_seq_length: int,
     dataset_id: str,
@@ -49,7 +42,7 @@ def run_oneshot_for_e2e_testing(
     oneshot_kwargs = {}
 
     loaded_model, processor = _load_model_and_processor(
-        model=model, model_class=model_class, device=device
+        model=model, model_class=model_class
     )
 
     if dataset_id:
@@ -86,6 +79,6 @@ def data_collator(batch):
 
     # Apply quantization.
     logger.info("ONESHOT KWARGS", oneshot_kwargs)
-    _run_oneshot(device=device, **oneshot_kwargs)
+    _run_oneshot(**oneshot_kwargs)
 
     return oneshot_kwargs["model"], processor
diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py
@@ -88,7 +88,6 @@ def set_up(self, test_data_file: str):
         logger.info("========== RUNNING ==============")
         logger.info(self.save_dir)
 
-        self.device = "cuda:0"
         self.prompts = [
             "The capital of France is",
             "The president of the US is",
@@ -105,7 +104,6 @@ def test_vllm(self, test_data_file: str):
         oneshot_model, tokenizer = run_oneshot_for_e2e_testing(
             model=self.model,
             model_class=self.model_class,
-            device=self.device,
             num_calibration_samples=self.num_calibration_samples,
             max_seq_length=self.max_seq_length,
             scheme=self.scheme,

Original file line number	Diff line number	Diff line change
`@@ -112,7 +112,6 @@ output_dir = "./oneshot_model"`
`112`	`112`	`# The model to train`
`113`	`113`	`model = AutoModelForCausalLM.from_pretrained(`
`114`	`114`	`output_dir,`
`115`		`- device_map="auto",`
`116`	`115`	`quantization_config=CompressedTensorsConfig(run_compressed=False),`
`117`	`116`	`)`
`118`	`117`
@@ -146,7 +145,6 @@ Comparisons are defined in `/src/llmcompressor/modifiers/distillation/utils/pyto
`146`	`145`	`# Define the teacher model`
`147`	`146`	`distill_teacher = AutoModelForCausalLM.from_pretrained(`
`148`	`147`	`"meta-llama/Meta-Llama-3-8B-Instruct",`
`149`		`- device_map="auto",`
`150`	`148`	`)`
`151`	`149`
`152`	`150`	# Define the recipe, use knowledge distillation modifier and target the `model.layers` using a regex with
`@@ -236,7 +234,6 @@ num_calibration_samples = 8 # The number of workers processing datasets in para`
`236`	`234`	`# Define teacher model`
`237`	`235`	`distill_teacher = AutoModelForCausalLM.from_pretrained(`
`238`	`236`	`"meta-llama/Meta-Llama-3-8B-Instruct",`
`239`		`- device_map="auto",`
`240`	`237`	`)`
`241`	`238`
`242`	`239`	# Define the recipe, use knowledge distillation modifier and target the `model.layers` using a regex with