Skip to content

Commit f92d98a

Browse files
remove llmcompressor.transformers.oneshot references (#1729)
SUMMARY: Remove all references to `llmcompressor.transformers.oneshot` in our examples. TEST PLAN: n/a. Signed-off-by: Brian Dellabetta <[email protected]>
1 parent dd52183 commit f92d98a

File tree

6 files changed

+8
-6
lines changed

6 files changed

+8
-6
lines changed

examples/big_models_with_sequential_onloading/llama3.3_70b.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
from datasets import load_dataset
22
from transformers import AutoModelForCausalLM, AutoTokenizer
33

4+
from llmcompressor import oneshot
45
from llmcompressor.modifiers.quantization import GPTQModifier
56
from llmcompressor.modifiers.smoothquant import SmoothQuantModifier
6-
from llmcompressor.transformers import oneshot
77
from llmcompressor.utils import dispatch_for_generation
88

99
# Select model and load it.

examples/multimodal_vision/qwen_2_5_vl_example.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66
from qwen_vl_utils import process_vision_info
77
from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration
88

9+
from llmcompressor import oneshot
910
from llmcompressor.modifiers.quantization import GPTQModifier
10-
from llmcompressor.transformers import oneshot
1111
from llmcompressor.utils import dispatch_for_generation
1212

1313
# Load model.

examples/quantization_w4a16/llama3_example.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
from datasets import load_dataset
22
from transformers import AutoModelForCausalLM, AutoTokenizer
33

4+
from llmcompressor import oneshot
45
from llmcompressor.modifiers.quantization import GPTQModifier
5-
from llmcompressor.transformers import oneshot
66
from llmcompressor.utils import dispatch_for_generation
77

88
# Select model and load it.

examples/quantization_w8a8_fp8/fp8_block_example.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,9 @@
1616
# * quantize the weights to fp8 with per channel via ptq
1717
# * quantize the activations to fp8 with dynamic per token
1818
recipe = QuantizationModifier(
19-
targets="Linear", scheme="FP8_BLOCK", ignore=["lm_head", "re:.*mlp.gate$"],
19+
targets="Linear",
20+
scheme="FP8_BLOCK",
21+
ignore=["lm_head", "re:.*mlp.gate$"],
2022
)
2123

2224
# Apply quantization.

examples/quantizing_moe/deepseek_r1_example.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
from datasets import load_dataset
22
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
33

4+
from llmcompressor import oneshot
45
from llmcompressor.modeling import replace_modules_for_calibration
56
from llmcompressor.modifiers.quantization import GPTQModifier
6-
from llmcompressor.transformers import oneshot
77

88
# Select model and load it.
99

examples/quantizing_moe/qwen_example.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22
from datasets import load_dataset
33
from transformers import AutoModelForCausalLM, AutoTokenizer
44

5+
from llmcompressor import oneshot
56
from llmcompressor.modifiers.quantization import GPTQModifier
6-
from llmcompressor.transformers import oneshot
77
from llmcompressor.utils import dispatch_for_generation
88

99
# select a Mixture of Experts model for quantization

0 commit comments

Comments (0)