comments

George · George · commit eb3094c5c338 · 2025-02-20T20:51:32.000Z
diff --git a/README.md b/README.md
@@ -58,7 +58,7 @@ Quantization is applied by selecting an algorithm and calling the `oneshot` API.
 ```python
 from llmcompressor.modifiers.smoothquant import SmoothQuantModifier
 from llmcompressor.modifiers.quantization import GPTQModifier
-from llmcompressor.transformers import oneshot
+from llmcompressor import oneshot
 
 # Select quantization algorithm. In this case, we:
 #   * apply SmoothQuant to make the activations easier to quantize
diff --git a/examples/quantization_2of4_sparse_w4a16/llama7b_sparse_w4a16.py b/examples/quantization_2of4_sparse_w4a16/llama7b_sparse_w4a16.py
@@ -33,7 +33,6 @@
 bf16 = False  # using full precision for training
 lr_scheduler_type = "cosine"
 warmup_ratio = 0.1
-preprocessing_num_workers = 8
 
 # this will run the recipe stage by stage:
 # oneshot sparsification -> finetuning -> oneshot quantization
@@ -53,7 +52,6 @@
     learning_rate=learning_rate,
     lr_scheduler_type=lr_scheduler_type,
     warmup_ratio=warmup_ratio,
-    preprocessing_num_workers=preprocessing_num_workers,
 )
 logger.info(
     "Note: llcompressor does not currently support running ",