oneshot entrypoint update (#1445)

ved1beta · brian-dellabetta · web-flow · commit a18c76a51939 · 2025-06-17T13:14:30.000-05:00
SUMMARY: Updated the oneshot function to use explicit keyword arguments instead of relying solely on **kwargs. TEST PLAN: Changes were tested using mock tests that verify: Correct handling of explicit arguments Backward compatibility with kwargs Proper behavior with mixed explicit args and kwargs Verification that kwargs correctly override explicit parameters when duplicates exist fixes #1443 TODO: - [x] run nightly tests on branch -- https://github.com/neuralmagic/llm-compressor-testing/actions/runs/15710803245 ✅ --------- Co-authored-by: Brian Dellabetta <brian-dellabetta@users.noreply.github.com>
diff --git a/src/llmcompressor/entrypoints/oneshot.py b/src/llmcompressor/entrypoints/oneshot.py
@@ -1,10 +1,10 @@
 import os
 from datetime import datetime
-from typing import Optional
+from typing import TYPE_CHECKING, List, Optional, Union
 
 from loguru import logger
 from torch.utils.data import DataLoader
-from transformers import PreTrainedModel
+from transformers import PreTrainedModel, PreTrainedTokenizerBase, ProcessorMixin
 
 from llmcompressor.args import parse_args
 from llmcompressor.core.session_functions import active_session
@@ -14,6 +14,9 @@
 
 __all__ = ["Oneshot", "oneshot"]
 
+if TYPE_CHECKING:
+    from datasets import Dataset, DatasetDict
+
 
 class Oneshot:
     """
@@ -102,7 +105,8 @@ def __init__(
         :param recipe_args: RecipeArguments parameters, responsible for containing
             recipe-related parameters
         :param output_dir: Path to save the output model after carrying out oneshot
-
+        :param log_dir: Path to save logs during oneshot run.
+            Nothing is logged to file if None.
         """
         # Set up logging
         if log_dir:
@@ -191,8 +195,119 @@ def apply_recipe_modifiers(
         session.finalize()
 
 
-def oneshot(**kwargs) -> PreTrainedModel:
-    one_shot = Oneshot(**kwargs)
+def oneshot(
+    # Model arguments
+    model: Union[str, PreTrainedModel],
+    distill_teacher: Optional[str] = None,
+    config_name: Optional[str] = None,
+    tokenizer: Optional[Union[str, PreTrainedTokenizerBase]] = None,
+    processor: Optional[Union[str, ProcessorMixin]] = None,
+    cache_dir: Optional[str] = None,
+    use_auth_token: bool = False,
+    precision: str = "auto",
+    tie_word_embeddings: bool = False,
+    trust_remote_code_model: bool = False,
+    save_compressed: bool = True,
+    oneshot_device: str = "cuda:0",
+    model_revision: str = "main",
+    # Recipe arguments
+    recipe: Optional[Union[str, List[str]]] = None,
+    recipe_args: Optional[List[str]] = None,
+    clear_sparse_session: bool = False,
+    stage: Optional[str] = None,
+    # Dataset arguments
+    dataset: Optional[Union[str, "Dataset", "DatasetDict"]] = None,
+    dataset_config_name: Optional[str] = None,
+    dataset_path: Optional[str] = None,
+    num_calibration_samples: int = 512,
+    shuffle_calibration_samples: bool = True,
+    max_seq_length: int = 384,
+    pad_to_max_length: bool = True,
+    text_column: str = "text",
+    concatenate_data: bool = False,
+    streaming: bool = False,
+    overwrite_cache: bool = False,
+    preprocessing_num_workers: Optional[int] = None,
+    min_tokens_per_module: Optional[float] = None,
+    trust_remote_code_data: bool = False,
+    # Miscellaneous arguments
+    output_dir: Optional[str] = None,
+    log_dir: Optional[str] = "sparse_logs",
+    **kwargs,
+) -> PreTrainedModel:
+    """
+    Performs oneshot calibration on a model.
+
+    # Model arguments
+    :param model: A pretrained model identifier from huggingface.co/models or a path
+        to a local model. Required parameter.
+    :param distill_teacher: Teacher model (a trained text generation model)
+        for distillation.
+    :param config_name: Pretrained config name or path if not the same as
+        model_name.
+    :param tokenizer: Pretrained tokenizer name or path if not the same as
+        model_name.
+    :param processor: Pretrained processor name or path if not the same as
+        model_name.
+    :param cache_dir: Where to store the pretrained data from
+        huggingface.co.
+    :param use_auth_token: Whether to use Hugging Face auth token for private
+        models.
+    :param precision: Precision to cast model weights to, default to auto.
+    :param tie_word_embeddings: Whether the model's input and output word embeddings
+        should be tied.
+    :param trust_remote_code_model: Whether to allow for custom models to execute
+        their own modeling files.
+    :param save_compressed: Whether to compress sparse models during save.
+    :param oneshot_device: Device to run oneshot calibration on.
+    :param model_revision: The specific model version to use (can be branch name,
+        tag, or commit id).
+
+    # Recipe arguments
+    :param recipe: Path to a LLM Compressor sparsification recipe.
+    :param recipe_args: List of recipe arguments to evaluate, in the
+        format "key1=value1", "key2=value2".
+    :param clear_sparse_session: Whether to clear CompressionSession/
+        CompressionLifecycle data between runs.
+    :param stage: The stage of the recipe to use for oneshot.
+
+    # Dataset arguments
+    :param dataset: The name of the dataset to use (via the datasets
+        library).
+    :param dataset_config_name: The configuration name of the dataset
+        to use.
+    :param dataset_path: Path to a custom dataset. Supports json, csv, dvc.
+    :param num_calibration_samples: Number of samples to use for one-shot
+        calibration.
+    :param shuffle_calibration_samples: Whether to shuffle the dataset before
+        calibration.
+    :param max_seq_length: Maximum total input sequence length after tokenization.
+    :param pad_to_max_length: Whether to pad all samples to `max_seq_length`.
+    :param text_column: Key to use as the `text` input to tokenizer/processor.
+    :param concatenate_data: Whether to concatenate datapoints to fill
+        max_seq_length.
+    :param streaming: True to stream data from a cloud dataset.
+    :param overwrite_cache: Whether to overwrite the cached preprocessed datasets.
+    :param preprocessing_num_workers: Number of processes for
+        preprocessing.
+    :param min_tokens_per_module: Minimum percentage of tokens per
+        module, relevant for MoE models.
+    :param trust_remote_code_data: Whether to allow for datasets defined on the Hub
+        using a dataset script.
+
+    # Miscellaneous arguments
+    :param output_dir: Path to save the output model after calibration.
+        Nothing is saved if None.
+    :param log_dir: Path to save logs during oneshot run.
+        Nothing is logged to file if None.
+
+    :return: The calibrated PreTrainedModel
+    """
+
+    # pass all args directly into Oneshot
+    local_args = locals()
+    local_args.pop("kwargs")
+    one_shot = Oneshot(**local_args, **kwargs)
     one_shot()
 
     return one_shot.model