Uses dataclasses, which integrate with OmegaConf and Hydra.
1414"""
1515
16+ import re
1617from dataclasses import dataclass , field
17- from typing import List , Optional
18+ from enum import Enum
19+ from typing import List , Literal , Optional
20+
21+
22+ ################################################################################
23+ ################################## BaseConfig ##################################
24+ ################################################################################
25+
26+
class ModelType(str, Enum):
    """Model families/variants recognized by this export flow.

    The string values are the identifiers accepted in configs; this enum is
    used as the type of ``BaseConfig.model_class``. Subclassing ``str`` keeps
    the members directly comparable to plain-string config values.
    """

    STORIES110M = "stories110m"
    LLAMA2 = "llama2"
    LLAMA3 = "llama3"
    LLAMA3_1 = "llama3_1"
    LLAMA3_2 = "llama3_2"
    LLAMA3_2_VISION = "llama3_2_vision"
    STATIC_LLAMA = "static_llama"
    QWEN2_5 = "qwen2_5"
    QWEN3_0_6B = "qwen3-0_6b"
    QWEN3_1_7B = "qwen3-1_7b"
    QWEN3_4B = "qwen3-4b"
    PHI_4_MINI = "phi_4_mini"
    SMOLLM2 = "smollm2"
41+
42+
class PreqMode(str, Enum):
    """Legacy pre-quantization modes applied during model weight loading.

    Used as the type of ``BaseConfig.preq_mode``.
    """

    PREQ_8DA4W = "8da4w"
    PREQ_8DA4W_OUT_8DA8W = "8da4w_output_8da8w"
1846
1947
@dataclass
class BaseConfig:
    """
    These are specific to the specific model, e.g. whether it's Qwen3 0.6B or
    Phi-4-mini. For each of these different models, you can expect each of
    these fields to change.
    """

    # Which model family/variant to build; see ModelType for accepted values.
    model_class: ModelType = ModelType.LLAMA3
    # Presumably a path to a model-parameters file — confirm with the loader.
    params: Optional[str] = None
    # Presumably a path to the model checkpoint — confirm with the loader.
    checkpoint: Optional[str] = None
    checkpoint_dir: Optional[str] = None  # For sharded checkpoint.
    tokenizer_path: Optional[str] = None
    metadata: Optional[str] = None
    use_lora: bool = False
    fairseq2: bool = False  # For legacy internal use cases.

    # Legacy pre-quantization options that happen during model weight loading.
    preq_mode: Optional[PreqMode] = None
    preq_group_size: int = 32
    preq_embedding_quantize: str = "8,0"
68+
69+
70+ ################################################################################
71+ ################################# ModelConfig ##################################
72+ ################################################################################
73+
74+
class DtypeOverride(str, Enum):
    """Floating-point dtype choices; used as ``ModelConfig.dtype_override``."""

    FP32 = "fp32"
    FP16 = "fp16"
    BF16 = "bf16"
3479
3580
3681@dataclass
@@ -42,29 +87,39 @@ class ModelConfig:
4287 to different models.
4388 """
4489
45- dtype_override : str = "fp32"
90+ dtype_override : DtypeOverride = DtypeOverride . FP32
4691 enable_dynamic_shape : bool = True
4792 use_shared_embedding : bool = False
48- use_lora : bool = False
4993 use_sdpa_with_kv_cache : bool = False
5094 expand_rope_table : bool = False
95+ use_attention_sink : Optional [str ] = None
5196 output_prune_map : Optional [str ] = None
5297 input_prune_map : Optional [str ] = None
5398
5499 # Below are config options relating to kv cache.
55- use_kv_cache : Optional [bool ] = None
56- quantize_kv_cache : Optional [bool ] = None
57- local_global_attention : List [int ] = None
100+ use_kv_cache : bool = False
101+ quantize_kv_cache : bool = False
102+ local_global_attention : Optional [List [int ]] = None
103+
104+
105+ ################################################################################
106+ ################################ ExportConfig ##################################
107+ ################################################################################
58108
59109
@dataclass
class ExportConfig:
    """Options for the export step: sequence bounds and output artifacts."""

    # Maximum sequence length the exported model supports.
    max_seq_length: int = 128
    # Maximum context length; defaults to the same bound as max_seq_length.
    max_context_length: int = 128
    output_dir: Optional[str] = None
    output_name: Optional[str] = None
    # Path to a shared library — NOTE(review): consumer not visible here.
    so_library: Optional[str] = None
    # If True, presumably stop after export and skip later stages — confirm
    # against the pipeline that reads this config.
    export_only: bool = False
118+
119+
120+ ################################################################################
121+ ################################# DebugConfig ##################################
122+ ################################################################################
68123
69124
70125@dataclass
@@ -73,45 +128,101 @@ class DebugConfig:
73128 profile_path : Optional [str ] = None
74129 generate_etrecord : bool = False
75130 generate_full_logits : bool = False
76- verbose : bool = False # Would be good to remove this from the config eventually
131+ verbose : bool = False
132+
133+
134+ ################################################################################
135+ ############################# QuantizationConfig ###############################
136+ ################################################################################
77137
78138
79- ########################################################################
80- #### The below config can eventually be replaced by export recipes #####
81- ########################################################################
class Pt2eQuantize(str, Enum):
    """PT2E quantization recipes, grouped by backend (XNNPACK / QNN / CoreML /
    Vulkan); used as ``QuantizationConfig.pt2e_quantize``."""

    XNNPACK_DYNAMIC = "xnnpack_dynamic"
    XNNPACK_DYNAMIC_QC4 = "xnnpack_dynamic_qc4"
    QNN_8A8W = "qnn_8a8w"
    QNN_16A16W = "qnn_16a16w"
    QNN_16A4W = "qnn_16a4w"
    COREML_C4W = "coreml_c4w"
    COREML_8A_C8W = "coreml_8a_c8w"
    COREML_8A_C4W = "coreml_8a_c4w"
    COREML_BASELINE_8A_C8W = "coreml_baseline_8a_c8w"
    COREML_BASELINE_8A_C4W = "coreml_baseline_8a_c4w"
    VULKAN_8W = "vulkan_8w"
151+
152+
class SpinQuant(str, Enum):
    """SpinQuant execution options; used as ``QuantizationConfig.use_spin_quant``."""

    CUDA = "cuda"
    NATIVE = "native"
82156
83157
@dataclass
class QuantizationConfig:
    """Quantization options for the export flow.

    ``qmode`` accepts either one of a fixed set of named modes or a
    torchao-style pattern string (see ``_validate_qmode``). All fields default
    to None, meaning "not requested".
    """

    qmode: Optional[str] = None
    embedding_quantize: Optional[str] = None
    pt2e_quantize: Optional[Pt2eQuantize] = None
    group_size: Optional[int] = None
    use_spin_quant: Optional[SpinQuant] = None
    use_qat: Optional[bool] = None
    calibration_tasks: Optional[List[str]] = None
    calibration_limit: Optional[int] = None
    calibration_seq_length: Optional[int] = None
    calibration_data: Optional[str] = None

    def __post_init__(self):
        self._validate_qmode()

    def _validate_qmode(self) -> None:
        """Raise ValueError if ``qmode`` is set to an unrecognized value.

        BUGFIX: ``qmode`` defaults to None ("no quantization"), but the
        previous code unconditionally passed it to ``re.findall``, which
        raises TypeError on non-str input — making ``QuantizationConfig()``
        unconstructible with defaults. None is now accepted and skips
        validation.
        """
        if self.qmode is None:
            return

        choices = ["int8", "8da4w", "8da4w-gptq", "vulkan_4w"]
        patterns = [r"torchao:8da(\d+)w", r"torchao:fpa(\d+)w"]

        if self.qmode in choices:
            return

        # Accept a torchao-style mode, e.g. "torchao:8da4w" — exactly one
        # match of one of the patterns above.
        for pattern in patterns:
            matches = re.findall(pattern, self.qmode)
            if len(matches) == 1:
                return

        raise ValueError(
            f"Got qmode {self.qmode}, but expected one of {choices}, or one of the regex patterns {patterns}."
        )
189+
190+
191+ ################################################################################
192+ ############################### BackendConfig ##################################
193+ ################################################################################
194+
100195
@dataclass
class XNNPackConfig:
    """Options for the XNNPACK backend delegate."""

    # Whether to delegate to XNNPACK at all.
    enabled: bool = False
    # NOTE(review): presumably enables additional op coverage in the
    # partitioner — the consumer is not visible here; confirm before relying.
    extended_ops: bool = False
200+
201+
class CoreMLQuantize(str, Enum):
    """Weight-quantization choices for the CoreML delegate
    (``CoreMLConfig.quantize``)."""

    B4W = "b4w"
    C4W = "c4w"
205+
206+
class CoreMLComputeUnit(str, Enum):
    """Compute units CoreML may run on (``CoreMLConfig.compute_units``)."""

    CPU_ONLY = "cpu_only"
    CPU_AND_GPU = "cpu_and_gpu"
    CPU_AND_NE = "cpu_and_ne"
    ALL = "all"
105212
106213
@dataclass
class CoreMLConfig:
    """Options for the CoreML backend delegate."""

    enabled: bool = False
    enable_state: bool = False
    preserve_sdpa: bool = False
    quantize: Optional[CoreMLQuantize] = None
    ios: Literal[15, 16, 17, 18] = 15
    compute_units: CoreMLComputeUnit = CoreMLComputeUnit.CPU_ONLY

    def __post_init__(self):
        # The Literal annotation is not enforced at runtime when configs are
        # built dynamically, so re-check the iOS version here.
        supported_versions = (15, 16, 17, 18)
        if self.ios in supported_versions:
            return
        raise ValueError(f"Invalid coreml ios version: {self.ios}")
115226
116227
117228@dataclass
@@ -143,6 +254,11 @@ class BackendConfig:
143254 mps : MPSConfig = field (default_factory = MPSConfig )
144255
145256
257+ ################################################################################
258+ ################################## LlmConfig ###################################
259+ ################################################################################
260+
261+
146262@dataclass
147263class LlmConfig :
148264 base : BaseConfig = field (default_factory = BaseConfig )
0 commit comments