 import ast
 import re
 from dataclasses import dataclass, field
+from enum import Enum
 from typing import ClassVar, List, Optional
 
 
 ################################################################################
 
 
-MODEL_TYPE_OPTIONS = [
-    "stories110m",
-    "llama2",
-    "llama3",
-    "llama3_1",
-    "llama3_2",
-    "llama3_2_vision",
-    "static_llama",
-    "qwen2_5",
-    "qwen3-0_6b",
-    "qwen3-1_7b",
-    "qwen3-4b",
-    "phi_4_mini",
-    "smollm2",
-]
+class ModelType(str, Enum):
+    stories110m = "stories110m"
+    llama2 = "llama2"
+    llama3 = "llama3"
+    llama3_1 = "llama3_1"
+    llama3_2 = "llama3_2"
+    llama3_2_vision = "llama3_2_vision"
+    static_llama = "static_llama"
+    qwen2_5 = "qwen2_5"
+    qwen3_0_6b = "qwen3-0_6b"
+    qwen3_1_7b = "qwen3-1_7b"
+    qwen3_4b = "qwen3-4b"
+    phi_4_mini = "phi_4_mini"
+    smollm2 = "smollm2"
 
 
-PREQ_MODE_OPTIONS = [
-    "8da4w",
-    "8da4w_output_8da8w",
-]
+class PreqMode(str, Enum):
+    """
+    If you are dealing with pre-quantized checkpoints, this used to
+    be the way to specify them. Now you don't need to specify these
+    options if you use a TorchAo-prequantized checkpoint, but they
+    are still around to preserve backward compatibility.
+    """
+
+    preq_8da4w = "8da4w"
+    preq_8da4w_out_8da8w = "8da4w_output_8da8w"
 
 
 @dataclass
@@ -76,36 +82,34 @@ class BaseConfig:
     are loaded.
     """
 
-    model_class: str = "llama3"
+    model_class: ModelType = ModelType.llama3
     params: Optional[str] = None
     checkpoint: Optional[str] = None
     checkpoint_dir: Optional[str] = None
     tokenizer_path: Optional[str] = None
     metadata: Optional[str] = None
     use_lora: int = 0
     fairseq2: bool = False
-    preq_mode: Optional[str] = None
+    preq_mode: Optional[PreqMode] = None
     preq_group_size: int = 32
     preq_embedding_quantize: str = "8,0"
 
-    def __post_init__(self):
-        if self.model_class not in MODEL_TYPE_OPTIONS:
-            raise ValueError(f"model_class must be one of {MODEL_TYPE_OPTIONS}, got '{self.model_class}'")
-
-        if self.preq_mode is not None and self.preq_mode not in PREQ_MODE_OPTIONS:
-            raise ValueError(f"preq_mode must be one of {PREQ_MODE_OPTIONS}, got '{self.preq_mode}'")
-
 
 ################################################################################
 ################################# ModelConfig ##################################
 ################################################################################
 
 
-DTYPE_OVERRIDE_OPTIONS = [
-    "fp32",
-    "fp16",
-    "bf16",
-]
+class DtypeOverride(str, Enum):
+    """
+    DType of the model. Highly recommended to use "fp32", unless you want to
+    export without a backend, in which case you can also use "bf16". "fp16"
+    is not recommended.
+    """
+
+    fp32 = "fp32"
+    fp16 = "fp16"
+    bf16 = "bf16"
 
 
 @dataclass
@@ -143,7 +147,7 @@ class ModelConfig:
     [16] pattern specifies all layers have a sliding window of 16.
     """
 
-    dtype_override: str = "fp32"
+    dtype_override: DtypeOverride = DtypeOverride.fp32
     enable_dynamic_shape: bool = True
     use_shared_embedding: bool = False
     use_sdpa_with_kv_cache: bool = False
@@ -156,9 +160,6 @@ class ModelConfig:
     local_global_attention: Optional[List[int]] = None
 
     def __post_init__(self):
-        if self.dtype_override not in DTYPE_OVERRIDE_OPTIONS:
-            raise ValueError(f"dtype_override must be one of {DTYPE_OVERRIDE_OPTIONS}, got '{self.dtype_override}'")
-
         self._validate_attention_sink()
         self._validate_local_global_attention()
 
@@ -260,25 +261,31 @@ class DebugConfig:
 ################################################################################
 
 
-PT2E_QUANTIZE_OPTIONS = [
-    "xnnpack_dynamic",
-    "xnnpack_dynamic_qc4",
-    "qnn_8a8w",
-    "qnn_16a16w",
-    "qnn_16a4w",
-    "coreml_c4w",
-    "coreml_8a_c8w",
-    "coreml_8a_c4w",
-    "coreml_baseline_8a_c8w",
-    "coreml_baseline_8a_c4w",
-    "vulkan_8w",
-]
+class Pt2eQuantize(str, Enum):
+    """
+    Type of backend-specific Pt2e quantization strategy to use.
+
+    Pt2e uses a different quantization library that is graph-based
+    compared to `qmode`, which is also specified in the QuantizationConfig
+    and is source transform-based.
+    """
 
+    xnnpack_dynamic = "xnnpack_dynamic"
+    xnnpack_dynamic_qc4 = "xnnpack_dynamic_qc4"
+    qnn_8a8w = "qnn_8a8w"
+    qnn_16a16w = "qnn_16a16w"
+    qnn_16a4w = "qnn_16a4w"
+    coreml_c4w = "coreml_c4w"
+    coreml_8a_c8w = "coreml_8a_c8w"
+    coreml_8a_c4w = "coreml_8a_c4w"
+    coreml_baseline_8a_c8w = "coreml_baseline_8a_c8w"
+    coreml_baseline_8a_c4w = "coreml_baseline_8a_c4w"
+    vulkan_8w = "vulkan_8w"
 
-SPIN_QUANT_OPTIONS = [
-    "cuda",
-    "native",
-]
+
+class SpinQuant(str, Enum):
+    cuda = "cuda"
+    native = "native"
 
 
 @dataclass
@@ -313,22 +320,16 @@ class QuantizationConfig:
 
     qmode: Optional[str] = None
     embedding_quantize: Optional[str] = None
-    pt2e_quantize: Optional[str] = None
+    pt2e_quantize: Optional[Pt2eQuantize] = None
     group_size: Optional[int] = None
-    use_spin_quant: Optional[str] = None
+    use_spin_quant: Optional[SpinQuant] = None
     use_qat: bool = False
     calibration_tasks: Optional[List[str]] = None
     calibration_limit: Optional[int] = None
     calibration_seq_length: Optional[int] = None
     calibration_data: str = "Once upon a time"
 
     def __post_init__(self):
-        if self.pt2e_quantize is not None and self.pt2e_quantize not in PT2E_QUANTIZE_OPTIONS:
-            raise ValueError(f"pt2e_quantize must be one of {PT2E_QUANTIZE_OPTIONS}, got '{self.pt2e_quantize}'")
-
-        if self.use_spin_quant is not None and self.use_spin_quant not in SPIN_QUANT_OPTIONS:
-            raise ValueError(f"use_spin_quant must be one of {SPIN_QUANT_OPTIONS}, got '{self.use_spin_quant}'")
-
         if self.qmode:
             self._validate_qmode()
 
@@ -376,18 +377,16 @@ class XNNPackConfig:
     extended_ops: bool = False
 
 
-COREML_QUANTIZE_OPTIONS = [
-    "b4w",
-    "c4w",
-]
+class CoreMLQuantize(str, Enum):
+    b4w = "b4w"
+    c4w = "c4w"
 
 
-COREML_COMPUTE_UNIT_OPTIONS = [
-    "cpu_only",
-    "cpu_and_gpu",
-    "cpu_and_ne",
-    "all",
-]
+class CoreMLComputeUnit(str, Enum):
+    cpu_only = "cpu_only"
+    cpu_and_gpu = "cpu_and_gpu"
+    cpu_and_ne = "cpu_and_ne"
+    all = "all"
 
 
 @dataclass
@@ -399,17 +398,11 @@ class CoreMLConfig:
     enabled: bool = False
     enable_state: bool = False
     preserve_sdpa: bool = False
-    quantize: Optional[str] = None
+    quantize: Optional[CoreMLQuantize] = None
     ios: int = 15
-    compute_units: str = "cpu_only"
+    compute_units: CoreMLComputeUnit = CoreMLComputeUnit.cpu_only
 
     def __post_init__(self):
-        if self.quantize is not None and self.quantize not in COREML_QUANTIZE_OPTIONS:
-            raise ValueError(f"quantize must be one of {COREML_QUANTIZE_OPTIONS}, got '{self.quantize}'")
-
-        if self.compute_units not in COREML_COMPUTE_UNIT_OPTIONS:
-            raise ValueError(f"compute_units must be one of {COREML_COMPUTE_UNIT_OPTIONS}, got '{self.compute_units}'")
-
         if self.ios not in (15, 16, 17, 18):
             raise ValueError(f"Invalid coreml ios version: {self.ios}")
 
@@ -488,7 +481,7 @@ def from_args(cls, args: argparse.Namespace) -> "LlmConfig": # noqa: C901
 
         # BaseConfig
         if hasattr(args, "model"):
-            llm_config.base.model_class = args.model
+            llm_config.base.model_class = ModelType(args.model)
         if hasattr(args, "params"):
             llm_config.base.params = args.params
         if hasattr(args, "checkpoint"):
@@ -506,15 +499,15 @@ def from_args(cls, args: argparse.Namespace) -> "LlmConfig": # noqa: C901
 
         # PreqMode settings
         if hasattr(args, "preq_mode") and args.preq_mode:
-            llm_config.base.preq_mode = args.preq_mode
+            llm_config.base.preq_mode = PreqMode(args.preq_mode)
         if hasattr(args, "preq_group_size"):
             llm_config.base.preq_group_size = args.preq_group_size
         if hasattr(args, "preq_embedding_quantize"):
             llm_config.base.preq_embedding_quantize = args.preq_embedding_quantize
 
         # ModelConfig
         if hasattr(args, "dtype_override"):
-            llm_config.model.dtype_override = args.dtype_override
+            llm_config.model.dtype_override = DtypeOverride(args.dtype_override)
         if hasattr(args, "enable_dynamic_shape"):
             llm_config.model.enable_dynamic_shape = args.enable_dynamic_shape
         if hasattr(args, "use_shared_embedding"):
@@ -556,11 +549,11 @@ def from_args(cls, args: argparse.Namespace) -> "LlmConfig": # noqa: C901
         if hasattr(args, "embedding_quantize"):
             llm_config.quantization.embedding_quantize = args.embedding_quantize
         if hasattr(args, "pt2e_quantize") and args.pt2e_quantize:
-            llm_config.quantization.pt2e_quantize = args.pt2e_quantize
+            llm_config.quantization.pt2e_quantize = Pt2eQuantize(args.pt2e_quantize)
         if hasattr(args, "group_size"):
             llm_config.quantization.group_size = args.group_size
         if hasattr(args, "use_spin_quant") and args.use_spin_quant:
-            llm_config.quantization.use_spin_quant = args.use_spin_quant
+            llm_config.quantization.use_spin_quant = SpinQuant(args.use_spin_quant)
         if hasattr(args, "use_qat"):
             llm_config.quantization.use_qat = args.use_qat
         if hasattr(args, "calibration_tasks"):
@@ -588,11 +581,13 @@ def from_args(cls, args: argparse.Namespace) -> "LlmConfig": # noqa: C901
             args, "coreml_preserve_sdpa", False
         )
         if hasattr(args, "coreml_quantize") and args.coreml_quantize:
-            llm_config.backend.coreml.quantize = args.coreml_quantize
+            llm_config.backend.coreml.quantize = CoreMLQuantize(args.coreml_quantize)
         if hasattr(args, "coreml_ios"):
             llm_config.backend.coreml.ios = args.coreml_ios
         if hasattr(args, "coreml_compute_units"):
-            llm_config.backend.coreml.compute_units = args.coreml_compute_units
+            llm_config.backend.coreml.compute_units = CoreMLComputeUnit(
+                args.coreml_compute_units
+            )
 
         # Vulkan
         if hasattr(args, "vulkan"):
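
For anyone trying the change out, here is a minimal, self-contained sketch (not part of the diff) of how the new `str`-backed enums behave. The trimmed `ModelType`/`DtypeOverride` re-declarations below exist only to make the snippet runnable on its own; in the repo they come from the config module this commit edits.

```python
import argparse
from enum import Enum


# Trimmed re-declarations of the enums from this diff, just so the sketch
# runs standalone; use the real ones from the config module in practice.
class ModelType(str, Enum):
    llama3 = "llama3"
    qwen3_0_6b = "qwen3-0_6b"


class DtypeOverride(str, Enum):
    fp32 = "fp32"
    fp16 = "fp16"
    bf16 = "bf16"


# Stand-in for the parsed CLI args that LlmConfig.from_args receives.
ns = argparse.Namespace(model="qwen3-0_6b", dtype_override="fp32")

# Mirrors what from_args now does: the Enum constructor looks the raw string
# up by value, so the hand-written membership checks deleted from the
# __post_init__ methods above are no longer needed.
model = ModelType(ns.model)               # ModelType.qwen3_0_6b
dtype = DtypeOverride(ns.dtype_override)  # DtypeOverride.fp32

# Unknown values now fail at construction time instead.
try:
    ModelType("llama9")
except ValueError as err:
    print(err)  # 'llama9' is not a valid ModelType

# Because the enums also subclass str, comparisons against the raw string
# values still succeed, so downstream string checks keep working.
assert model == "qwen3-0_6b"
assert dtype.value == "fp32"
```

The same value-lookup pattern covers `PreqMode`, `Pt2eQuantize`, `SpinQuant`, `CoreMLQuantize`, and `CoreMLComputeUnit` in `from_args`.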