
Commit 243125f

Update on "Remove sharded ckpt from export_llama"
The sharded checkpoint isn't used anymore; remove it and simplify export_llama.

Differential Revision: [D87828518](https://our.internmc.facebook.com/intern/diff/D87828518/)

[ghstack-poisoned]
1 parent 8a5525f commit 243125f

File tree

1 file changed: +7 -0 lines changed

extension/llm/export/config/llm_config.py

Lines changed: 7 additions & 0 deletions
```diff
@@ -86,6 +86,10 @@ class BaseConfig:
             e.g. '"{\"get_bos_id\":128000, \"get_eos_ids\":[128009, 128001]}"'
         use_lora: Only for use with QAT. Rank of the LoRA adapter, disabled
             if set to 0.
+        fairseq2: For legacy internal use cases, this is safe to ignore.
+        preq_mode: Legacy option to specify how prequantized weights are loaded.
+            Going forward, ExecuTorch supports loading weights prequantized through
+            TorchAo as-is, without any special handling.
         preq_group_size: Legacy option to specify the group size of prequantized weights.
         preq_embedding_quantize: Legacy option to specify how prequantized embeddings
             are loaded.
@@ -99,6 +103,7 @@ class BaseConfig:
     tokenizer_path: Optional[str] = None
     metadata: Optional[str] = None
     use_lora: int = 0
+    fairseq2: bool = False
     preq_mode: Optional[PreqMode] = None
     preq_group_size: int = 32
     preq_embedding_quantize: str = "8,0"
@@ -530,6 +535,8 @@ def from_args(cls, args: argparse.Namespace) -> "LlmConfig": # noqa: C901
             llm_config.base.metadata = args.metadata
         if hasattr(args, "use_lora"):
             llm_config.base.use_lora = args.use_lora
+        if hasattr(args, "fairseq2"):
+            llm_config.base.fairseq2 = args.fairseq2
 
         # PreqMode settings
         if hasattr(args, "preq_mode") and args.preq_mode:
```
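
For context on how the new field is plumbed through, below is a minimal, runnable sketch of the pattern this diff extends: a dataclass field with a default, copied from an argparse.Namespace only when the attribute is present. The trimmed-down `BaseConfig`/`LlmConfig` and the CLI wiring here are illustrative assumptions, not the real module; the actual classes in extension/llm/export/config/llm_config.py carry many more fields.

```python
# Sketch of the config-plumbing pattern from the diff; not the real
# ExecuTorch module. Only the fields touched by this commit are shown.
import argparse
from dataclasses import dataclass, field
from typing import Optional


@dataclass
class BaseConfig:
    tokenizer_path: Optional[str] = None
    metadata: Optional[str] = None
    use_lora: int = 0
    fairseq2: bool = False  # new field from this commit; legacy internal use


@dataclass
class LlmConfig:
    base: BaseConfig = field(default_factory=BaseConfig)

    @classmethod
    def from_args(cls, args: argparse.Namespace) -> "LlmConfig":
        llm_config = cls()
        # hasattr() guards keep from_args usable with partially populated
        # namespaces, e.g. when callers build the args programmatically.
        if hasattr(args, "use_lora"):
            llm_config.base.use_lora = args.use_lora
        if hasattr(args, "fairseq2"):
            llm_config.base.fairseq2 = args.fairseq2
        return llm_config


if __name__ == "__main__":
    # Hypothetical CLI wiring, for illustration only.
    parser = argparse.ArgumentParser()
    parser.add_argument("--use_lora", type=int, default=0)
    parser.add_argument("--fairseq2", action="store_true")
    cfg = LlmConfig.from_args(parser.parse_args(["--fairseq2"]))
    print(cfg.base.fairseq2)  # True
```

The `hasattr()` guard is what lets `from_args` accept namespaces built by different front-ends that may not define every flag; an absent attribute simply leaves the dataclass default (`fairseq2=False`) in place.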
