
Commit 0e547c1

lucylq authored and facebook-github-bot committed
Remove default bos/eos from metadata (pytorch#15231)
Summary:
See: pytorch#15215

Currently:
- Default eos/bos tokens are embedded into the PTE.
- Llama 3 Instruct has a different set of eos/bos tokens.
- Users must manually specify the Llama 3 Instruct eos/bos tokens at export time, because the runner overrides the tokenizer's eos/bos with the values in the PTE.

This diff:
- Removes the defaults.
- Relies on the tokenizer for eos/bos UNLESS the user explicitly specifies them in the metadata, in which case the eos/bos saved in the PTE are used.

Reviewed By: jackzhxng

Differential Revision: D84942718
1 parent aeee757 commit 0e547c1
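For reference, a minimal sketch of what the "explicitly specifies in the metadata" path could look like under this scheme. The Llama 3 Instruct token IDs below are the published ones (128000 = <|begin_of_text|>, 128009 = <|eot_id|>, 128001 = <|end_of_text|>); wiring the JSON string through the export command's metadata option is an assumption based on the metadata_str parameter visible in the diff.

import json

# Hedged sketch: explicitly pin the Llama 3 Instruct special tokens in the
# exported metadata. After this change, omitting these keys means the runner
# falls back to the tokenizer's bos/eos instead of baked-in defaults.
llama3_instruct_overrides = json.dumps(
    {
        "get_bos_id": 128000,             # <|begin_of_text|>
        "get_eos_ids": [128009, 128001],  # <|eot_id|>, <|end_of_text|>
    }
)
# This JSON string would be supplied as metadata_str to
# _load_llama_model_metadata (assumed: via the export CLI's metadata option).
print(llama3_instruct_overrides)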

File tree

1 file changed: +0 −11 lines

examples/models/llama/export_llama_lib.py

Lines changed: 0 additions & 11 deletions
@@ -15,7 +15,6 @@
 import logging
 import re
 import shlex
-from enum import Enum
 from functools import partial
 
 from importlib import resources as _resources
@@ -121,11 +120,6 @@
 }
 
 
-class WeightType(Enum):
-    LLAMA = "LLAMA"
-    FAIRSEQ2 = "FAIRSEQ2"
-
-
 def set_pkg_name(name: str) -> None:
     global pkg_name
     pkg_name = name
@@ -1247,7 +1241,6 @@ def _export_llama(llm_config: LlmConfig) -> LLMEdgeManager: # noqa: C901
 
 
 def _load_llama_model_metadata(
-    weight_type: WeightType,
     use_kv_cache: bool,
     use_sdpa_with_kv_cache: bool,
     enable_dynamic_shape: bool,
@@ -1257,10 +1250,7 @@ def _load_llama_model_metadata(
     vocab_size: int,
     metadata_str: Optional[str] = None,
 ):
-    is_fairseq2 = weight_type == WeightType.FAIRSEQ2
     metadata = {
-        "get_bos_id": 3 if is_fairseq2 else 1,
-        "get_eos_ids": [3] if is_fairseq2 else [2],
         "get_max_seq_len": max_seq_len,
         "get_max_context_len": max_context_len,
         "get_n_layers": n_layers,
@@ -1332,7 +1322,6 @@ def _load_llama_model(llm_config: LlmConfig) -> "LLMEdgeManager":
         save_exported_program=llm_config.export.export_only,
         verbose=llm_config.debug.verbose,
         metadata=_load_llama_model_metadata(
-            WeightType.FAIRSEQ2 if llm_config.base.fairseq2 else WeightType.LLAMA,
             llm_config.model.use_kv_cache,
             llm_config.model.use_sdpa_with_kv_cache,
             llm_config.model.enable_dynamic_shape,
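
To make the new fallback behavior concrete, here is a minimal Python sketch of the resolution order this diff implies: explicit PTE metadata wins, otherwise the tokenizer's own special tokens are used. This is not the actual runner code; resolve_eos_ids, its parameters, and the sample values are hypothetical.

from typing import Optional

def resolve_eos_ids(
    pte_metadata: dict,
    tokenizer_eos_ids: list[int],
) -> list[int]:
    # Prefer eos ids the user explicitly exported into the PTE metadata;
    # otherwise fall back to the tokenizer's own eos ids.
    explicit: Optional[list[int]] = pte_metadata.get("get_eos_ids")
    return explicit if explicit is not None else tokenizer_eos_ids

# No explicit override in the PTE: the tokenizer's eos wins.
assert resolve_eos_ids({}, [128009]) == [128009]
# An explicit override in the PTE takes precedence.
assert resolve_eos_ids({"get_eos_ids": [2]}, [128009]) == [2]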
