From 0e547c1ec9855db1015d58910bee47afa8b1042e Mon Sep 17 00:00:00 2001
From: Lucy Qiu <lfq@meta.com>
Date: Fri, 17 Oct 2025 13:49:51 -0700
Subject: [PATCH] Remove default bos/eos from metadata (#15231)

Summary:

See: https://github.com/pytorch/executorch/pull/15215

Currently:
- default eos/bos tokens are embedded into the pte
- llama3 instruct has a different set of eos/bos tokens
- users must manually specify the llama3 instruct eos/bos tokens at export time, because the runner overrides the tokenizer's eos/bos with the values in the PTE

This diff:
- removes the defaults
- relies on the tokenizer for eos/bos unless the user explicitly specifies them in the metadata, in which case the eos/bos values saved in the PTE are used.

Reviewed By: jackzhxng

Differential Revision: D84942718
---
 examples/models/llama/export_llama_lib.py | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/examples/models/llama/export_llama_lib.py b/examples/models/llama/export_llama_lib.py
index b1cfb058882..0d6dc87de2f 100644
--- a/examples/models/llama/export_llama_lib.py
+++ b/examples/models/llama/export_llama_lib.py
@@ -15,7 +15,6 @@
 import logging
 import re
 import shlex
-from enum import Enum
 from functools import partial
 
 from importlib import resources as _resources
@@ -121,11 +120,6 @@
 }
 
 
-class WeightType(Enum):
-    LLAMA = "LLAMA"
-    FAIRSEQ2 = "FAIRSEQ2"
-
-
 def set_pkg_name(name: str) -> None:
     global pkg_name
     pkg_name = name
@@ -1247,7 +1241,6 @@ def _export_llama(llm_config: LlmConfig) -> LLMEdgeManager:  # noqa: C901
 
 
 def _load_llama_model_metadata(
-    weight_type: WeightType,
     use_kv_cache: bool,
     use_sdpa_with_kv_cache: bool,
     enable_dynamic_shape: bool,
@@ -1257,10 +1250,7 @@ def _load_llama_model_metadata(
     vocab_size: int,
     metadata_str: Optional[str] = None,
 ):
-    is_fairseq2 = weight_type == WeightType.FAIRSEQ2
     metadata = {
-        "get_bos_id": 3 if is_fairseq2 else 1,
-        "get_eos_ids": [3] if is_fairseq2 else [2],
         "get_max_seq_len": max_seq_len,
         "get_max_context_len": max_context_len,
         "get_n_layers": n_layers,
@@ -1332,7 +1322,6 @@ def _load_llama_model(llm_config: LlmConfig) -> "LLMEdgeManager":
         save_exported_program=llm_config.export.export_only,
         verbose=llm_config.debug.verbose,
         metadata=_load_llama_model_metadata(
-            WeightType.FAIRSEQ2 if llm_config.base.fairseq2 else WeightType.LLAMA,
             llm_config.model.use_kv_cache,
             llm_config.model.use_sdpa_with_kv_cache,
             llm_config.model.enable_dynamic_shape,