From 0e547c1ec9855db1015d58910bee47afa8b1042e Mon Sep 17 00:00:00 2001 From: Lucy Qiu Date: Fri, 17 Oct 2025 13:49:51 -0700 Subject: [PATCH] Remove default bos/eos from metadata (#15231) Summary: See: https://github.com/pytorch/executorch/pull/15215 Currently: - default eos/bos tokens are embedded into the PTE - llama3 instruct has a different set of eos/bos tokens - users must manually specify the llama3 instruct eos/bos tokens at export time, because the runner overrides tokenizer eos/bos with the values in the PTE This diff: - removes the defaults - relies on the tokenizer for eos/bos UNLESS the user explicitly specifies them in the metadata, in which case the eos/bos saved in the PTE are used. Reviewed By: jackzhxng Differential Revision: D84942718 --- examples/models/llama/export_llama_lib.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/examples/models/llama/export_llama_lib.py b/examples/models/llama/export_llama_lib.py index b1cfb058882..0d6dc87de2f 100644 --- a/examples/models/llama/export_llama_lib.py +++ b/examples/models/llama/export_llama_lib.py @@ -15,7 +15,6 @@ import logging import re import shlex -from enum import Enum from functools import partial from importlib import resources as _resources @@ -121,11 +120,6 @@ } -class WeightType(Enum): - LLAMA = "LLAMA" - FAIRSEQ2 = "FAIRSEQ2" - - def set_pkg_name(name: str) -> None: global pkg_name pkg_name = name @@ -1247,7 +1241,6 @@ def _export_llama(llm_config: LlmConfig) -> LLMEdgeManager: # noqa: C901 def _load_llama_model_metadata( - weight_type: WeightType, use_kv_cache: bool, use_sdpa_with_kv_cache: bool, enable_dynamic_shape: bool, @@ -1257,10 +1250,7 @@ def _load_llama_model_metadata( vocab_size: int, metadata_str: Optional[str] = None, ): - is_fairseq2 = weight_type == WeightType.FAIRSEQ2 metadata = { - "get_bos_id": 3 if is_fairseq2 else 1, - "get_eos_ids": [3] if is_fairseq2 else [2], "get_max_seq_len": max_seq_len, "get_max_context_len": max_context_len, "get_n_layers": n_layers, @@ -1332,7 +1322,6 
@@ def _load_llama_model(llm_config: LlmConfig) -> "LLMEdgeManager": save_exported_program=llm_config.export.export_only, verbose=llm_config.debug.verbose, metadata=_load_llama_model_metadata( - WeightType.FAIRSEQ2 if llm_config.base.fairseq2 else WeightType.LLAMA, llm_config.model.use_kv_cache, llm_config.model.use_sdpa_with_kv_cache, llm_config.model.enable_dynamic_shape,