
Commit cdae282

Add new export LLM config
ghstack-source-id: 999f28a
Pull Request resolved: pytorch/executorch#11026
1 parent: 2ae7aef

1 file changed, 8 additions, 2 deletions

examples/models/llama/config/llm_config.py

@@ -23,7 +23,6 @@ class BaseConfig:
     These are specific to the particular model, e.g. whether it’s Qwen3 0.6B or Phi-4-mini.
     For each of these models, you can expect each of these fields to change.
     """
-
     model_class: str = "llama"
     params: Optional[str] = None
     checkpoint: Optional[str] = None
@@ -41,7 +40,6 @@ class ModelConfig:
     optimizations / actual configurations. The same ModelConfig can be applied
     to different models.
     """
-
     dtype_override: str = "fp32"
     enable_dynamic_shape: bool = True
     use_shared_embedding: bool = False
@@ -68,6 +66,14 @@ class ExportConfig:
 
 
 @dataclass
+class KVCacheConfig:
+    use_kv_cache: Optional[bool] = None
+    quantize_kv_cache: Optional[bool] = None
+    local_global_attention: Optional[List[int]] = None
+    # ...potentially more in the future, such as a cache eviction strategy
+
+
+@dataclass
 class DebugConfig:
     profile_memory: bool = False
     profile_path: Optional[str] = None
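
For context, here is a minimal, self-contained sketch of how the KVCacheConfig added above might be constructed once the stray merge-conflict markers are resolved. The dataclass is reproduced standalone so the snippet runs on its own; the field values, and the idea that local_global_attention holds per-layer attention window sizes, are illustrative assumptions rather than anything defined in this commit.

```python
from dataclasses import dataclass
from typing import List, Optional


# Reproduction of the dataclass added in this diff, standalone for the example.
@dataclass
class KVCacheConfig:
    use_kv_cache: Optional[bool] = None
    quantize_kv_cache: Optional[bool] = None
    local_global_attention: Optional[List[int]] = None
    # ...potentially more in the future, such as a cache eviction strategy


if __name__ == "__main__":
    # Hypothetical export-time setup: enable the KV cache, keep it
    # unquantized, and pass assumed per-layer local/global window sizes.
    kv = KVCacheConfig(
        use_kv_cache=True,
        quantize_kv_cache=False,
        local_global_attention=[256, 256, 0, 256],
    )
    print(kv)
```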
