Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions examples/models/llama/config/llm_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ class BaseConfig:
These are specific to the given model, e.g. whether it’s Qwen3 0.6B or Phi-4-mini.
For each of these different models, you can expect each of these fields to change.
"""

model_class: str = "llama"
params: Optional[str] = None
checkpoint: Optional[str] = None
Expand All @@ -41,7 +40,6 @@ class ModelConfig:
optimizations / actual configurations. The same ModelConfig can be applied
to different models.
"""

dtype_override: str = "fp32"
enable_dynamic_shape: bool = True
use_shared_embedding: bool = False
Expand All @@ -68,6 +66,17 @@ class ExportConfig:


@dataclass
class KVCacheConfig:
    """Configuration options for the model's key-value (KV) cache.

    All fields default to ``None`` (i.e. "not specified") so that an
    unset option can be distinguished from an explicit ``True``/``False``
    or an explicit list.
    """

    # Whether to enable the KV cache at all; None means "use the default".
    use_kv_cache: Optional[bool] = None
    # Whether to quantize the KV cache contents; None means "use the default".
    quantize_kv_cache: Optional[bool] = None
    # Was annotated `List[int] = None`, contradicting the None default;
    # widened to Optional[List[int]] to match the sibling fields.
    # NOTE(review): presumably a per-layer local/global attention pattern —
    # confirm the expected list semantics with the export pipeline.
    local_global_attention: Optional[List[int]] = None
    # ...potentially more in the future such as cache eviction strategy


@dataclass
class DebugConfig:
profile_memory: bool = False
profile_path: Optional[str] = None
Expand Down
Loading