Commit 243fae8

ai-edge-bot authored and copybara-github committed
internal changes only
PiperOrigin-RevId: 719113596
1 parent ebb8fa6 commit 243fae8

2 files changed: +7 -2 lines changed


ai_edge_torch/generative/layers/kv_cache.py
Lines changed: 2 additions & 1 deletion

@@ -81,7 +81,8 @@ def from_model_config(
     """
     caches = [
         KVCacheEntry.from_model_config(
-            config.kv_cache_max,
+            config.kv_cache_max if not config.block_config(idx).kv_cache_max_len
+            else config.block_config(idx).kv_cache_max_len,
            config.block_config(idx).attn_config,
            dtype,
            device,
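
With this change, each KVCacheEntry receives a per-block cache length: the block-level kv_cache_max_len wins when it is set, and the model-wide config.kv_cache_max is used otherwise. A minimal standalone sketch of that fallback logic (resolve_kv_len and the simplified _Block dataclass below are illustrative helpers, not part of the codebase):

from dataclasses import dataclass
from typing import Optional

@dataclass
class _Block:  # simplified stand-in for TransformerBlockConfig
  kv_cache_max_len: Optional[int] = None

def resolve_kv_len(model_kv_cache_max: int, block: _Block) -> int:
  # Mirrors the diff: a falsy override (None or 0) falls back to the
  # model-wide maximum; a set override wins.
  return (model_kv_cache_max if not block.kv_cache_max_len
          else block.kv_cache_max_len)

assert resolve_kv_len(1024, _Block()) == 1024                     # fallback
assert resolve_kv_len(1024, _Block(kv_cache_max_len=256)) == 256  # override

Note that the `if not ...` test treats both None and 0 as unset, so a block cannot opt into a zero-length cache.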

ai_edge_torch/generative/layers/model_config.py
Lines changed: 5 additions & 1 deletion

@@ -164,6 +164,9 @@ class TransformerBlockConfig:
   parallel_residual: bool = False
   # The Attention computation will include relative positional bias.
   relative_attention: bool = False
+  # KV Cache length for this block. Only used when attention types are different
+  # across blocks
+  kv_cache_max_len: Optional[int] = None


 @dataclasses.dataclass
@@ -200,7 +203,8 @@ class ModelConfig:
   embedding_use_bias: bool = False
   # Image embedding parameters.
   image_embedding: Optional[ImageEmbeddingConfig] = None
-
+  # Number of image tokens
+  num_mm_tokens_per_image: Optional[int] = None
   # Use bias term within LLM's HEAD.
   lm_head_use_bias: bool = False
   # Whether LLM's HEAD shares the weight of the embedding.
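
Together, the two new fields let one model mix attention types across blocks (as the comment on kv_cache_max_len notes), e.g. long-range blocks next to short sliding-window blocks, each with its own KV cache length. A hedged sketch of how such a config might be assembled; BlockConfig and Config are simplified stand-ins for TransformerBlockConfig and ModelConfig, and the 256-token values are illustrative, not from the diff:

from dataclasses import dataclass, field
from typing import List, Optional

@dataclass
class BlockConfig:  # simplified stand-in for TransformerBlockConfig
  kv_cache_max_len: Optional[int] = None

@dataclass
class Config:  # simplified stand-in for ModelConfig
  kv_cache_max: int = 1024
  num_mm_tokens_per_image: Optional[int] = None
  block_configs: List[BlockConfig] = field(default_factory=list)

  def block_config(self, idx: int) -> BlockConfig:
    return self.block_configs[idx]

cfg = Config(
    kv_cache_max=1024,
    num_mm_tokens_per_image=256,  # e.g. one image mapped to 256 tokens
    block_configs=[
        BlockConfig(),                      # falls back to kv_cache_max (1024)
        BlockConfig(kv_cache_max_len=256),  # short-window block: 256
    ],
)
for idx in range(2):
  blk = cfg.block_config(idx)
  print(cfg.kv_cache_max if not blk.kv_cache_max_len
        else blk.kv_cache_max_len)  # -> 1024, then 256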
