From 6d3025ddec3d379819c322723b8906217220bf5d Mon Sep 17 00:00:00 2001
From: Shen Xu
Date: Mon, 13 Oct 2025 09:16:20 -0700
Subject: [PATCH] Create KV cache input tensor only if cache len > 0 for that layer (#15042)

Summary: The MHA branch has this logic already, add it to the other branch.

Differential Revision: D84471388
---
 examples/models/llama/static_attention.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/examples/models/llama/static_attention.py b/examples/models/llama/static_attention.py
index 95bae1b766a..849718527ed 100644
--- a/examples/models/llama/static_attention.py
+++ b/examples/models/llama/static_attention.py
@@ -297,6 +297,7 @@ def __init__(
                 dtype=dtype,
             )
             for layer_id in range(config.n_layers)
+            if cache_lens[layer_id] > 0
         }
         self.v_caches = {
             StaticKVCache.calculate_cache_key(layer_id, 0): torch.zeros(
@@ -307,6 +308,7 @@ def __init__(
                 dtype=dtype,
             )
             for layer_id in range(config.n_layers)
+            if cache_lens[layer_id] > 0
         }
         self.config = config