From 6d3025ddec3d379819c322723b8906217220bf5d Mon Sep 17 00:00:00 2001
From: Shen Xu
Date: Mon, 13 Oct 2025 09:16:20 -0700
Subject: [PATCH] Create KV cache input tensor only if cache len > 0 for that layer (#15042)

Summary: The MHA branch has this logic already, add it to the other branch.

Differential Revision: D84471388
---
 examples/models/llama/static_attention.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/examples/models/llama/static_attention.py b/examples/models/llama/static_attention.py
index 95bae1b766a..849718527ed 100644
--- a/examples/models/llama/static_attention.py
+++ b/examples/models/llama/static_attention.py
@@ -297,6 +297,7 @@ def __init__(
                 dtype=dtype,
             )
             for layer_id in range(config.n_layers)
+            if cache_lens[layer_id] > 0
         }
         self.v_caches = {
             StaticKVCache.calculate_cache_key(layer_id, 0): torch.zeros(
@@ -307,6 +308,7 @@ def __init__(
                 dtype=dtype,
             )
             for layer_id in range(config.n_layers)
+            if cache_lens[layer_id] > 0
         }
         self.config = config