From a77980c94d430c9731094cd85ea2501c9eba6e06 Mon Sep 17 00:00:00 2001
From: Shen Xu
Date: Fri, 22 Aug 2025 15:32:21 -0700
Subject: [PATCH] Call .detach() in static attention cache update helper

Summary: This reduces memory usage.

Differential Revision: D80853817
---
 examples/models/llama/static_attention.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/models/llama/static_attention.py b/examples/models/llama/static_attention.py
index 5ffd25f2c7f..fb1a05f4cc9 100644
--- a/examples/models/llama/static_attention.py
+++ b/examples/models/llama/static_attention.py
@@ -549,7 +549,7 @@ def _update_states(self, attn_updates, update_pos, update_len):
                 style=self.style,
                 update_pos=update_pos,
                 update_len=update_len,
-            )
+            ).detach()
         for cache_id, update in v_cache_updates.items():
             self.v_caches[cache_id] = StaticKVCache.apply_update(
                 self.v_caches[cache_id],
@@ -558,7 +558,7 @@ def _update_states(self, attn_updates, update_pos, update_len):
                 style=self.style,
                 update_pos=update_pos,
                 update_len=update_len,
-            )
+            ).detach()
         self.pos += update_len

     def _get_lookahead_decoding_mask(
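
A minimal sketch of why the memory saving occurs (not part of the patch; it assumes only standard PyTorch autograd semantics, not the StaticKVCache API): a tensor produced inside an autograd graph keeps that graph, and the intermediate activations it references, alive for as long as the tensor is stored. Detaching before storing the result back into self.k_caches / self.v_caches keeps only the cache data.

    # Illustration only: shows the effect of .detach() on what a stored
    # tensor keeps alive; names here are hypothetical examples.
    import torch

    x = torch.randn(4, 8, requires_grad=True)
    y = (x * 2).sum(dim=-1)        # y's grad_fn references the graph built from x

    cached = y                     # storing y keeps the whole graph reachable
    detached = y.detach()          # storing this keeps only the values

    print(cached.grad_fn is not None)   # True: graph still referenced
    print(detached.grad_fn is None)     # True: no graph attached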