Commit eb651c8

add clear kv cache to quantized qwen3 weights (#3189)
1 parent 549eacb

1 file changed, +14 -0

candle-transformers/src/models/quantized_qwen3.rs

@@ -233,6 +233,10 @@ impl AttentionWeights {
             .reshape((b, l, self.num_heads * self.head_dim))?;
         self.o_proj.forward(&reshaped_ctx)
     }
+
+    fn clear_kv_cache(&mut self) {
+        self.kv_cache.reset();
+    }
 }
 
 #[derive(Debug, Clone)]
@@ -283,6 +287,10 @@ impl LayerWeights {
         let h2 = h2.apply(&self.mlp)?;
         x + h2
     }
+
+    fn clear_kv_cache(&mut self) {
+        self.self_attn.clear_kv_cache();
+    }
 }
 
 #[derive(Debug, Clone)]
@@ -416,4 +424,10 @@ impl ModelWeights {
         let last_hidden = h.narrow(1, l - 1, 1)?;
         self.lm_head.forward(&last_hidden)?.squeeze(1)
     }
+
+    pub fn clear_kv_cache(&mut self) {
+        for layer in &mut self.layers {
+            layer.clear_kv_cache();
+        }
+    }
 }
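
The change is a delegation chain that mirrors the module hierarchy: ModelWeights::clear_kv_cache walks every transformer layer, each LayerWeights forwards to its attention block, and AttentionWeights resets the underlying KvCache. Below is a minimal sketch of how a caller might use the new public method to reuse one model instance across independent prompts; run_prompt and its decode loop are hypothetical stand-ins, and only clear_kv_cache comes from this commit.

// Sketch only: serving several unrelated prompts from one quantized Qwen3
// instance. run_prompt is a hypothetical helper; the only API taken from
// this commit is ModelWeights::clear_kv_cache.
use candle_transformers::models::quantized_qwen3::ModelWeights;

fn run_prompt(model: &mut ModelWeights, tokens: &[u32]) -> candle_core::Result<String> {
    // ... the usual prefill + decode loop; each forward call appends
    // keys/values to the per-layer KV caches ...
    todo!()
}

fn run_batch(model: &mut ModelWeights, prompts: &[Vec<u32>]) -> candle_core::Result<()> {
    for tokens in prompts {
        let _completion = run_prompt(model, tokens)?;
        // Drop the cached keys/values so the next prompt starts from an empty
        // context instead of silently attending to the previous sequence.
        model.clear_kv_cache();
    }
    Ok(())
}

Resetting at request boundaries keeps the quantized weights, the expensive part to load, resident while discarding only the per-sequence state.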

0 comments