Skip to content

Commit 372c9cf

Browse files
authored
Merge pull request #2937 from ljt019/fix-phi3-kv-cache-reset
fix kv cache issue with quantized_phi3 implementation
2 parents 8045af9 + 038e28b commit 372c9cf

File tree

1 file changed

+3
-0
lines changed

1 file changed

+3
-0
lines changed

candle-transformers/src/models/quantized_phi3.rs

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -136,6 +136,9 @@ impl LayerWeights {
136 136
let q = self.apply_rotary_emb(&q, index_pos)?.contiguous()?;
137 137
let k = self.apply_rotary_emb(&k, index_pos)?;
138 138

139+
if index_pos == 0 {
140+
self.kv_cache.reset();
141+
}
139 142
let (k, v) = self.kv_cache.append(&k.contiguous()?, &v.contiguous()?)?;
140 143

141 144
let k = crate::utils::repeat_kv(k, self.n_head / self.n_kv_head)?;

0 commit comments

Comments
 (0)