Skip to content

Commit 66be13b

Browse files
committed
fixed quantized_phi3 implementation
1 parent cd96fa8 commit 66be13b

File tree

1 file changed

+3
-0
lines changed

1 file changed

+3
-0
lines changed

candle-transformers/src/models/quantized_phi3.rs

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -136,6 +136,9 @@ impl LayerWeights {
136 136
let q = self.apply_rotary_emb(&q, index_pos)?.contiguous()?;
137 137
let k = self.apply_rotary_emb(&k, index_pos)?;
138 138

139+
if index_pos == 0 {
140+
self.kv_cache.reset();
141+
}
139 142
let (k, v) = self.kv_cache.append(&k.contiguous()?, &v.contiguous()?)?;
140 143

141 144
let k = crate::utils::repeat_kv(k, self.n_head / self.n_kv_head)?;

0 commit comments

Comments
 (0)