We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 04fbc5f, commit 1958f7e. Copy full SHA for 1958f7e
llama.cpp
@@ -13063,6 +13063,11 @@ struct llama_beam_search_data {
13063
}
13064
llama_logit_info logit_info(ctx);
13065
std::vector<llama_token_data> next_tokens = logit_info.top_k(n_beams);
13066
+
13067
+ // Clear the kv slot so that other beams may try different tokens at this position. The llama_decode()
13068
+ // call in loop() will conclusively fill in the kv slot once the beams converge at this position.
13069
+ llama_kv_cache_seq_rm(ctx, 0, n_past, -1);
13070
13071
size_t i=0;
13072
if (next_beams.size() < n_beams) {
13073
for (; next_beams.size() < n_beams ; ++i) {
0 commit comments