Skip to content

Commit bef122e

Browse files
committed
speculative: use slot.cache_tokens.insert
1 parent eeda075 commit bef122e

File tree

2 files changed

+2
-4
lines changed

2 files changed

+2
-4
lines changed

tools/server/server.cpp

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3484,9 +3484,7 @@ struct server_context {
34843484
slot.n_draft_accepted += ids.size() - 1;
34853485

34863486
slot.cache_tokens.push_back(id);
3487-
for (auto & t : ids) {
3488-
slot.cache_tokens.push_back(t);
3489-
}
3487+
slot.cache_tokens.insert(ids);
34903488

34913489
llama_kv_self_seq_rm(ctx, slot.id, slot.n_past, -1);
34923490

tools/server/utils.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1124,7 +1124,7 @@ struct server_tokens {
11241124
}
11251125

11261126
// for compatibility with context shift and prompt truncation
1127-
void insert(llama_tokens & inp_tokens) {
1127+
void insert(const llama_tokens & inp_tokens) {
11281128
GGML_ASSERT(!has_mtmd); // only allow this if mtmd is disabled
11291129
tokens.insert(tokens.end(), inp_tokens.begin(), inp_tokens.end());
11301130
}

0 commit comments

Comments
 (0)