Skip to content

Commit a4a8d60

Browse files
committed
XTC: reworked to resize and penalize logits (thanks to @LostRuins)
1 parent 672f5e4 commit a4a8d60

File tree

1 file changed

+8
-3
lines changed

1 file changed

+8
-3
lines changed

base/llama-addon.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,17 +44,22 @@ void llama_sample_xtc_addon(struct llama_context * ctx, llama_token_data_array *
4444
llama_sample_softmax(nullptr, candidates);
4545

4646
const int64_t t_start_sample_us = ggml_time_us();
47-
48-
for (size_t i = 0; i < candidates->size; ++i) {
47+
size_t removed = 0;
48+
for (size_t i = 0; i < (candidates->size - 1); ++i) {
4949
if (candidates->data[i].p >= xtc_threshold) {
5050
std::random_device rd;
5151
float chance = (float)(rd()%100)/100;
52+
5253
if (chance <= xtc_probability) {
53-
candidates->data[i].p *= 0;
54+
candidates->data[i].logit = -999.0f; // .p will be recalculated in llama_sample_softmax_impl later based on .logit, so we need to change these
55+
++removed;
5456
}
5557
}
5658
}
5759

60+
candidates->sorted = false;
61+
candidates->size = candidates->size - removed;
62+
5863
llama_set_time(ctx, t_start_sample_us);
5964
}
6065

0 commit comments

Comments
 (0)