@@ -43,7 +43,6 @@ void llama_sample_xtc_addon(struct llama_context * ctx, llama_token_data_array *
4343
4444 std::random_device rd;
4545 float chance = (float )(rd ()%100 )/100 ;
46- // printf("\nChance = %f; ", chance);
4746 if (xtc_probability_once && chance > xtc_probability) return ;
4847
4948 llama_sample_softmax (nullptr , candidates);
@@ -53,27 +52,21 @@ void llama_sample_xtc_addon(struct llama_context * ctx, llama_token_data_array *
5352 for (size_t i = 0 ; i < (candidates->size - 1 ); ++i) {
5453 if (candidates->data [i].p >= xtc_threshold) {
5554 if (xtc_probability_once || chance <= xtc_probability) {
56- candidates->data [i].logit = -999 .0f ; // .p will be recalculated in llama_sample_softmax_impl later based on .logit, so we need to change these
55+ // .logit values are used both for sorting and for recalculating .p in llama_sample_softmax_impl
56+ candidates->data [i].logit = -999 .0f ;
5757 ++removed;
58- if (!xtc_probability_once) {
59- chance = (float )(rd ()%100 )/100 ;
60- printf (" chance = %f; " , chance);
61- }
58+ if (!xtc_probability_once) chance = (float )(rd ()%100 )/100 ;
6259 }
6360 }
6461 }
6562
66- // printf("\nPresort (size %zu): %f, %f, %f", candidates->size, candidates->data[0].p, candidates->data[1].p, candidates->data[2].p);
67-
6863 // sorting with new logits
6964 std::sort (candidates->data , candidates->data + candidates->size , [](const llama_token_data & a, const llama_token_data & b) {
7065 return a.logit > b.logit ;
7166 });
7267 // resizing now that penalized tokens are at the back
7368 candidates->size = candidates->size - removed;
7469
75- // printf("\nSort (size %zu): %f, %f, %f\n", candidates->size, candidates->data[0].p, candidates->data[1].p, candidates->data[2].p);
76-
7770 llama_set_time (ctx, t_start_sample_us);
7871}
7972
0 commit comments