Skip to content

Commit 9613c48

Browse files
committed
with logging
1 parent d1e5c60 commit 9613c48

File tree

1 file changed

+16
-4
lines changed

1 file changed

+16
-4
lines changed

src/llama-sampling.cpp

Lines changed: 16 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -2362,12 +2362,16 @@ static const char * llama_sampler_power_law_name(const struct llama_sampler * /*
23622362
static float llama_sampler_power_law_compute_target(const llama_sampler_power_law * ctx, float decay) {
23632363
if (ctx->total_weight == 0.0f) {
23642364
// if there is no history, just use base target
2365+
fprintf(stderr, "power-law: compute_target: total_weight == 0.0 (target fixed at %.3f)\n", ctx->target);
2366+
fflush(stderr);
23652367
return ctx->target;
23662368
}
23672369

23682370
// maintain a running weighted sum with exponential decay
23692371
float new_total_weight = 1.0f + decay * ctx->total_weight;
2372+
fprintf(stderr, "power-law: compute_target: new_total_weight = %.3f\n", new_total_weight); fflush(stderr);
23702373
float next_value = ctx->target * new_total_weight - decay * ctx->weighted_sum;
2374+
fprintf(stderr, "power-law: compute_target: next_value = %.3f\n", next_value); fflush(stderr);
23712375

23722376
// clamp to [0.0, 1.0]
23732377
return std::max(0.0f, std::min(next_value, 1.0f));
@@ -2378,14 +2382,16 @@ static void llama_sampler_power_law_apply(struct llama_sampler * smpl, llama_tok
23782382

23792383
if (ctx->target < 0.0f) {
23802384
// no-op: just sample from the distribution as-is
2385+
fprintf(stderr, "power-law: no-op!"); fflush(stderr);
23812386
llama_sampler_softmax_impl(cur_p, false);
2382-
const int idx = llama_sample_dist(cur_p, ctx->rng);
2387+
const int idx = llama_sample_dist(cur_p, ctx->rng);
23832388
cur_p->selected = idx;
23842389
return;
23852390
}
23862391

23872392
// clamp decay to avoid degenerate case at 1.0 (unbounded accumulation)
23882393
const float decay = std::min(ctx->decay, 0.99f);
2394+
fprintf(stderr, "power-law: decay = %.3f\n", decay); fflush(stderr);
23892395

23902396
// fixed power law transform parameters
23912397
const float distribution_width = 0.3f;
@@ -2403,22 +2409,28 @@ static void llama_sampler_power_law_apply(struct llama_sampler * smpl, llama_tok
24032409
}
24042410

24052411
float computed_target = llama_sampler_power_law_compute_target(ctx, decay);
2412+
fprintf(stderr, "power-law: computed_target = %.3f\n", computed_target); fflush(stderr);
24062413

24072414
//
24082415
// power law transform
24092416
//
24102417

24112418
for (size_t i = 0; i < cur_p->size; ++i) {
2412-
float p = cur_p->data[i].p;
2413-
float normalized_distance = std::abs(p - computed_target) / distribution_width;
2414-
cur_p->data[i].logit = peak_logit_value / (1.0f + std::pow(normalized_distance, tail_heaviness));
2419+
float p = cur_p->data[i].p;
2420+
fprintf(stderr, "power-law: transform: p = %.3f\n", p); fflush(stderr);
2421+
float normed_distance = std::abs(p - computed_target) / distribution_width;
2422+
fprintf(stderr, "power-law: transform: normed_distance = %.3f\n", normed_distance); fflush(stderr);
2423+
float new_p = peak_logit_value / (1.0f + std::pow(normed_distance, tail_heaviness));
2424+
fprintf(stderr, "power-law: transform: new_p = %.3f\n", new_p); fflush(stderr);
2425+
cur_p->data[i].logit = new_p;
24152426
}
24162427

24172428
llama_sampler_softmax_impl(cur_p, false);
24182429

24192430
// sample from transformed distribution
24202431
const int idx = llama_sample_dist(cur_p, ctx->rng);
24212432
cur_p->selected = idx;
2433+
fprintf(stderr, "power-law: selected token %d\n", idx); fflush(stderr);
24222434

24232435
// update running history with the original probability of the selected token
24242436
float original_p = original_probs[idx];

0 commit comments

Comments
 (0)