Skip to content

Commit 2669c15

Browse files
committed
Initial commit
1 parent 15e6125 commit 2669c15

File tree

6 files changed

+43
-14
lines changed

6 files changed

+43
-14
lines changed

common/common.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,8 @@ struct common_params_sampling {
139139
float temp = 0.80f; // <= 0.0 to sample greedily, 0.0 to not output probabilities
140140
float dynatemp_range = 0.00f; // 0.0 = disabled
141141
float dynatemp_exponent = 1.00f; // controls how entropy maps to temperature in dynamic temperature sampler
142+
float smoothing_factor = 0.0f; // <= 0.0 = disabled; scales the strength of the adjustment in smooth / quadratic sampling
143+
float smoothing_curve = 1.0f; // 1.0 = purely quadratic adjustment; other values blend in a cubic term (smooth / quadratic sampling)
142144
int32_t penalty_last_n = 64; // last n tokens to penalize (0 = disable penalty, -1 = context size)
143145
float penalty_repeat = 1.00f; // 1.0 = disabled
144146
float penalty_freq = 0.00f; // 0.0 = disabled

common/sampling.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -262,7 +262,7 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co
262262
llama_sampler_chain_add(result->chain, llama_sampler_init_typical (params.typ_p, params.min_keep));
263263
break;
264264
case COMMON_SAMPLER_TYPE_TEMPERATURE:
265-
llama_sampler_chain_add(result->chain, llama_sampler_init_temp_ext (params.temp, params.dynatemp_range, params.dynatemp_exponent));
265+
llama_sampler_chain_add(result->chain, llama_sampler_init_temp_ext (params.temp, params.dynatemp_range, params.dynatemp_exponent, params.smoothing_factor, params.smoothing_curve));
266266
break;
267267
case COMMON_SAMPLER_TYPE_INFILL:
268268
llama_sampler_chain_add(result->chain, llama_sampler_init_infill (vocab));

include/llama.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1250,8 +1250,8 @@ extern "C" {
12501250
/// @details Updates the logits l_i' = l_i/t. When t <= 0.0f, the maximum logit is kept at its original value, the rest are set to -inf
12511251
LLAMA_API struct llama_sampler * llama_sampler_init_temp (float t);
12521252

1253-
/// @details Dynamic temperature implementation (a.k.a. entropy) described in the paper https://arxiv.org/abs/2309.02772.
1254-
LLAMA_API struct llama_sampler * llama_sampler_init_temp_ext (float t, float delta, float exponent);
1253+
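/// @details Dynamic temperature implementation (a.k.a. entropy) described in the paper https://arxiv.org/abs/2309.02772, combined with smooth (quadratic) sampling in a single sampler. Smoothing is applied only when smoothing_factor > 0; no research paper is available for smooth sampling.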
1254+
LLAMA_API struct llama_sampler * llama_sampler_init_temp_ext (float t, float delta, float exponent, float smoothing_factor, float smoothing_curve);
12551255

12561256
/// @details XTC sampler as described in https://github.com/oobabooga/text-generation-webui/pull/6335
12571257
LLAMA_API struct llama_sampler * llama_sampler_init_xtc (float p, float t, size_t min_keep, uint32_t seed);

src/llama-sampling.cpp

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1005,6 +1005,8 @@ struct llama_sampler_temp_ext {
10051005
const float temp;
10061006
const float delta;
10071007
const float exponent;
1008+
const float smoothing_factor;
1009+
const float smoothing_curve;
10081010
};
10091011

10101012
static const char * llama_sampler_temp_ext_name(const struct llama_sampler * /*smpl*/) {
@@ -1013,17 +1015,33 @@ static const char * llama_sampler_temp_ext_name(const struct llama_sampler * /*s
10131015

10141016
static void llama_sampler_temp_ext_apply(struct llama_sampler * smpl, llama_token_data_array * cur_p) {
10151017
const auto * ctx = (llama_sampler_temp_ext *) smpl->ctx;
1018+
1019+
// no need to do anything if there is only one (or zero) candidates
1020+
if (cur_p->size <= 1) {
1021+
return;
1022+
}
1023+
1024+
// Apply smoothing if smoothing_factor is > 0. Do not change base implementation otherwise.
1025+
if (ctx->smoothing_factor > 0.0f) {
1026+
llama_sampler_softmax_impl(cur_p);
1027+
float h = cur_p->data[0].logit; // Find the maximum logit for h to be added after the transformation
1028+
1029+
// Apply the modified quadratic transformation using the smoothing_factor and smoothing_curve
1030+
for (size_t i = 0; i < cur_p->size; ++i) {
1031+
float logit_shifted = cur_p->data[i].logit - h;
1032+
float k = (3 - ctx->smoothing_curve) / 2;
1033+
float s = (ctx->smoothing_curve - 1) / 2;
1034+
cur_p->data[i].logit = -(k * ctx->smoothing_factor * logit_shifted * logit_shifted) + (s * ctx->smoothing_factor * logit_shifted * logit_shifted * logit_shifted) + h;
1035+
}
1036+
llama_sampler_softmax_impl(cur_p);
1037+
}
1038+
10161039
if (ctx->delta > 0) {
10171040
const float min_temp = std::max(0.0f, ctx->temp - ctx->delta);
10181041
const float max_temp = ctx->temp + ctx->delta;
10191042

10201043
float exponent_val = ctx->exponent;
10211044

1022-
// no need to do anything if there is only one (or zero) candidates
1023-
if (cur_p->size <= 1) {
1024-
return;
1025-
}
1026-
10271045
// Calculate maximum possible entropy
10281046
float max_entropy = -logf(1.0f / cur_p->size);
10291047

@@ -1084,7 +1102,7 @@ static void llama_sampler_temp_ext_apply(struct llama_sampler * smpl, llama_toke
10841102

10851103
static struct llama_sampler * llama_sampler_temp_ext_clone(const struct llama_sampler * smpl) {
10861104
const auto * ctx = (const llama_sampler_temp_ext *) smpl->ctx;
1087-
return llama_sampler_init_temp_ext(ctx->temp, ctx->delta, ctx->exponent);
1105+
return llama_sampler_init_temp_ext(ctx->temp, ctx->delta, ctx->exponent, ctx->smoothing_factor, ctx->smoothing_curve);
10881106
}
10891107

10901108
static void llama_sampler_temp_ext_free(struct llama_sampler * smpl) {
@@ -1100,13 +1118,15 @@ static struct llama_sampler_i llama_sampler_temp_ext_i = {
11001118
/* .free = */ llama_sampler_temp_ext_free,
11011119
};
11021120

1103-
struct llama_sampler * llama_sampler_init_temp_ext(float temp, float delta, float exponent) {
1121+
struct llama_sampler * llama_sampler_init_temp_ext(float temp, float delta, float exponent, float smoothing_factor, float smoothing_curve) {
11041122
return llama_sampler_init(
11051123
/* .iface = */ &llama_sampler_temp_ext_i,
11061124
/* .ctx = */ new llama_sampler_temp_ext {
11071125
/* .temp = */ temp,
11081126
/* .delta = */ delta,
11091127
/* .exponent = */ exponent,
1128+
/* .smoothing_factor = */ smoothing_factor,
1129+
/* .smoothing_curve = */ smoothing_curve
11101130
}
11111131
);
11121132
}

tests/test-sampling.cpp

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -72,11 +72,11 @@ static void test_temp(const std::vector<float> & probs, const std::vector<float>
7272
tester.check();
7373
}
7474

75-
static void test_temp_ext(const std::vector<float> & probs, const std::vector<float> & probs_expected, float temp, float delta, float exponent) {
75+
static void test_temp_ext(const std::vector<float> & probs, const std::vector<float> & probs_expected, float temp, float delta, float exponent, float smoothing_factor, float smoothing_curve) {
7676
sampler_tester tester(probs, probs_expected);
7777

7878
DUMP(&tester.cur_p);
79-
tester.apply(llama_sampler_init_temp_ext(temp, delta, exponent));
79+
tester.apply(llama_sampler_init_temp_ext(temp, delta, exponent, smoothing_factor, smoothing_curve));
8080
tester.apply(llama_sampler_init_dist (0));
8181
DUMP(&tester.cur_p);
8282

@@ -311,8 +311,11 @@ int main(void) {
311311
test_temp({0.1f, 0.2f, 0.3f, 0.4f}, {0.4f, 0.3f, 0.2f, 0.1f}, 1.0f);
312312
test_temp({0.1f, 0.2f, 0.3f, 0.4f}, {1.0f, 0.0f, 0.0f, 0.0f}, 0.0f);
313313

314-
test_temp_ext({0.1f, 0.2f, 0.3f, 0.4f}, {0.4f, 0.3f, 0.2f, 0.1f}, 1.0f, 0.0f, 1.0f);
315-
test_temp_ext({0.1f, 0.2f, 0.3f, 0.4f}, {1.0f, 0.0f, 0.0f, 0.0f}, 0.0f, 0.0f, 1.0f);
314+
test_temp_ext({0.1f, 0.2f, 0.3f, 0.4f}, {0.4f, 0.3f, 0.2f, 0.1f}, 1.0f, 0.0f, 1.0f, 0.0f, 1.0f);
315+
test_temp_ext({0.1f, 0.2f, 0.3f, 0.4f}, {1.0f, 0.0f, 0.0f, 0.0f}, 0.0f, 0.0f, 1.0f, 0.0f, 1.0f);
316+
317+
test_temp_ext({0.1f, 0.2f, 0.3f, 0.4f}, {0.372382f, 0.342804f, 0.230319f, 0.054495f}, 1.0f, 0.0f, 1.0f, 1.0f, 1.0f);
318+
test_temp_ext({0.1f, 0.2f, 0.3f, 0.4f}, {0.368339f, 0.349226f, 0.245247f, 0.037188f}, 1.0f, 0.0f, 1.0f, 1.0f, 2.0f);
316319

317320
test_top_k({0.1f, 0.2f, 0.3f, 0.4f}, {1.0f}, 1);
318321
test_top_k({0.1f, 0.2f, 0.3f, 0.4f}, {0.44444f, 0.33333f, 0.22222f}, 3);

tools/server/server.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,8 @@ struct slot_params {
144144
{"temperature", sampling.temp},
145145
{"dynatemp_range", sampling.dynatemp_range},
146146
{"dynatemp_exponent", sampling.dynatemp_exponent},
147+
{"smoothing_factor", sampling.smoothing_factor},
148+
{"smoothing_curve", sampling.smoothing_curve},
147149
{"top_k", sampling.top_k},
148150
{"top_p", sampling.top_p},
149151
{"min_p", sampling.min_p},
@@ -257,6 +259,8 @@ struct server_task {
257259
params.sampling.temp = json_value(data, "temperature", defaults.sampling.temp);
258260
params.sampling.dynatemp_range = json_value(data, "dynatemp_range", defaults.sampling.dynatemp_range);
259261
params.sampling.dynatemp_exponent = json_value(data, "dynatemp_exponent", defaults.sampling.dynatemp_exponent);
262+
params.sampling.smoothing_factor = json_value(data, "smoothing_factor", defaults.sampling.smoothing_factor);
263+
params.sampling.smoothing_curve = json_value(data, "smoothing_curve", defaults.sampling.smoothing_curve);
260264
params.sampling.penalty_last_n = json_value(data, "repeat_last_n", defaults.sampling.penalty_last_n);
261265
params.sampling.penalty_repeat = json_value(data, "repeat_penalty", defaults.sampling.penalty_repeat);
262266
params.sampling.penalty_freq = json_value(data, "frequency_penalty", defaults.sampling.penalty_freq);

0 commit comments

Comments
 (0)