Skip to content

Commit 9c43a01

Browse files
committed
Removed xtc_threshold_max
1 parent acada1a commit 9c43a01

File tree

11 files changed

+21
-55
lines changed

11 files changed

+21
-55
lines changed

common/arg.cpp

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -987,13 +987,6 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
987987
params.sparams.xtc_threshold = std::stof(value);
988988
}
989989
).set_sparam());
990-
add_opt(common_arg(
991-
{"-xtc-t-max", "--xtc-threshold-max"}, "N",
992-
format("xtc upper threshold (default: %.1f, 0.0 = disabled)", (double)params.sparams.xtc_threshold_max),
993-
[](common_params & params, const std::string & value) {
994-
params.sparams.xtc_threshold_max = std::stof(value);
995-
}
996-
).set_sparam());
997990
add_opt(common_arg(
998991
{"--typical"}, "N",
999992
format("locally typical sampling, parameter p (default: %.1f, 1.0 = disabled)", (double)params.sparams.typ_p),

common/common.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2090,7 +2090,6 @@ void yaml_dump_non_result_info(FILE * stream, const common_params & params, cons
20902090
fprintf(stream, "min_p: %f # default: 0.0\n", sparams.min_p);
20912091
fprintf(stream, "xtc_probability: %f # default: 0.5\n", sparams.xtc_probability);
20922092
fprintf(stream, "xtc_threshold: %f # default: 0.1\n", sparams.xtc_threshold);
2093-
fprintf(stream, "xtc_threshold_max: %f # default: 1.0\n", sparams.xtc_threshold_max);
20942093
fprintf(stream, "typ_p: %f # default: 1.0\n", sparams.typ_p);
20952094
fprintf(stream, "verbose_prompt: %s # default: false\n", params.verbose_prompt ? "true" : "false");
20962095
fprintf(stream, "display_prompt: %s # default: true\n", params.display_prompt ? "true" : "false");

common/common.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,6 @@ struct common_sampler_params {
112112
float min_p = 0.05f; // 0.0 = disabled
113113
float xtc_probability = 0.00f; // 0.0 = disabled
114114
float xtc_threshold = 0.10f; // 0.5 = disabled
115-
float xtc_threshold_max = 1.00f; // 0.0 = disabled
116115
float tfs_z = 1.00f; // 1.0 = disabled
117116
float typ_p = 1.00f; // typical_p, 1.0 = disabled
118117
float temp = 0.80f; // <= 0.0 to sample greedily, 0.0 to not output probabilities

common/sampling.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -130,10 +130,10 @@ std::string common_sampler_params::print() const {
130130

131131
snprintf(result, sizeof(result),
132132
"\trepeat_last_n = %d, repeat_penalty = %.3f, frequency_penalty = %.3f, presence_penalty = %.3f\n"
133-
"\ttop_k = %d, tfs_z = %.3f, top_p = %.3f, min_p = %.3f, xtc_probability = %.3f, xtc_threshold = %.3f, xtc_threshold_max = %.3f, typical_p = %.3f, temp = %.3f\n"
133+
"\ttop_k = %d, tfs_z = %.3f, top_p = %.3f, min_p = %.3f, xtc_probability = %.3f, xtc_threshold = %.3f, typical_p = %.3f, temp = %.3f\n"
134134
"\tmirostat = %d, mirostat_lr = %.3f, mirostat_ent = %.3f",
135135
penalty_last_n, penalty_repeat, penalty_freq, penalty_present,
136-
top_k, tfs_z, top_p, min_p, xtc_probability, xtc_threshold, xtc_threshold_max, typ_p, temp,
136+
top_k, tfs_z, top_p, min_p, xtc_probability, xtc_threshold, typ_p, temp,
137137
mirostat, mirostat_eta, mirostat_tau);
138138

139139
return std::string(result);
@@ -185,7 +185,7 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co
185185
llama_sampler_chain_add(result->chain, llama_sampler_init_min_p (params.min_p, params.min_keep));
186186
break;
187187
case COMMON_SAMPLER_TYPE_XTC:
188-
llama_sampler_chain_add(result->chain, llama_sampler_init_xtc (params.xtc_probability, params.xtc_threshold, params.xtc_threshold_max, params.min_keep, params.seed));
188+
llama_sampler_chain_add(result->chain, llama_sampler_init_xtc (params.xtc_probability, params.xtc_threshold, params.min_keep, params.seed));
189189
break;
190190
case COMMON_SAMPLER_TYPE_TFS_Z:
191191
llama_sampler_chain_add(result->chain, llama_sampler_init_tail_free(params.tfs_z, params.min_keep));

examples/main/README.md

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -245,17 +245,14 @@ Example usage: `--mirostat 2 --mirostat-lr 0.05 --mirostat-ent 3.0`
245245

246246
- `--xtc-probability N`: Sets the chance for token removal (checked once on sampler start) (default: 0.0).
247247
- `--xtc-threshold N`: Sets a minimum probability threshold for tokens to be removed (default: 0.1).
248-
- `--xtc-threshold-max N`: Sets a maximum probability threshold for tokens to be removed (highly experimental) (default: 1.0).
249248

250249
Exclude Top Choices (XTC) is a unique sampler that is designed to remove top tokens from consideration and avoid more obvious and repetitive outputs. With a chance of `xtc-p` it searches for tokens with probabilities of `xtc-threshold` and above, then removes all such tokens except the least probable one.
251250

252251
By removing top tokens XTC can improve the variety of answers, break writing clichés and inhibit repetition, since clichés and repeated phrases are usually more likely to appear. By keeping the last token above the threshold, XTC ensures that the answer is still coherent. XTC is meant to be used for creative tasks, but feel free to experiment with different settings for different models.
253252

254-
The additional `xtc-threshold-max` parameter may help with finetuned models that already give relatively creative output, meaning that clichés and repetitive phrases may appear at lower probabilities. It allows to remove tokens from a middle range which will always be specific to a model, requiring careful experimenting. Leave `xtc-threshold-max` on default 1.0 for all base/instruct models.
255-
256253
Being experimental and unique, XTC is disabled by default. The recommended combination of samplers is Min-P followed by XTC on its default settings: `--sampling-seq mx --min-p 0.02 -xtc-p 0.5`.
257254

258-
Example usage: `-xtc-p 0.5 -xtc-t 0.1 -xtc-t-max 1.0`
255+
Example usage: `-xtc-p 0.5 -xtc-t 0.1`
259256

260257
### Logit Bias
261258

examples/server/public/index-new.html

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,6 @@
4545
min_p: 0.05, // 0 = disabled; recommended for non-english: ~ 0.4
4646
xtc_probability: 0.0, // 0 = disabled;
4747
xtc_threshold: 0.1, // 0.5 = disabled;
48-
xtc_threshold_max: 1.0, // 0 = disabled;
4948
tfs_z: 1.0, // 1.0 = disabled
5049
typical_p: 1.0, // 1.0 = disabled
5150
presence_penalty: 0.0, // 0.0 = disabled
@@ -841,7 +840,6 @@
841840
${FloatField({ label: "Typical-P", title: "Activates local typical sampling, a method used to limit the prediction of tokens that are atypical in the current context. The parameter p controls the strength of this limitation. A value of 1.0 means that this function is deactivated.", max: 1.0, min: 0.0, name: "typical_p", step: 0.01, value: params.value.typical_p })}
842841
${FloatField({ label: "XTC probability", title: "Sets the chance for token removal (checked once on sampler start)", max: 1.0, min: 0.0, name: "xtc_probability", step: 0.01, value: params.value.xtc_probability })}
843842
${FloatField({ label: "XTC threshold", title: "Sets a minimum probability threshold for tokens to be removed", max: 0.5, min: 0.0, name: "xtc_threshold", step: 0.01, value: params.value.xtc_threshold })}
844-
${FloatField({ label: "XTC max threshold", title: "Sets a maximum probability threshold for tokens to be removed (highly experimental)", max: 1.0, min: 0.0, name: "xtc_threshold_max", step: 0.01, value: params.value.xtc_threshold_max })}
845843
${IntField({ label: "Min Keep", title: "If greater than 0, samplers are forced to return N possible tokens at minimum. Default is 0", max: 10, min: 0, name: "min_keep", value: params.value.min_keep })}
846844
</fieldset>
847845
@@ -1140,7 +1138,6 @@ <h2>llama.cpp</h2>
11401138
min_p: { snapValue: 0.05, snapRangeMultiplier: 2 },
11411139
xtc_probability: { snapValue: 0.0, snapRangeMultiplier: 4 },
11421140
xtc_threshold: { snapValue: 0.5, snapRangeMultiplier: 4 },
1143-
xtc_threshold_max: { snapValue: 1.0, snapRangeMultiplier: 4 },
11441141
top_p: { snapValue: 1.0, snapRangeMultiplier: 4 },
11451142
tfs_z: { snapValue: 1.0, snapRangeMultiplier: 4 },
11461143
typical_p: { snapValue: 1.0, snapRangeMultiplier: 4 },

examples/server/public/index.html

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -309,7 +309,6 @@
309309
min_p: 0.05, // 0 = disabled
310310
xtc_probability: 0.0, // 0 = disabled;
311311
xtc_threshold: 0.1, // 0.5 = disabled;
312-
xtc_threshold_max: 1.0, // 0 = disabled;
313312
tfs_z: 1.0, // 1.0 = disabled
314313
typical_p: 1.0, // 1.0 = disabled
315314
presence_penalty: 0.0, // 0.0 = disabled
@@ -1018,7 +1017,6 @@
10181017
${FloatField({ label: "Frequency penalty", max: 1.0, min: 0.0, name: "frequency_penalty", step: 0.01, value: params.value.frequency_penalty })}
10191018
${FloatField({ label: "XTC probability", max: 1.0, min: 0.0, name: "xtc_probability", step: 0.01, value: params.value.xtc_probability })}
10201019
${FloatField({ label: "XTC threshold", max: 0.5, min: 0.0, name: "xtc_threshold", step: 0.01, value: params.value.xtc_threshold })}
1021-
${FloatField({ label: "XTC upper threshold", max: 1.0, min: 0.0, name: "xtc_threshold_max", step: 0.01, value: params.value.xtc_threshold_max })}
10221020
</fieldset>
10231021
<hr />
10241022
<fieldset class="three">

examples/server/server.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -893,7 +893,6 @@ struct server_context {
893893
slot.sparams.min_p = json_value(data, "min_p", default_sparams.min_p);
894894
slot.sparams.xtc_probability = json_value(data, "xtc_probability", default_sparams.xtc_probability);
895895
slot.sparams.xtc_threshold = json_value(data, "xtc_threshold", default_sparams.xtc_threshold);
896-
slot.sparams.xtc_threshold_max = json_value(data, "xtc_threshold_max", default_sparams.xtc_threshold_max);
897896
slot.sparams.tfs_z = json_value(data, "tfs_z", default_sparams.tfs_z);
898897
slot.sparams.typ_p = json_value(data, "typical_p", default_sparams.typ_p);
899898
slot.sparams.temp = json_value(data, "temperature", default_sparams.temp);
@@ -1244,7 +1243,6 @@ struct server_context {
12441243
{"min_p", slot.sparams.min_p},
12451244
{"xtc_probability", slot.sparams.xtc_probability},
12461245
{"xtc_threshold", slot.sparams.xtc_threshold},
1247-
{"xtc_threshold_max", slot.sparams.xtc_threshold_max},
12481246
{"tfs_z", slot.sparams.tfs_z},
12491247
{"typical_p", slot.sparams.typ_p},
12501248
{"repeat_last_n", slot.sparams.penalty_last_n},

include/llama.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1095,7 +1095,7 @@ extern "C" {
10951095
LLAMA_API struct llama_sampler * llama_sampler_init_temp_ext (float t, float delta, float exponent);
10961096

10971097
/// @details XTC sampler as described in https://github.com/oobabooga/text-generation-webui/pull/6335
1098-
LLAMA_API struct llama_sampler * llama_sampler_init_xtc (float p, float t, float t_max, size_t min_keep, uint32_t seed);
1098+
LLAMA_API struct llama_sampler * llama_sampler_init_xtc (float p, float t, size_t min_keep, uint32_t seed);
10991099

11001100
/// @details Mirostat 1.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words.
11011101
/// @param candidates A vector of `llama_token_data` containing the candidate tokens, their probabilities (p), and log-odds (logit) for the current position in the generated text.

src/llama-sampling.cpp

Lines changed: 8 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1064,7 +1064,6 @@ struct llama_sampler * llama_sampler_init_temp_ext(float temp, float delta, floa
10641064
struct llama_sampler_xtc {
10651065
const float probability;
10661066
const float threshold;
1067-
const float threshold_max;
10681067
const size_t min_keep;
10691068

10701069
const uint32_t seed;
@@ -1082,8 +1081,6 @@ static void llama_sample_xtc_apply(struct llama_sampler * smpl, llama_token_data
10821081

10831082
if (ctx->probability <= 0.0f
10841083
|| ctx->threshold > 0.5f
1085-
|| ctx->threshold_max <= 0.0f
1086-
|| ctx->threshold_max <= ctx->threshold
10871084
|| cur_p->size <= 2) {
10881085
return;
10891086
}
@@ -1095,35 +1092,29 @@ static void llama_sample_xtc_apply(struct llama_sampler * smpl, llama_token_data
10951092
// in case it's not sorted/recalculated yet
10961093
llama_sampler_softmax_impl(cur_p);
10971094

1098-
int pos_first = -1;
10991095
int pos_last = 0;
11001096

11011097
for (size_t i = 0; i < cur_p->size; ++i) {
11021098
if (cur_p->data[i].p - ctx->threshold >= -1e-5) {
1103-
if (cur_p->data[i].p - ctx->threshold_max > 1e-3) pos_first = i;
11041099
pos_last = i;
1105-
} else {
1106-
break;
1107-
}
1100+
} else break;
11081101
}
11091102

1110-
int to_remove = pos_last - (1 + pos_first);
1111-
1112-
if (cur_p->size - to_remove >= ctx->min_keep && to_remove > 0) {
1103+
if (cur_p->size - pos_last >= ctx->min_keep && pos_last > 0) {
11131104

1114-
size_t last_idx = cur_p->size - to_remove;
1105+
size_t last_idx = cur_p->size - pos_last;
11151106

1116-
for (size_t i = pos_first + 1; i <= last_idx; ++i) {
1117-
cur_p->data[i] = cur_p->data[i + to_remove];
1107+
for (size_t i = 0; i <= last_idx; ++i) {
1108+
cur_p->data[i] = cur_p->data[i + pos_last];
11181109
}
11191110

1120-
cur_p->size = cur_p->size - to_remove;
1111+
cur_p->size = cur_p->size - pos_last;
11211112
}
11221113
}
11231114

11241115
static struct llama_sampler * llama_sampler_xtc_clone(const struct llama_sampler * smpl) {
11251116
const auto * ctx = (const llama_sampler_xtc *) smpl->ctx;
1126-
auto * result = llama_sampler_init_xtc(ctx->probability, ctx->threshold, ctx->threshold_max, ctx->min_keep, ctx->seed);
1117+
auto * result = llama_sampler_init_xtc(ctx->probability, ctx->threshold, ctx->min_keep, ctx->seed);
11271118

11281119
// copy the state
11291120
{
@@ -1154,14 +1145,13 @@ static struct llama_sampler_i llama_sampler_xtc_i = {
11541145
/* .free = */ llama_sampler_xtc_free,
11551146
};
11561147

1157-
struct llama_sampler * llama_sampler_init_xtc(float p, float t, float t_max, size_t min_keep, uint32_t seed) {
1148+
struct llama_sampler * llama_sampler_init_xtc(float p, float t, size_t min_keep, uint32_t seed) {
11581149
auto seed_cur = get_rng_seed(seed);
11591150
return new llama_sampler {
11601151
/* .iface = */ &llama_sampler_xtc_i,
11611152
/* .ctx = */ new llama_sampler_xtc {
11621153
/* .probability = */ p,
11631154
/* .threshold = */ t,
1164-
/* .threshold_max = */ t_max,
11651155
/* .min_keep = */ min_keep,
11661156
/* .seed = */ seed,
11671157
/* .seed_cur = */ seed_cur,

0 commit comments

Comments
 (0)