Skip to content

Commit 9c233c7

Browse files
authored
Merge branch 'master' into k-shift2
2 parents e83245e + 8d8ff71 commit 9c233c7

File tree

16 files changed

+11
-167
lines changed

16 files changed

+11
-167
lines changed

common/arg.cpp

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -950,13 +950,6 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
950950
params.sparams.min_p = std::stof(value);
951951
}
952952
).set_sparam());
953-
add_opt(common_arg(
954-
{"--tfs"}, "N",
955-
string_format("tail free sampling, parameter z (default: %.1f, 1.0 = disabled)", (double)params.sparams.tfs_z),
956-
[](common_params & params, const std::string & value) {
957-
params.sparams.tfs_z = std::stof(value);
958-
}
959-
).set_sparam());
960953
add_opt(common_arg(
961954
{"--xtc-probability"}, "N",
962955
string_format("xtc probability (default: %.1f, 0.0 = disabled)", (double)params.sparams.xtc_probability),
@@ -1081,7 +1074,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
10811074
).set_sparam());
10821075
add_opt(common_arg(
10831076
{"--mirostat"}, "N",
1084-
string_format("use Mirostat sampling.\nTop K, Nucleus, Tail Free and Locally Typical samplers are ignored if used.\n"
1077+
string_format("use Mirostat sampling.\nTop K, Nucleus and Locally Typical samplers are ignored if used.\n"
10851078
"(default: %d, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)", params.sparams.mirostat),
10861079
[](common_params & params, int value) {
10871080
params.sparams.mirostat = value;

common/common.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2090,7 +2090,6 @@ void yaml_dump_non_result_info(FILE * stream, const common_params & params, cons
20902090
const std::vector<float> tensor_split_vector(params.tensor_split, params.tensor_split + llama_max_devices());
20912091
yaml_dump_vector_float(stream, "tensor_split", tensor_split_vector);
20922092

2093-
fprintf(stream, "tfs: %f # default: 1.0\n", sparams.tfs_z);
20942093
fprintf(stream, "threads: %d # default: %u\n", params.cpuparams.n_threads, std::thread::hardware_concurrency());
20952094
fprintf(stream, "k_shift: %d # default: 0\n", sparams.k_shift);
20962095
fprintf(stream, "top_k: %d # default: 40\n", sparams.top_k);

common/common.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ enum common_sampler_type {
8989
COMMON_SAMPLER_TYPE_TOP_K = 3,
9090
COMMON_SAMPLER_TYPE_TOP_P = 4,
9191
COMMON_SAMPLER_TYPE_MIN_P = 5,
92-
COMMON_SAMPLER_TYPE_TFS_Z = 6,
92+
//COMMON_SAMPLER_TYPE_TFS_Z = 6,
9393
COMMON_SAMPLER_TYPE_TYPICAL_P = 7,
9494
COMMON_SAMPLER_TYPE_TEMPERATURE = 8,
9595
COMMON_SAMPLER_TYPE_XTC = 9,
@@ -115,7 +115,6 @@ struct common_sampler_params {
115115
float min_p = 0.05f; // 0.0 = disabled
116116
float xtc_probability = 0.00f; // 0.0 = disabled
117117
float xtc_threshold = 0.10f; // > 0.5 disables XTC
118-
float tfs_z = 1.00f; // 1.0 = disabled
119118
float typ_p = 1.00f; // typical_p, 1.0 = disabled
120119
float temp = 0.80f; // <= 0.0 to sample greedily, 0.0 to not output probabilities
121120
float dynatemp_range = 0.00f; // 0.0 = disabled
@@ -142,7 +141,6 @@ struct common_sampler_params {
142141
COMMON_SAMPLER_TYPE_DRY,
143142
COMMON_SAMPLER_TYPE_K_SHIFT,
144143
COMMON_SAMPLER_TYPE_TOP_K,
145-
COMMON_SAMPLER_TYPE_TFS_Z,
146144
COMMON_SAMPLER_TYPE_TYPICAL_P,
147145
COMMON_SAMPLER_TYPE_TOP_P,
148146
COMMON_SAMPLER_TYPE_MIN_P,

common/sampling.cpp

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -131,11 +131,11 @@ std::string common_sampler_params::print() const {
131131
snprintf(result, sizeof(result),
132132
"\trepeat_last_n = %d, repeat_penalty = %.3f, frequency_penalty = %.3f, presence_penalty = %.3f\n"
133133
"\tdry_multiplier = %.3f, dry_base = %.3f, dry_allowed_length = %d, dry_penalty_last_n = %d\n"
134-
"\tk_shift = %d, top_k = %d, tfs_z = %.3f, top_p = %.3f, min_p = %.3f, xtc_probability = %.3f, xtc_threshold = %.3f, typical_p = %.3f, temp = %.3f\n"
134+
"\tk_shift = %d, top_k = %d, top_p = %.3f, min_p = %.3f, xtc_probability = %.3f, xtc_threshold = %.3f, typical_p = %.3f, temp = %.3f\n"
135135
"\tmirostat = %d, mirostat_lr = %.3f, mirostat_ent = %.3f",
136136
penalty_last_n, penalty_repeat, penalty_freq, penalty_present,
137137
dry_multiplier, dry_base, dry_allowed_length, dry_penalty_last_n,
138-
k_shift, top_k, tfs_z, top_p, min_p, xtc_probability, xtc_threshold, typ_p, temp,
138+
k_shift, top_k, top_p, min_p, xtc_probability, xtc_threshold, typ_p, temp,
139139
mirostat, mirostat_eta, mirostat_tau);
140140

141141
return std::string(result);
@@ -202,9 +202,6 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co
202202
case COMMON_SAMPLER_TYPE_XTC:
203203
llama_sampler_chain_add(result->chain, llama_sampler_init_xtc (params.xtc_probability, params.xtc_threshold, params.min_keep, params.seed));
204204
break;
205-
case COMMON_SAMPLER_TYPE_TFS_Z:
206-
llama_sampler_chain_add(result->chain, llama_sampler_init_tail_free(params.tfs_z, params.min_keep));
207-
break;
208205
case COMMON_SAMPLER_TYPE_TYPICAL_P:
209206
llama_sampler_chain_add(result->chain, llama_sampler_init_typical (params.typ_p, params.min_keep));
210207
break;
@@ -377,7 +374,6 @@ char common_sampler_type_to_chr(enum common_sampler_type cnstr) {
377374
case COMMON_SAMPLER_TYPE_DRY: return 'd';
378375
case COMMON_SAMPLER_TYPE_K_SHIFT: return 's';
379376
case COMMON_SAMPLER_TYPE_TOP_K: return 'k';
380-
case COMMON_SAMPLER_TYPE_TFS_Z: return 'f';
381377
case COMMON_SAMPLER_TYPE_TYPICAL_P: return 'y';
382378
case COMMON_SAMPLER_TYPE_TOP_P: return 'p';
383379
case COMMON_SAMPLER_TYPE_MIN_P: return 'm';
@@ -393,7 +389,6 @@ std::string common_sampler_type_to_str(enum common_sampler_type cnstr) {
393389
case COMMON_SAMPLER_TYPE_DRY: return "dry";
394390
case COMMON_SAMPLER_TYPE_K_SHIFT: return "k_shift";
395391
case COMMON_SAMPLER_TYPE_TOP_K: return "top_k";
396-
case COMMON_SAMPLER_TYPE_TFS_Z: return "tfs_z";
397392
case COMMON_SAMPLER_TYPE_TYPICAL_P: return "typ_p";
398393
case COMMON_SAMPLER_TYPE_TOP_P: return "top_p";
399394
case COMMON_SAMPLER_TYPE_MIN_P: return "min_p";
@@ -412,7 +407,6 @@ std::vector<common_sampler_type> common_sampler_types_from_names(const std::vect
412407
{ "top_p", COMMON_SAMPLER_TYPE_TOP_P },
413408
{ "typ_p", COMMON_SAMPLER_TYPE_TYPICAL_P },
414409
{ "min_p", COMMON_SAMPLER_TYPE_MIN_P },
415-
{ "tfs_z", COMMON_SAMPLER_TYPE_TFS_Z },
416410
{ "temperature", COMMON_SAMPLER_TYPE_TEMPERATURE },
417411
{ "xtc", COMMON_SAMPLER_TYPE_XTC },
418412
{ "infill", COMMON_SAMPLER_TYPE_INFILL },
@@ -430,8 +424,6 @@ std::vector<common_sampler_type> common_sampler_types_from_names(const std::vect
430424
{ "typ-p", COMMON_SAMPLER_TYPE_TYPICAL_P },
431425
{ "typ", COMMON_SAMPLER_TYPE_TYPICAL_P },
432426
{ "min-p", COMMON_SAMPLER_TYPE_MIN_P },
433-
{ "tfs-z", COMMON_SAMPLER_TYPE_TFS_Z },
434-
{ "tfs", COMMON_SAMPLER_TYPE_TFS_Z },
435427
{ "temp", COMMON_SAMPLER_TYPE_TEMPERATURE },
436428
};
437429

@@ -460,7 +452,6 @@ std::vector<common_sampler_type> common_sampler_types_from_chars(const std::stri
460452
{ common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_DRY), COMMON_SAMPLER_TYPE_DRY },
461453
{ common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_K_SHIFT), COMMON_SAMPLER_TYPE_K_SHIFT },
462454
{ common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_TOP_K), COMMON_SAMPLER_TYPE_TOP_K },
463-
{ common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_TFS_Z), COMMON_SAMPLER_TYPE_TFS_Z },
464455
{ common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_TYPICAL_P), COMMON_SAMPLER_TYPE_TYPICAL_P },
465456
{ common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_TOP_P), COMMON_SAMPLER_TYPE_TOP_P },
466457
{ common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_MIN_P), COMMON_SAMPLER_TYPE_MIN_P },

examples/main/README.md

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -243,14 +243,6 @@ The Min-P sampling method was designed as an alternative to Top-P, and aims to e
243243

244244
Example usage: `--min-p 0.05`
245245

246-
### Tail-Free Sampling (TFS)
247-
248-
- `--tfs N`: Enable tail free sampling with parameter z (default: 1.0, 1.0 = disabled).
249-
250-
Tail-free sampling (TFS) is a text generation technique that aims to reduce the impact of less likely tokens, which may be less relevant, less coherent, or nonsensical, on the output. Similar to Top-P it tries to determine the bulk of the most likely tokens dynamically. But TFS filters out logits based on the second derivative of their probabilities. Adding tokens is stopped after the sum of the second derivatives reaches the parameter z. In short: TFS looks at how quickly the probabilities of the tokens decrease and cuts off the tail of unlikely tokens using the parameter z. Typical values for z are in the range of 0.9 to 0.95. A value of 1.0 would include all tokens and thus disables the effect of TFS.
251-
252-
Example usage: `--tfs 0.95`
253-
254246
### Locally Typical Sampling
255247

256248
- `--typical N`: Enable locally typical sampling with parameter p (default: 1.0, 1.0 = disabled).

examples/server/README.md

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ The project is under active development, and we are [looking for feedback and co
9999

100100
| Argument | Explanation |
101101
| -------- | ----------- |
102-
| `--samplers SAMPLERS` | samplers that will be used for generation in the order, separated by ';'<br/>(default: top_k;tfs_z;typ_p;top_p;min_p;temperature) |
102+
| `--samplers SAMPLERS` | samplers that will be used for generation in the order, separated by ';'<br/>(default: top_k;typ_p;top_p;min_p;temperature) |
103103
| `-s, --seed SEED` | RNG seed (default: -1, use random seed for -1) |
104104
| `--sampling-seq SEQUENCE` | simplified sequence for samplers that will be used (default: kypmt) |
105105
| `--ignore-eos` | ignore end of stream token and continue generating (implies --logit-bias EOS-inf) |
@@ -108,7 +108,6 @@ The project is under active development, and we are [looking for feedback and co
108108
| `--top-k N` | top-k sampling (default: 40, 0 = disabled) |
109109
| `--top-p N` | top-p sampling (default: 0.9, 1.0 = disabled) |
110110
| `--min-p N` | min-p sampling (default: 0.1, 0.0 = disabled) |
111-
| `--tfs N` | tail free sampling, parameter z (default: 1.0, 1.0 = disabled) |
112111
| `--typical N` | locally typical sampling, parameter p (default: 1.0, 1.0 = disabled) |
113112
| `--repeat-last-n N` | last n tokens to consider for penalize (default: 64, 0 = disabled, -1 = ctx_size) |
114113
| `--repeat-penalty N` | penalize repeat sequence of tokens (default: 1.0, 1.0 = disabled) |
@@ -121,7 +120,7 @@ The project is under active development, and we are [looking for feedback and co
121120
| `--dry-sequence-breaker STRING` | add sequence breaker for DRY sampling, clearing out default breakers (`['\n', ':', '"', '*']`) in the process; use `"none"` to not use any sequence breakers |
122121
| `--dynatemp-range N` | dynamic temperature range (default: 0.0, 0.0 = disabled) |
123122
| `--dynatemp-exp N` | dynamic temperature exponent (default: 1.0) |
124-
| `--mirostat N` | use Mirostat sampling.<br/>Top K, Nucleus, Tail Free and Locally Typical samplers are ignored if used.<br/>(default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0) |
123+
| `--mirostat N` | use Mirostat sampling.<br/>Top K, Nucleus and Locally Typical samplers are ignored if used.<br/>(default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0) |
125124
| `--mirostat-lr N` | Mirostat learning rate, parameter eta (default: 0.1) |
126125
| `--mirostat-ent N` | Mirostat target entropy, parameter tau (default: 5.0) |
127126
| `-l, --logit-bias TOKEN_ID(+/-)BIAS` | modifies the likelihood of token appearing in the completion,<br/>i.e. `--logit-bias 15043+1` to increase likelihood of token ' Hello',<br/>or `--logit-bias 15043-1` to decrease likelihood of token ' Hello' |
@@ -360,8 +359,6 @@ node index.js
360359
`stop`: Specify a JSON array of stopping strings.
361360
These words will not be included in the completion, so make sure to add them to the prompt for the next iteration. Default: `[]`
362361

363-
`tfs_z`: Enable tail free sampling with parameter z. Default: `1.0`, which is disabled.
364-
365362
`typical_p`: Enable locally typical sampling with parameter p. Default: `1.0`, which is disabled.
366363

367364
`repeat_penalty`: Control the repetition of token sequences in the generated text. Default: `1.1`
@@ -412,7 +409,7 @@ node index.js
412409

413410
`cache_prompt`: Re-use KV cache from a previous request if possible. This way the common prefix does not have to be re-processed, only the suffix that differs between the requests. Because (depending on the backend) the logits are **not** guaranteed to be bit-for-bit identical for different batch sizes (prompt processing vs. token generation) enabling this option can cause nondeterministic results. Default: `false`
414411

415-
`samplers`: The order the samplers should be applied in. An array of strings representing sampler type names. If a sampler is not set, it will not be used. If a sampler is specified more than once, it will be applied multiple times. Default: `["top_k", "tfs_z", "typical_p", "top_p", "min_p", "temperature"]` - these are all the available values.
412+
`samplers`: The order the samplers should be applied in. An array of strings representing sampler type names. If a sampler is not set, it will not be used. If a sampler is specified more than once, it will be applied multiple times. Default: `["top_k", "typical_p", "top_p", "min_p", "temperature"]` - these are all the available values.
416413

417414
**Response format**
418415

@@ -738,7 +735,6 @@ Example:
738735
"repeat_penalty": 1.100000023841858,
739736
"samplers": [
740737
"top_k",
741-
"tfs_z",
742738
"typical_p",
743739
"top_p",
744740
"min_p",
@@ -752,7 +748,6 @@ Example:
752748
"stream": false,
753749
"task_id": 0,
754750
"temperature": 0.0,
755-
"tfs_z": 1.0,
756751
"top_k": 40,
757752
"top_p": 0.949999988079071,
758753
"typical_p": 1.0

examples/server/public/index-new.html

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,6 @@
5050
min_p: 0.05, // 0 = disabled; recommended for non-english: ~ 0.4
5151
xtc_probability: 0.0, // 0 = disabled;
5252
xtc_threshold: 0.1, // > 0.5 disables XTC;
53-
tfs_z: 1.0, // 1.0 = disabled
5453
typical_p: 1.0, // 1.0 = disabled
5554
presence_penalty: 0.0, // 0.0 = disabled
5655
frequency_penalty: 0.0, // 0.0 = disabled
@@ -849,7 +848,6 @@
849848
${FloatField({ label: "DRY Base", title: "Set the DRY repetition penalty base value. Default is 1.75", max: 3.0, min: 1.0, name: "dry_base", step: 0.01, value: params.value.dry_base })}
850849
${IntField({ label: "DRY Allowed Length", title: "Tokens that extend repetition beyond this receive exponentially increasing penalty. Default is 2", max: 10, min: 1, step: 1, name: "dry_allowed_length", value: params.value.dry_allowed_length })}
851850
${IntField({ label: "DRY Penalty Last N", title: "How many tokens to scan for repetitions. Default is -1, where 0 is disabled and -1 is context size", max: 2048, min: -1, step: 16, name: "dry_penalty_last_n", value: params.value.dry_penalty_last_n })}
852-
${FloatField({ label: "TFS-Z", title: "Activates tail-free sampling, a method used to limit the prediction of tokens that are too frequent. The parameter z controls the strength of this limitation. A value of 1.0 means that this function is deactivated.", max: 1.0, min: 0.0, name: "tfs_z", step: 0.01, value: params.value.tfs_z })}
853851
${IntField({ label: "Min Keep", title: "If greater than 0, samplers are forced to return N possible tokens at minimum. Default is 0", max: 10, min: 0, name: "min_keep", value: params.value.min_keep })}
854852
</fieldset>
855853
@@ -1149,7 +1147,6 @@ <h2>llama.cpp</h2>
11491147
xtc_probability: { snapValue: 0.0, snapRangeMultiplier: 4 },
11501148
xtc_threshold: { snapValue: 0.5, snapRangeMultiplier: 4 },
11511149
top_p: { snapValue: 1.0, snapRangeMultiplier: 4 },
1152-
tfs_z: { snapValue: 1.0, snapRangeMultiplier: 4 },
11531150
typical_p: { snapValue: 1.0, snapRangeMultiplier: 4 },
11541151
repeat_penalty: { snapValue: 1.0, snapRangeMultiplier: 4 },
11551152
presence_penalty: { snapValue: 0.0, snapRangeMultiplier: 4 },

examples/server/public/index.html

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -314,7 +314,6 @@
314314
min_p: 0.05, // 0 = disabled
315315
xtc_probability: 0.0, // 0 = disabled;
316316
xtc_threshold: 0.1, // > 0.5 disables XTC;
317-
tfs_z: 1.0, // 1.0 = disabled
318317
typical_p: 1.0, // 1.0 = disabled
319318
presence_penalty: 0.0, // 0.0 = disabled
320319
frequency_penalty: 0.0, // 0.0 = disabled
@@ -1017,7 +1016,6 @@
10171016
<details>
10181017
<summary>More options</summary>
10191018
<fieldset class="two">
1020-
${FloatField({ label: "TFS-Z", max: 1.0, min: 0.0, name: "tfs_z", step: 0.01, value: params.value.tfs_z })}
10211019
${FloatField({ label: "Typical P", max: 1.0, min: 0.0, name: "typical_p", step: 0.01, value: params.value.typical_p })}
10221020
${FloatField({ label: "Presence penalty", max: 1.0, min: 0.0, name: "presence_penalty", step: 0.01, value: params.value.presence_penalty })}
10231021
${FloatField({ label: "Frequency penalty", max: 1.0, min: 0.0, name: "frequency_penalty", step: 0.01, value: params.value.frequency_penalty })}

examples/server/server.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -810,7 +810,6 @@ struct server_context {
810810
slot.sparams.min_p = json_value(data, "min_p", default_sparams.min_p);
811811
slot.sparams.xtc_probability = json_value(data, "xtc_probability", default_sparams.xtc_probability);
812812
slot.sparams.xtc_threshold = json_value(data, "xtc_threshold", default_sparams.xtc_threshold);
813-
slot.sparams.tfs_z = json_value(data, "tfs_z", default_sparams.tfs_z);
814813
slot.sparams.typ_p = json_value(data, "typical_p", default_sparams.typ_p);
815814
slot.sparams.temp = json_value(data, "temperature", default_sparams.temp);
816815
slot.sparams.dynatemp_range = json_value(data, "dynatemp_range", default_sparams.dynatemp_range);
@@ -1151,7 +1150,6 @@ struct server_context {
11511150
{"min_p", slot.sparams.min_p},
11521151
{"xtc_probability", slot.sparams.xtc_probability},
11531152
{"xtc_threshold", slot.sparams.xtc_threshold},
1154-
{"tfs_z", slot.sparams.tfs_z},
11551153
{"typical_p", slot.sparams.typ_p},
11561154
{"repeat_last_n", slot.sparams.penalty_last_n},
11571155
{"repeat_penalty", slot.sparams.penalty_repeat},

examples/server/themes/buttons-top/index.html

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,6 @@
226226
top_k: 40, // <= 0 to use vocab size
227227
top_p: 0.95, // 1.0 = disabled
228228
min_p: 0.05, // 0 = disabled
229-
tfs_z: 1.0, // 1.0 = disabled
230229
typical_p: 1.0, // 1.0 = disabled
231230
presence_penalty: 0.0, // 0.0 = disabled
232231
frequency_penalty: 0.0, // 0.0 = disabled
@@ -788,7 +787,6 @@
788787
<details>
789788
<summary>More options</summary>
790789
<fieldset class="two">
791-
${FloatField({ label: "TFS-Z", max: 1.0, min: 0.0, name: "tfs_z", step: 0.01, value: params.value.tfs_z })}
792790
${FloatField({ label: "Typical P", max: 1.0, min: 0.0, name: "typical_p", step: 0.01, value: params.value.typical_p })}
793791
${FloatField({ label: "Presence penalty", max: 1.0, min: 0.0, name: "presence_penalty", step: 0.01, value: params.value.presence_penalty })}
794792
${FloatField({ label: "Frequency penalty", max: 1.0, min: 0.0, name: "frequency_penalty", step: 0.01, value: params.value.frequency_penalty })}

0 commit comments

Comments
 (0)