@@ -128,13 +128,13 @@ static void common_params_handle_model_default(common_params & params) {
128
128
}
129
129
params.hf_file = params.model ;
130
130
} else if (params.model .empty ()) {
131
- params.model = fs_get_cache_file (string_split (params.hf_file , ' /' ).back ());
131
+ params.model = fs_get_cache_file (string_split<std::string> (params.hf_file , ' /' ).back ());
132
132
}
133
133
} else if (!params.model_url .empty ()) {
134
134
if (params.model .empty ()) {
135
- auto f = string_split (params.model_url , ' #' ).front ();
136
- f = string_split (f, ' ?' ).front ();
137
- params.model = fs_get_cache_file (string_split (f, ' /' ).back ());
135
+ auto f = string_split<std::string> (params.model_url , ' #' ).front ();
136
+ f = string_split<std::string> (f, ' ?' ).front ();
137
+ params.model = fs_get_cache_file (string_split<std::string> (f, ' /' ).back ());
138
138
}
139
139
} else if (params.model .empty ()) {
140
140
params.model = DEFAULT_MODEL_PATH;
@@ -251,6 +251,9 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
251
251
for (auto & antiprompt : params.antiprompt ) {
252
252
string_process_escapes (antiprompt);
253
253
}
254
+ for (auto & seq_breaker : params.sparams .dry_sequence_breakers ) {
255
+ string_process_escapes (seq_breaker);
256
+ }
254
257
}
255
258
256
259
if (!params.kv_overrides .empty ()) {
@@ -879,7 +882,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
879
882
{" --samplers" }, " SAMPLERS" ,
880
883
string_format (" samplers that will be used for generation in the order, separated by \' ;\'\n (default: %s)" , sampler_type_names.c_str ()),
881
884
[](common_params & params, const std::string & value) {
882
- const auto sampler_names = string_split (value, ' ;' );
885
+ const auto sampler_names = string_split<std::string> (value, ' ;' );
883
886
params.sparams .samplers = common_sampler_types_from_names (sampler_names, true );
884
887
}
885
888
).set_sparam ());
@@ -941,10 +944,17 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
941
944
}
942
945
).set_sparam ());
943
946
add_opt (common_arg (
944
- {" --tfs" }, " N" ,
945
- string_format (" tail free sampling, parameter z (default: %.1f, 1.0 = disabled)" , (double )params.sparams .tfs_z ),
947
+ {" --xtc-probability" }, " N" ,
948
+ string_format (" xtc probability (default: %.1f, 0.0 = disabled)" , (double )params.sparams .xtc_probability ),
949
+ [](common_params & params, const std::string & value) {
950
+ params.sparams .xtc_probability = std::stof (value);
951
+ }
952
+ ).set_sparam ());
953
+ add_opt (common_arg (
954
+ {" --xtc-threshold" }, " N" ,
955
+ string_format (" xtc threshold (default: %.1f, 1.0 = disabled)" , (double )params.sparams .xtc_threshold ),
946
956
[](common_params & params, const std::string & value) {
947
- params.sparams .tfs_z = std::stof (value);
957
+ params.sparams .xtc_threshold = std::stof (value);
948
958
}
949
959
).set_sparam ());
950
960
add_opt (common_arg (
@@ -983,6 +993,64 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
983
993
params.sparams .penalty_freq = std::stof (value);
984
994
}
985
995
).set_sparam ());
996
+ add_opt (common_arg (
997
+ {" --dry-multiplier" }, " N" ,
998
+ string_format (" set DRY sampling multiplier (default: %.1f, 0.0 = disabled)" , (double )params.sparams .dry_multiplier ),
999
+ [](common_params & params, const std::string & value) {
1000
+ params.sparams .dry_multiplier = std::stof (value);
1001
+ }
1002
+ ).set_sparam ());
1003
+ add_opt (common_arg (
1004
+ {" --dry-base" }, " N" ,
1005
+ string_format (" set DRY sampling base value (default: %.2f)" , (double )params.sparams .dry_base ),
1006
+ [](common_params & params, const std::string & value) {
1007
+ float potential_base = std::stof (value);
1008
+ if (potential_base >= 1 .0f )
1009
+ {
1010
+ params.sparams .dry_base = potential_base;
1011
+ }
1012
+ }
1013
+ ).set_sparam ());
1014
+ add_opt (common_arg (
1015
+ {" --dry-allowed-length" }, " N" ,
1016
+ string_format (" set allowed length for DRY sampling (default: %d)" , params.sparams .dry_allowed_length ),
1017
+ [](common_params & params, int value) {
1018
+ params.sparams .dry_allowed_length = value;
1019
+ }
1020
+ ).set_sparam ());
1021
+ add_opt (common_arg (
1022
+ {" --dry-penalty-last-n" }, " N" ,
1023
+ string_format (" set DRY penalty for the last n tokens (default: %d, 0 = disable, -1 = context size)" , params.sparams .dry_penalty_last_n ),
1024
+ [](common_params & params, int value) {
1025
+ params.sparams .dry_penalty_last_n = value;
1026
+ }
1027
+ ).set_sparam ());
1028
+ add_opt (common_arg (
1029
+ {" --dry-sequence-breaker" }, " STRING" ,
1030
+ string_format (" add sequence breaker for DRY sampling, clearing out default breakers (%s) in the process; use \" none\" to not use any sequence breakers\n " ,
1031
+ params.sparams .dry_sequence_breakers .empty () ? " none" :
1032
+ std::accumulate (std::next (params.sparams .dry_sequence_breakers .begin ()),
1033
+ params.sparams .dry_sequence_breakers .end (),
1034
+ std::string (" '" ) + (params.sparams .dry_sequence_breakers [0 ] == " \n " ? " \\ n" : params.sparams .dry_sequence_breakers [0 ]) + " '" ,
1035
+ [](const std::string& a, const std::string& b) {
1036
+ std::string formatted_b = (b == " \n " ) ? " \\ n" : b;
1037
+ return a + " , '" + formatted_b + " '" ;
1038
+ }).c_str ()),
1039
+ [](common_params & params, const std::string & value) {
1040
+ static bool defaults_cleared = false ;
1041
+
1042
+ if (!defaults_cleared) {
1043
+ params.sparams .dry_sequence_breakers .clear ();
1044
+ defaults_cleared = true ;
1045
+ }
1046
+
1047
+ if (value == " none" ) {
1048
+ params.sparams .dry_sequence_breakers .clear ();
1049
+ } else {
1050
+ params.sparams .dry_sequence_breakers .emplace_back (value);
1051
+ }
1052
+ }
1053
+ ).set_sparam ());
986
1054
add_opt (common_arg (
987
1055
{" --dynatemp-range" }, " N" ,
988
1056
string_format (" dynamic temperature range (default: %.1f, 0.0 = disabled)" , (double )params.sparams .dynatemp_range ),
@@ -999,7 +1067,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
999
1067
).set_sparam ());
1000
1068
add_opt (common_arg (
1001
1069
{" --mirostat" }, " N" ,
1002
- string_format (" use Mirostat sampling.\n Top K, Nucleus, Tail Free and Locally Typical samplers are ignored if used.\n "
1070
+ string_format (" use Mirostat sampling.\n Top K, Nucleus and Locally Typical samplers are ignored if used.\n "
1003
1071
" (default: %d, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)" , params.sparams .mirostat ),
1004
1072
[](common_params & params, int value) {
1005
1073
params.sparams .mirostat = value;
@@ -1083,7 +1151,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
1083
1151
}
1084
1152
).set_examples ({LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_RETRIEVAL, LLAMA_EXAMPLE_SERVER}).set_env (" LLAMA_ARG_POOLING" ));
1085
1153
add_opt (common_arg (
1086
- {" --attention" }, " {causal,non, causal}" ,
1154
+ {" --attention" }, " {causal,non- causal}" ,
1087
1155
" attention type for embeddings, use model default if unspecified" ,
1088
1156
[](common_params & params, const std::string & value) {
1089
1157
/* */ if (value == " causal" ) { params.attention_type = LLAMA_ATTENTION_TYPE_CAUSAL; }
@@ -1681,7 +1749,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
1681
1749
).set_examples ({LLAMA_EXAMPLE_BENCH}));
1682
1750
add_opt (common_arg (
1683
1751
{" --embd-normalize" }, " N" ,
1684
- string_format (" normalisation for embendings (default: %d) (-1=none, 0=max absolute int16, 1=taxicab, 2=euclidean, >2=p-norm)" , params.embd_normalize ),
1752
+ string_format (" normalisation for embeddings (default: %d) (-1=none, 0=max absolute int16, 1=taxicab, 2=euclidean, >2=p-norm)" , params.embd_normalize ),
1685
1753
[](common_params & params, int value) {
1686
1754
params.embd_normalize = value;
1687
1755
}
@@ -1695,7 +1763,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
1695
1763
).set_examples ({LLAMA_EXAMPLE_EMBEDDING}));
1696
1764
add_opt (common_arg (
1697
1765
{" --embd-separator" }, " STRING" ,
1698
- " separator of embendings (default \\ n) for example \" <#sep#>\" " ,
1766
+ " separator of embeddings (default \\ n) for example \" <#sep#>\" " ,
1699
1767
[](common_params & params, const std::string & value) {
1700
1768
params.embd_sep = value;
1701
1769
}
@@ -1788,6 +1856,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
1788
1856
params.n_threads_http = value;
1789
1857
}
1790
1858
).set_examples ({LLAMA_EXAMPLE_SERVER}).set_env (" LLAMA_ARG_THREADS_HTTP" ));
1859
+ add_opt (common_arg (
1860
+ {" --cache-reuse" }, " N" ,
1861
+ string_format (" min chunk size to attempt reusing from the cache via KV shifting (default: %d)" , params.n_cache_reuse ),
1862
+ [](common_params & params, int value) {
1863
+ params.n_cache_reuse = value;
1864
+ }
1865
+ ).set_examples ({LLAMA_EXAMPLE_SERVER}).set_env (" LLAMA_ARG_CACHE_REUSE" ));
1791
1866
add_opt (common_arg (
1792
1867
{" --metrics" },
1793
1868
string_format (" enable prometheus compatible metrics endpoint (default: %s)" , params.endpoint_metrics ? " enabled" : " disabled" ),
0 commit comments