Skip to content

Commit abd27e7

Browse files
committed
Merge branch 'concedo_experimental' into croco_nex
2 parents fe59f50 + f7406df commit abd27e7

38 files changed

+153046
-151517
lines changed

colab.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@
4848
"source": [
4949
"#@title <b>v-- Enter your model below and then click this to start Koboldcpp</b>\r\n",
5050
"\r\n",
51-
"Model = \"https://huggingface.co/KoboldAI/LLaMA2-13B-Tiefighter-GGUF/resolve/main/LLaMA2-13B-Tiefighter.Q4_K_S.gguf\" #@param [\"https://huggingface.co/KoboldAI/LLaMA2-13B-Tiefighter-GGUF/resolve/main/LLaMA2-13B-Tiefighter.Q4_K_S.gguf\",\"https://huggingface.co/KoboldAI/LLaMA2-13B-Estopia-GGUF/resolve/main/LLaMA2-13B-Estopia.Q4_K_S.gguf\",\"https://huggingface.co/mradermacher/Fimbulvetr-11B-v2-GGUF/resolve/main/Fimbulvetr-11B-v2.Q4_K_S.gguf\",\"https://huggingface.co/TheBloke/MythoMax-L2-13B-GGUF/resolve/main/mythomax-l2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/ReMM-SLERP-L2-13B-GGUF/resolve/main/remm-slerp-l2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/Xwin-LM-13B-v0.2-GGUF/resolve/main/xwin-lm-13b-v0.2.Q4_K_M.gguf\",\"https://huggingface.co/mradermacher/mini-magnum-12b-v1.1-GGUF/resolve/main/mini-magnum-12b-v1.1.Q4_K_S.gguf\",\"https://huggingface.co/TheBloke/Stheno-L2-13B-GGUF/resolve/main/stheno-l2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/MythoMax-L2-Kimiko-v2-13B-GGUF/resolve/main/mythomax-l2-kimiko-v2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/MistRP-Airoboros-7B-GGUF/resolve/main/mistrp-airoboros-7b.Q4_K_S.gguf\",\"https://huggingface.co/TheBloke/airoboros-mistral2.2-7B-GGUF/resolve/main/airoboros-mistral2.2-7b.Q4_K_S.gguf\",\"https://huggingface.co/concedo/KobbleTinyV2-1.1B-GGUF/resolve/main/KobbleTiny-Q4_K.gguf\",\"https://huggingface.co/grimjim/kukulemon-7B-GGUF/resolve/main/kukulemon-7B.Q8_0.gguf\",\"https://huggingface.co/mradermacher/LemonKunoichiWizardV3-GGUF/resolve/main/LemonKunoichiWizardV3.Q4_K_M.gguf\",\"https://huggingface.co/Lewdiculous/Kunoichi-DPO-v2-7B-GGUF-Imatrix/resolve/main/Kunoichi-DPO-v2-7B-Q4_K_M-imatrix.gguf\",\"https://huggingface.co/mradermacher/L3-8B-Stheno-v3.2-i1-GGUF/resolve/main/L3-8B-Stheno-v3.2.i1-Q4_K_M.gguf\",\"https://huggingface.co/Lewdiculous/Llama-3-Lumimaid-8B-v0.1-OAS-GGUF-IQ-Imatrix/resolve/main/v2-Llama-3-Lumimaid-8B-v0.1-OAS-Q4_K_M-imat.gguf\",\"https://huggingface.co/bartowski/NeuralDaredevil-8B-abliterated-GGUF/resolve/main/NeuralDaredevil-8B-abliterated-Q4_K_M.gguf\",\"https://huggingface.co/bartowski/L3-8B-Lunaris-v1-GGUF/resolve/main/L3-8B-Lunaris-v1-Q4_K_M.gguf\",\"https://huggingface.co/mradermacher/L3-Umbral-Mind-RP-v2.0-8B-GGUF/resolve/main/L3-Umbral-Mind-RP-v2.0-8B.Q4_K_M.gguf\"]{allow-input: true}\r\n",
51+
"Model = \"https://huggingface.co/KoboldAI/LLaMA2-13B-Tiefighter-GGUF/resolve/main/LLaMA2-13B-Tiefighter.Q4_K_S.gguf\" #@param [\"https://huggingface.co/KoboldAI/LLaMA2-13B-Tiefighter-GGUF/resolve/main/LLaMA2-13B-Tiefighter.Q4_K_S.gguf\",\"https://huggingface.co/KoboldAI/LLaMA2-13B-Estopia-GGUF/resolve/main/LLaMA2-13B-Estopia.Q4_K_S.gguf\",\"https://huggingface.co/mradermacher/Fimbulvetr-11B-v2-GGUF/resolve/main/Fimbulvetr-11B-v2.Q4_K_S.gguf\",\"https://huggingface.co/TheBloke/MythoMax-L2-13B-GGUF/resolve/main/mythomax-l2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/ReMM-SLERP-L2-13B-GGUF/resolve/main/remm-slerp-l2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/Xwin-LM-13B-v0.2-GGUF/resolve/main/xwin-lm-13b-v0.2.Q4_K_M.gguf\",\"https://huggingface.co/mradermacher/mini-magnum-12b-v1.1-GGUF/resolve/main/mini-magnum-12b-v1.1.Q4_K_S.gguf\",\"https://huggingface.co/TheBloke/Stheno-L2-13B-GGUF/resolve/main/stheno-l2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/MythoMax-L2-Kimiko-v2-13B-GGUF/resolve/main/mythomax-l2-kimiko-v2-13b.Q4_K_M.gguf\",\"https://huggingface.co/bartowski/Rocinante-12B-v1.1-GGUF/resolve/main/Rocinante-12B-v1.1-Q4_K_S.gguf\",\"https://huggingface.co/TheBloke/MistRP-Airoboros-7B-GGUF/resolve/main/mistrp-airoboros-7b.Q4_K_S.gguf\",\"https://huggingface.co/TheBloke/airoboros-mistral2.2-7B-GGUF/resolve/main/airoboros-mistral2.2-7b.Q4_K_S.gguf\",\"https://huggingface.co/concedo/KobbleTinyV2-1.1B-GGUF/resolve/main/KobbleTiny-Q4_K.gguf\",\"https://huggingface.co/grimjim/kukulemon-7B-GGUF/resolve/main/kukulemon-7B.Q8_0.gguf\",\"https://huggingface.co/mradermacher/LemonKunoichiWizardV3-GGUF/resolve/main/LemonKunoichiWizardV3.Q4_K_M.gguf\",\"https://huggingface.co/Lewdiculous/Kunoichi-DPO-v2-7B-GGUF-Imatrix/resolve/main/Kunoichi-DPO-v2-7B-Q4_K_M-imatrix.gguf\",\"https://huggingface.co/mradermacher/L3-8B-Stheno-v3.2-i1-GGUF/resolve/main/L3-8B-Stheno-v3.2.i1-Q4_K_M.gguf\",\"https://huggingface.co/Lewdiculous/Llama-3-Lumimaid-8B-v0.1-OAS-GGUF-IQ-Imatrix/resolve/main/v2-Llama-3-Lumimaid-8B-v0.1-OAS-Q4_K_M-imat.gguf\",\"https://huggingface.co/bartowski/NeuralDaredevil-8B-abliterated-GGUF/resolve/main/NeuralDaredevil-8B-abliterated-Q4_K_M.gguf\",\"https://huggingface.co/bartowski/L3-8B-Lunaris-v1-GGUF/resolve/main/L3-8B-Lunaris-v1-Q4_K_M.gguf\",\"https://huggingface.co/mradermacher/L3-Umbral-Mind-RP-v2.0-8B-GGUF/resolve/main/L3-Umbral-Mind-RP-v2.0-8B.Q4_K_M.gguf\"]{allow-input: true}\r\n",
5252
"Layers = 99 #@param [99]{allow-input: true}\r\n",
5353
"ContextSize = 4096 #@param [4096,8192] {allow-input: true}\r\n",
5454
"FlashAttention = True #@param {type:\"boolean\"}\r\n",

common/arg.cpp

Lines changed: 67 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -129,13 +129,13 @@ static void common_params_handle_model_default(common_params & params) {
129129
}
130130
params.hf_file = params.model;
131131
} else if (params.model.empty()) {
132-
params.model = fs_get_cache_file(string_split(params.hf_file, '/').back());
132+
params.model = fs_get_cache_file(string_split<std::string>(params.hf_file, '/').back());
133133
}
134134
} else if (!params.model_url.empty()) {
135135
if (params.model.empty()) {
136-
auto f = string_split(params.model_url, '#').front();
137-
f = string_split(f, '?').front();
138-
params.model = fs_get_cache_file(string_split(f, '/').back());
136+
auto f = string_split<std::string>(params.model_url, '#').front();
137+
f = string_split<std::string>(f, '?').front();
138+
params.model = fs_get_cache_file(string_split<std::string>(f, '/').back());
139139
}
140140
} else if (params.model.empty()) {
141141
params.model = DEFAULT_MODEL_PATH;
@@ -252,6 +252,9 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
252252
for (auto & antiprompt : params.antiprompt) {
253253
string_process_escapes(antiprompt);
254254
}
255+
for (auto & seq_breaker : params.sparams.dry_sequence_breakers) {
256+
string_process_escapes(seq_breaker);
257+
}
255258
}
256259

257260
if (!params.kv_overrides.empty()) {
@@ -880,7 +883,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
880883
{"--samplers"}, "SAMPLERS",
881884
string_format("samplers that will be used for generation in the order, separated by \';\'\n(default: %s)", sampler_type_names.c_str()),
882885
[](common_params & params, const std::string & value) {
883-
const auto sampler_names = string_split(value, ';');
886+
const auto sampler_names = string_split<std::string>(value, ';');
884887
params.sparams.samplers = common_sampler_types_from_names(sampler_names, true);
885888
}
886889
).set_sparam());
@@ -941,13 +944,6 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
941944
params.sparams.min_p = std::stof(value);
942945
}
943946
).set_sparam());
944-
add_opt(common_arg(
945-
{"--tfs"}, "N",
946-
string_format("tail free sampling, parameter z (default: %.1f, 1.0 = disabled)", (double)params.sparams.tfs_z),
947-
[](common_params & params, const std::string & value) {
948-
params.sparams.tfs_z = std::stof(value);
949-
}
950-
).set_sparam());
951947
add_opt(common_arg(
952948
{"--xtc-probability"}, "N",
953949
string_format("xtc probability (default: %.1f, 0.0 = disabled)", (double)params.sparams.xtc_probability),
@@ -998,6 +994,64 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
998994
params.sparams.penalty_freq = std::stof(value);
999995
}
1000996
).set_sparam());
997+
add_opt(common_arg(
998+
{"--dry-multiplier"}, "N",
999+
string_format("set DRY sampling multiplier (default: %.1f, 0.0 = disabled)", (double)params.sparams.dry_multiplier),
1000+
[](common_params & params, const std::string & value) {
1001+
params.sparams.dry_multiplier = std::stof(value);
1002+
}
1003+
).set_sparam());
1004+
add_opt(common_arg(
1005+
{"--dry-base"}, "N",
1006+
string_format("set DRY sampling base value (default: %.2f)", (double)params.sparams.dry_base),
1007+
[](common_params & params, const std::string & value) {
1008+
float potential_base = std::stof(value);
1009+
if (potential_base >= 1.0f)
1010+
{
1011+
params.sparams.dry_base = potential_base;
1012+
}
1013+
}
1014+
).set_sparam());
1015+
add_opt(common_arg(
1016+
{"--dry-allowed-length"}, "N",
1017+
string_format("set allowed length for DRY sampling (default: %d)", params.sparams.dry_allowed_length),
1018+
[](common_params & params, int value) {
1019+
params.sparams.dry_allowed_length = value;
1020+
}
1021+
).set_sparam());
1022+
add_opt(common_arg(
1023+
{"--dry-penalty-last-n"}, "N",
1024+
string_format("set DRY penalty for the last n tokens (default: %d, 0 = disable, -1 = context size)", params.sparams.dry_penalty_last_n),
1025+
[](common_params & params, int value) {
1026+
params.sparams.dry_penalty_last_n = value;
1027+
}
1028+
).set_sparam());
1029+
add_opt(common_arg(
1030+
{"--dry-sequence-breaker"}, "STRING",
1031+
string_format("add sequence breaker for DRY sampling, clearing out default breakers (%s) in the process; use \"none\" to not use any sequence breakers\n",
1032+
params.sparams.dry_sequence_breakers.empty() ? "none" :
1033+
std::accumulate(std::next(params.sparams.dry_sequence_breakers.begin()),
1034+
params.sparams.dry_sequence_breakers.end(),
1035+
std::string("'") + (params.sparams.dry_sequence_breakers[0] == "\n" ? "\\n" : params.sparams.dry_sequence_breakers[0]) + "'",
1036+
[](const std::string& a, const std::string& b) {
1037+
std::string formatted_b = (b == "\n") ? "\\n" : b;
1038+
return a + ", '" + formatted_b + "'";
1039+
}).c_str()),
1040+
[](common_params & params, const std::string & value) {
1041+
static bool defaults_cleared = false;
1042+
1043+
if (!defaults_cleared) {
1044+
params.sparams.dry_sequence_breakers.clear();
1045+
defaults_cleared = true;
1046+
}
1047+
1048+
if (value == "none") {
1049+
params.sparams.dry_sequence_breakers.clear();
1050+
} else {
1051+
params.sparams.dry_sequence_breakers.emplace_back(value);
1052+
}
1053+
}
1054+
).set_sparam());
10011055
add_opt(common_arg(
10021056
{"--dynatemp-range"}, "N",
10031057
string_format("dynamic temperature range (default: %.1f, 0.0 = disabled)", (double)params.sparams.dynatemp_range),
@@ -1014,7 +1068,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
10141068
).set_sparam());
10151069
add_opt(common_arg(
10161070
{"--mirostat"}, "N",
1017-
string_format("use Mirostat sampling.\nTop K, Nucleus, Tail Free and Locally Typical samplers are ignored if used.\n"
1071+
string_format("use Mirostat sampling.\nTop K, Nucleus and Locally Typical samplers are ignored if used.\n"
10181072
"(default: %d, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)", params.sparams.mirostat),
10191073
[](common_params & params, int value) {
10201074
params.sparams.mirostat = value;

common/common.cpp

Lines changed: 4 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -418,19 +418,6 @@ std::string string_format(const char * fmt, ...) {
418418
return std::string(buf.data(), size);
419419
}
420420

421-
std::vector<std::string> string_split(std::string input, char separator) {
422-
std::vector<std::string> parts;
423-
size_t separator_pos = input.find(separator);
424-
while (separator_pos != std::string::npos) {
425-
std::string part = input.substr(0, separator_pos);
426-
parts.emplace_back(part);
427-
input = input.substr(separator_pos + 1);
428-
separator_pos = input.find(separator);
429-
}
430-
parts.emplace_back(input);
431-
return parts;
432-
}
433-
434421
std::string string_strip(const std::string & str) {
435422
size_t start = 0;
436423
size_t end = str.size();
@@ -2024,6 +2011,10 @@ void yaml_dump_non_result_info(FILE * stream, const common_params & params, cons
20242011
fprintf(stream, "chunks: %d # default: -1 (unlimited)\n", params.n_chunks);
20252012
fprintf(stream, "color: %s # default: false\n", params.use_color ? "true" : "false");
20262013
fprintf(stream, "ctx_size: %d # default: 512\n", params.n_ctx);
2014+
fprintf(stream, "dry_allowed_length: %d # default: 2\n", sparams.dry_allowed_length);
2015+
fprintf(stream, "dry_base: %.2f # default: 1.75\n", sparams.dry_base);
2016+
fprintf(stream, "dry_multiplier: %.1f # default: 0.0\n", sparams.dry_multiplier);
2017+
fprintf(stream, "dry_penalty_last_n: %d # default: -1 (0 = disable, -1 = context size)\n", sparams.dry_penalty_last_n);
20272018
fprintf(stream, "escape: %s # default: false\n", params.escape ? "true" : "false");
20282019
fprintf(stream, "file: # never logged, see prompt instead. Can still be specified for input.\n");
20292020
fprintf(stream, "frequency_penalty: %f # default: 0.0 \n", sparams.penalty_freq);
@@ -2104,7 +2095,6 @@ void yaml_dump_non_result_info(FILE * stream, const common_params & params, cons
21042095
const std::vector<float> tensor_split_vector(params.tensor_split, params.tensor_split + llama_max_devices());
21052096
yaml_dump_vector_float(stream, "tensor_split", tensor_split_vector);
21062097

2107-
fprintf(stream, "tfs: %f # default: 1.0\n", sparams.tfs_z);
21082098
fprintf(stream, "threads: %d # default: %u\n", params.cpuparams.n_threads, std::thread::hardware_concurrency());
21092099
fprintf(stream, "top_k: %d # default: 40\n", sparams.top_k);
21102100
fprintf(stream, "top_p: %f # default: 0.95\n", sparams.top_p);

0 commit comments

Comments
 (0)