Skip to content

Commit 4dfbcf9

Browse files
Merge branch 'ggerganov:master' into master
2 parents 9b56176 + cc2983d commit 4dfbcf9

39 files changed

+2432
-956
lines changed

ci/run.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -53,7 +53,7 @@ if [ ! -z ${GG_BUILD_SYCL} ]; then
5353
exit 1
5454
fi
5555

56-
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_SYCL=1 DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON"
56+
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_SYCL=1 -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON"
5757
fi
5858

5959
if [ ! -z ${GG_BUILD_VULKAN} ]; then

common/arg.cpp

Lines changed: 66 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -128,13 +128,13 @@ static void common_params_handle_model_default(common_params & params) {
128128
}
129129
params.hf_file = params.model;
130130
} else if (params.model.empty()) {
131-
params.model = fs_get_cache_file(string_split(params.hf_file, '/').back());
131+
params.model = fs_get_cache_file(string_split<std::string>(params.hf_file, '/').back());
132132
}
133133
} else if (!params.model_url.empty()) {
134134
if (params.model.empty()) {
135-
auto f = string_split(params.model_url, '#').front();
136-
f = string_split(f, '?').front();
137-
params.model = fs_get_cache_file(string_split(f, '/').back());
135+
auto f = string_split<std::string>(params.model_url, '#').front();
136+
f = string_split<std::string>(f, '?').front();
137+
params.model = fs_get_cache_file(string_split<std::string>(f, '/').back());
138138
}
139139
} else if (params.model.empty()) {
140140
params.model = DEFAULT_MODEL_PATH;
@@ -251,6 +251,9 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
251251
for (auto & antiprompt : params.antiprompt) {
252252
string_process_escapes(antiprompt);
253253
}
254+
for (auto & seq_breaker : params.sparams.dry_sequence_breakers) {
255+
string_process_escapes(seq_breaker);
256+
}
254257
}
255258

256259
if (!params.kv_overrides.empty()) {
@@ -879,7 +882,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
879882
{"--samplers"}, "SAMPLERS",
880883
string_format("samplers that will be used for generation in the order, separated by \';\'\n(default: %s)", sampler_type_names.c_str()),
881884
[](common_params & params, const std::string & value) {
882-
const auto sampler_names = string_split(value, ';');
885+
const auto sampler_names = string_split<std::string>(value, ';');
883886
params.sparams.samplers = common_sampler_types_from_names(sampler_names, true);
884887
}
885888
).set_sparam());
@@ -997,6 +1000,64 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
9971000
params.sparams.penalty_freq = std::stof(value);
9981001
}
9991002
).set_sparam());
1003+
add_opt(common_arg(
1004+
{"--dry-multiplier"}, "N",
1005+
string_format("set DRY sampling multiplier (default: %.1f, 0.0 = disabled)", (double)params.sparams.dry_multiplier),
1006+
[](common_params & params, const std::string & value) {
1007+
params.sparams.dry_multiplier = std::stof(value);
1008+
}
1009+
).set_sparam());
1010+
add_opt(common_arg(
1011+
{"--dry-base"}, "N",
1012+
string_format("set DRY sampling base value (default: %.2f)", (double)params.sparams.dry_base),
1013+
[](common_params & params, const std::string & value) {
1014+
float potential_base = std::stof(value);
1015+
if (potential_base >= 1.0f)
1016+
{
1017+
params.sparams.dry_base = potential_base;
1018+
}
1019+
}
1020+
).set_sparam());
1021+
add_opt(common_arg(
1022+
{"--dry-allowed-length"}, "N",
1023+
string_format("set allowed length for DRY sampling (default: %d)", params.sparams.dry_allowed_length),
1024+
[](common_params & params, int value) {
1025+
params.sparams.dry_allowed_length = value;
1026+
}
1027+
).set_sparam());
1028+
add_opt(common_arg(
1029+
{"--dry-penalty-last-n"}, "N",
1030+
string_format("set DRY penalty for the last n tokens (default: %d, 0 = disable, -1 = context size)", params.sparams.dry_penalty_last_n),
1031+
[](common_params & params, int value) {
1032+
params.sparams.dry_penalty_last_n = value;
1033+
}
1034+
).set_sparam());
1035+
add_opt(common_arg(
1036+
{"--dry-sequence-breaker"}, "STRING",
1037+
string_format("add sequence breaker for DRY sampling, clearing out default breakers (%s) in the process; use \"none\" to not use any sequence breakers\n",
1038+
params.sparams.dry_sequence_breakers.empty() ? "none" :
1039+
std::accumulate(std::next(params.sparams.dry_sequence_breakers.begin()),
1040+
params.sparams.dry_sequence_breakers.end(),
1041+
std::string("'") + (params.sparams.dry_sequence_breakers[0] == "\n" ? "\\n" : params.sparams.dry_sequence_breakers[0]) + "'",
1042+
[](const std::string& a, const std::string& b) {
1043+
std::string formatted_b = (b == "\n") ? "\\n" : b;
1044+
return a + ", '" + formatted_b + "'";
1045+
}).c_str()),
1046+
[](common_params & params, const std::string & value) {
1047+
static bool defaults_cleared = false;
1048+
1049+
if (!defaults_cleared) {
1050+
params.sparams.dry_sequence_breakers.clear();
1051+
defaults_cleared = true;
1052+
}
1053+
1054+
if (value == "none") {
1055+
params.sparams.dry_sequence_breakers.clear();
1056+
} else {
1057+
params.sparams.dry_sequence_breakers.emplace_back(value);
1058+
}
1059+
}
1060+
).set_sparam());
10001061
add_opt(common_arg(
10011062
{"--dynatemp-range"}, "N",
10021063
string_format("dynamic temperature range (default: %.1f, 0.0 = disabled)", (double)params.sparams.dynatemp_range),

common/common.cpp

Lines changed: 4 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -416,19 +416,6 @@ std::string string_format(const char * fmt, ...) {
416416
return std::string(buf.data(), size);
417417
}
418418

419-
std::vector<std::string> string_split(std::string input, char separator) {
420-
std::vector<std::string> parts;
421-
size_t separator_pos = input.find(separator);
422-
while (separator_pos != std::string::npos) {
423-
std::string part = input.substr(0, separator_pos);
424-
parts.emplace_back(part);
425-
input = input.substr(separator_pos + 1);
426-
separator_pos = input.find(separator);
427-
}
428-
parts.emplace_back(input);
429-
return parts;
430-
}
431-
432419
std::string string_strip(const std::string & str) {
433420
size_t start = 0;
434421
size_t end = str.size();
@@ -2019,6 +2006,10 @@ void yaml_dump_non_result_info(FILE * stream, const common_params & params, cons
20192006
fprintf(stream, "chunks: %d # default: -1 (unlimited)\n", params.n_chunks);
20202007
fprintf(stream, "color: %s # default: false\n", params.use_color ? "true" : "false");
20212008
fprintf(stream, "ctx_size: %d # default: 512\n", params.n_ctx);
2009+
fprintf(stream, "dry_allowed_length: %d # default: 2\n", sparams.dry_allowed_length);
2010+
fprintf(stream, "dry_base: %.2f # default: 1.75\n", sparams.dry_base);
2011+
fprintf(stream, "dry_multiplier: %.1f # default: 0.0\n", sparams.dry_multiplier);
2012+
fprintf(stream, "dry_penalty_last_n: %d # default: -1 (0 = disable, -1 = context size)\n", sparams.dry_penalty_last_n);
20222013
fprintf(stream, "escape: %s # default: false\n", params.escape ? "true" : "false");
20232014
fprintf(stream, "file: # never logged, see prompt instead. Can still be specified for input.\n");
20242015
fprintf(stream, "frequency_penalty: %f # default: 0.0 \n", sparams.penalty_freq);

common/common.h

Lines changed: 56 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -84,14 +84,15 @@ enum llama_example {
8484

8585
enum common_sampler_type {
8686
COMMON_SAMPLER_TYPE_NONE = 0,
87-
COMMON_SAMPLER_TYPE_TOP_K = 1,
88-
COMMON_SAMPLER_TYPE_TOP_P = 2,
89-
COMMON_SAMPLER_TYPE_MIN_P = 3,
90-
COMMON_SAMPLER_TYPE_TFS_Z = 4,
91-
COMMON_SAMPLER_TYPE_TYPICAL_P = 5,
92-
COMMON_SAMPLER_TYPE_TEMPERATURE = 6,
93-
COMMON_SAMPLER_TYPE_XTC = 7,
94-
COMMON_SAMPLER_TYPE_INFILL = 8,
87+
COMMON_SAMPLER_TYPE_DRY = 1,
88+
COMMON_SAMPLER_TYPE_TOP_K = 2,
89+
COMMON_SAMPLER_TYPE_TOP_P = 3,
90+
COMMON_SAMPLER_TYPE_MIN_P = 4,
91+
COMMON_SAMPLER_TYPE_TFS_Z = 5,
92+
COMMON_SAMPLER_TYPE_TYPICAL_P = 6,
93+
COMMON_SAMPLER_TYPE_TEMPERATURE = 7,
94+
COMMON_SAMPLER_TYPE_XTC = 8,
95+
COMMON_SAMPLER_TYPE_INFILL = 9,
9596
};
9697

9798
// dimensionality reduction methods, used by cvector-generator
@@ -104,32 +105,39 @@ enum dimre_method {
104105
struct common_sampler_params {
105106
uint32_t seed = LLAMA_DEFAULT_SEED; // the seed used to initialize llama_sampler
106107

107-
int32_t n_prev = 64; // number of previous tokens to remember
108-
int32_t n_probs = 0; // if greater than 0, output the probabilities of top n_probs tokens.
109-
int32_t min_keep = 0; // 0 = disabled, otherwise samplers should return at least min_keep tokens
110-
int32_t top_k = 40; // <= 0 to use vocab size
111-
float top_p = 0.95f; // 1.0 = disabled
112-
float min_p = 0.05f; // 0.0 = disabled
113-
float xtc_probability = 0.00f; // 0.0 = disabled
114-
float xtc_threshold = 0.10f; // > 0.5 disables XTC
115-
float tfs_z = 1.00f; // 1.0 = disabled
116-
float typ_p = 1.00f; // typical_p, 1.0 = disabled
117-
float temp = 0.80f; // <= 0.0 to sample greedily, 0.0 to not output probabilities
118-
float dynatemp_range = 0.00f; // 0.0 = disabled
119-
float dynatemp_exponent = 1.00f; // controls how entropy maps to temperature in dynamic temperature sampler
120-
int32_t penalty_last_n = 64; // last n tokens to penalize (0 = disable penalty, -1 = context size)
121-
float penalty_repeat = 1.00f; // 1.0 = disabled
122-
float penalty_freq = 0.00f; // 0.0 = disabled
123-
float penalty_present = 0.00f; // 0.0 = disabled
124-
int32_t mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
125-
float mirostat_tau = 5.00f; // target entropy
126-
float mirostat_eta = 0.10f; // learning rate
127-
bool penalize_nl = false; // consider newlines as a repeatable token
128-
bool ignore_eos = false;
129-
bool no_perf = false; // disable performance metrics
108+
int32_t n_prev = 64; // number of previous tokens to remember
109+
int32_t n_probs = 0; // if greater than 0, output the probabilities of top n_probs tokens.
110+
int32_t min_keep = 0; // 0 = disabled, otherwise samplers should return at least min_keep tokens
111+
int32_t top_k = 40; // <= 0 to use vocab size
112+
float top_p = 0.95f; // 1.0 = disabled
113+
float min_p = 0.05f; // 0.0 = disabled
114+
float xtc_probability = 0.00f; // 0.0 = disabled
115+
float xtc_threshold = 0.10f; // > 0.5 disables XTC
116+
float tfs_z = 1.00f; // 1.0 = disabled
117+
float typ_p = 1.00f; // typical_p, 1.0 = disabled
118+
float temp = 0.80f; // <= 0.0 to sample greedily, 0.0 to not output probabilities
119+
float dynatemp_range = 0.00f; // 0.0 = disabled
120+
float dynatemp_exponent = 1.00f; // controls how entropy maps to temperature in dynamic temperature sampler
121+
int32_t penalty_last_n = 64; // last n tokens to penalize (0 = disable penalty, -1 = context size)
122+
float penalty_repeat = 1.00f; // 1.0 = disabled
123+
float penalty_freq = 0.00f; // 0.0 = disabled
124+
float penalty_present = 0.00f; // 0.0 = disabled
125+
float dry_multiplier = 0.0f; // 0.0 = disabled; DRY repetition penalty for tokens extending repetition:
126+
float dry_base = 1.75f; // 0.0 = disabled; multiplier * base ^ (length of sequence before token - allowed length)
127+
int32_t dry_allowed_length = 2; // tokens extending repetitions beyond this receive penalty
128+
int32_t dry_penalty_last_n = -1; // how many tokens to scan for repetitions (0 = disable penalty, -1 = context size)
129+
int32_t mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
130+
float mirostat_tau = 5.00f; // target entropy
131+
float mirostat_eta = 0.10f; // learning rate
132+
bool penalize_nl = false; // consider newlines as a repeatable token
133+
bool ignore_eos = false;
134+
bool no_perf = false; // disable performance metrics
135+
136+
std::vector<std::string> dry_sequence_breakers = {"\n", ":", "\"", "*"}; // default sequence breakers for DRY
130137

131138

132139
std::vector<enum common_sampler_type> samplers = {
140+
COMMON_SAMPLER_TYPE_DRY,
133141
COMMON_SAMPLER_TYPE_TOP_K,
134142
COMMON_SAMPLER_TYPE_TFS_Z,
135143
COMMON_SAMPLER_TYPE_TYPICAL_P,
@@ -380,15 +388,14 @@ bool set_process_priority(enum ggml_sched_priority prio);
380388
LLAMA_COMMON_ATTRIBUTE_FORMAT(1, 2)
381389
std::string string_format(const char * fmt, ...);
382390

383-
std::vector<std::string> string_split(std::string input, char separator);
384-
385391
std::string string_strip(const std::string & str);
386392
std::string string_get_sortable_timestamp();
387393

388394
void string_replace_all(std::string & s, const std::string & search, const std::string & replace);
389395

390396
template<class T>
391397
static std::vector<T> string_split(const std::string & str, char delim) {
398+
static_assert(!std::is_same<T, std::string>::value, "Please use the specialized version for std::string");
392399
std::vector<T> values;
393400
std::istringstream str_stream(str);
394401
std::string token;
@@ -401,6 +408,22 @@ static std::vector<T> string_split(const std::string & str, char delim) {
401408
return values;
402409
}
403410

411+
template<> // explicit specialization of string_split for T = std::string
412+
std::vector<std::string> string_split<std::string>(const std::string & input, char separator) // splits input at every separator; empty fields are kept
413+
{
414+
std::vector<std::string> parts;
415+
size_t begin_pos = 0; // index of the first character of the field currently being extracted
416+
size_t separator_pos = input.find(separator);
417+
while (separator_pos != std::string::npos) {
418+
std::string part = input.substr(begin_pos, separator_pos - begin_pos); // text between the previous separator and this one (may be empty)
419+
parts.emplace_back(part);
420+
begin_pos = separator_pos + 1; // skip past the separator itself
421+
separator_pos = input.find(separator, begin_pos); // resume the search after the separator just consumed
422+
}
423+
parts.emplace_back(input.substr(begin_pos, separator_pos - begin_pos)); // separator_pos is npos here; substr clamps the oversized count, so this appends the remainder (always at least one element is returned)
424+
return parts;
425+
}
426+
404427
bool string_parse_kv_override(const char * data, std::vector<llama_model_kv_override> & overrides);
405428
void string_process_escapes(std::string & input);
406429

common/sampling.cpp

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -130,9 +130,11 @@ std::string common_sampler_params::print() const {
130130

131131
snprintf(result, sizeof(result),
132132
"\trepeat_last_n = %d, repeat_penalty = %.3f, frequency_penalty = %.3f, presence_penalty = %.3f\n"
133+
"\tdry_multiplier = %.3f, dry_base = %.3f, dry_allowed_length = %d, dry_penalty_last_n = %d\n"
133134
"\ttop_k = %d, tfs_z = %.3f, top_p = %.3f, min_p = %.3f, xtc_probability = %.3f, xtc_threshold = %.3f, typical_p = %.3f, temp = %.3f\n"
134135
"\tmirostat = %d, mirostat_lr = %.3f, mirostat_ent = %.3f",
135136
penalty_last_n, penalty_repeat, penalty_freq, penalty_present,
137+
dry_multiplier, dry_base, dry_allowed_length, dry_penalty_last_n,
136138
top_k, tfs_z, top_p, min_p, xtc_probability, xtc_threshold, typ_p, temp,
137139
mirostat, mirostat_eta, mirostat_tau);
138140

@@ -174,6 +176,17 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co
174176
if (params.mirostat == 0) {
175177
for (const auto & cnstr : params.samplers) {
176178
switch (cnstr) {
179+
case COMMON_SAMPLER_TYPE_DRY:
180+
{
181+
std::vector<const char*> c_breakers;
182+
c_breakers.reserve(params.dry_sequence_breakers.size());
183+
for (const auto& str : params.dry_sequence_breakers) {
184+
c_breakers.push_back(str.c_str());
185+
}
186+
187+
llama_sampler_chain_add(result->chain, llama_sampler_init_dry (model, params.dry_multiplier, params.dry_base, params.dry_allowed_length, params.dry_penalty_last_n, c_breakers.data(), c_breakers.size()));
188+
}
189+
break;
177190
case COMMON_SAMPLER_TYPE_TOP_K:
178191
llama_sampler_chain_add(result->chain, llama_sampler_init_top_k (params.top_k));
179192
break;
@@ -358,6 +371,7 @@ std::string common_sampler_prev_str(common_sampler * gsmpl, llama_context * ctx_
358371

359372
char common_sampler_type_to_chr(enum common_sampler_type cnstr) {
360373
switch (cnstr) {
374+
case COMMON_SAMPLER_TYPE_DRY: return 'd';
361375
case COMMON_SAMPLER_TYPE_TOP_K: return 'k';
362376
case COMMON_SAMPLER_TYPE_TFS_Z: return 'f';
363377
case COMMON_SAMPLER_TYPE_TYPICAL_P: return 'y';
@@ -372,6 +386,7 @@ char common_sampler_type_to_chr(enum common_sampler_type cnstr) {
372386

373387
std::string common_sampler_type_to_str(enum common_sampler_type cnstr) {
374388
switch (cnstr) {
389+
case COMMON_SAMPLER_TYPE_DRY: return "dry";
375390
case COMMON_SAMPLER_TYPE_TOP_K: return "top_k";
376391
case COMMON_SAMPLER_TYPE_TFS_Z: return "tfs_z";
377392
case COMMON_SAMPLER_TYPE_TYPICAL_P: return "typ_p";
@@ -386,6 +401,7 @@ std::string common_sampler_type_to_str(enum common_sampler_type cnstr) {
386401

387402
std::vector<common_sampler_type> common_sampler_types_from_names(const std::vector<std::string> & names, bool allow_alt_names) {
388403
std::unordered_map<std::string, common_sampler_type> sampler_canonical_name_map {
404+
{ "dry", COMMON_SAMPLER_TYPE_DRY },
389405
{ "top_k", COMMON_SAMPLER_TYPE_TOP_K },
390406
{ "top_p", COMMON_SAMPLER_TYPE_TOP_P },
391407
{ "typ_p", COMMON_SAMPLER_TYPE_TYPICAL_P },
@@ -434,6 +450,7 @@ std::vector<common_sampler_type> common_sampler_types_from_names(const std::vect
434450

435451
std::vector<common_sampler_type> common_sampler_types_from_chars(const std::string & chars) {
436452
std::unordered_map<char, common_sampler_type> sampler_name_map = {
453+
{ common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_DRY), COMMON_SAMPLER_TYPE_DRY },
437454
{ common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_TOP_K), COMMON_SAMPLER_TYPE_TOP_K },
438455
{ common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_TFS_Z), COMMON_SAMPLER_TYPE_TFS_Z },
439456
{ common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_TYPICAL_P), COMMON_SAMPLER_TYPE_TYPICAL_P },

convert_hf_to_gguf.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -573,6 +573,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
573573
if chkhsh == "0876d13b50744004aa9aeae05e7b0647eac9d801b5ba4668afc01e709c15e19f":
574574
# ref: https://huggingface.co/BAAI/bge-small-en-v1.5
575575
res = "bert-bge"
576+
if chkhsh == "8e62295832751ca1e8f92f2226f403dea30dc5165e448b5bfa05af5340c64ec7":
577+
# ref: https://huggingface.co/BAAI/bge-large-zh-v1.5
578+
res = "bert-bge-large"
576579
if chkhsh == "b6dc8df998e1cfbdc4eac8243701a65afe638679230920b50d6f17d81c098166":
577580
# ref: https://huggingface.co/mosaicml/mpt-7b
578581
res = "mpt"
@@ -2865,6 +2868,7 @@ def set_vocab(self):
28652868
self.gguf_writer.add_token_types(toktypes)
28662869
special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=False)
28672870
special_vocab.chat_template = "rwkv-world"
2871+
# hack: Add '\n\n' as the EOT token to make it chat normally
28682872
special_vocab._set_special_token("eot", 261)
28692873
special_vocab.add_to_gguf(self.gguf_writer)
28702874

convert_hf_to_gguf_update.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -72,6 +72,7 @@ class TOKENIZER_TYPE(IntEnum):
7272
{"name": "deepseek-coder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-base", },
7373
{"name": "falcon", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/falcon-7b", },
7474
{"name": "bert-bge", "tokt": TOKENIZER_TYPE.WPM, "repo": "https://huggingface.co/BAAI/bge-small-en-v1.5", },
75+
{"name": "bert-bge-large", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/BAAI/bge-large-zh-v1.5", },
7576
{"name": "mpt", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/mosaicml/mpt-7b", },
7677
{"name": "starcoder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/bigcode/starcoder2-3b", },
7778
{"name": "gpt-2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/openai-community/gpt2", },

0 commit comments

Comments
 (0)