Skip to content

Commit 992a4d6

Browse files
committed
feat: sync llama.cpp
1 parent 0722848 commit 992a4d6

24 files changed

+3114
-2317
lines changed

cpp/common.cpp

Lines changed: 4 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -422,19 +422,6 @@ std::string string_format(const char * fmt, ...) {
422422
return std::string(buf.data(), size);
423423
}
424424

425-
std::vector<std::string> string_split(std::string input, char separator) {
426-
std::vector<std::string> parts;
427-
size_t separator_pos = input.find(separator);
428-
while (separator_pos != std::string::npos) {
429-
std::string part = input.substr(0, separator_pos);
430-
parts.emplace_back(part);
431-
input = input.substr(separator_pos + 1);
432-
separator_pos = input.find(separator);
433-
}
434-
parts.emplace_back(input);
435-
return parts;
436-
}
437-
438425
std::string string_strip(const std::string & str) {
439426
size_t start = 0;
440427
size_t end = str.size();
@@ -2027,6 +2014,10 @@ void yaml_dump_non_result_info(FILE * stream, const common_params & params, cons
20272014
fprintf(stream, "chunks: %d # default: -1 (unlimited)\n", params.n_chunks);
20282015
fprintf(stream, "color: %s # default: false\n", params.use_color ? "true" : "false");
20292016
fprintf(stream, "ctx_size: %d # default: 512\n", params.n_ctx);
2017+
fprintf(stream, "dry_allowed_length: %d # default: 2\n", sparams.dry_allowed_length);
2018+
fprintf(stream, "dry_base: %.2f # default: 1.75\n", sparams.dry_base);
2019+
fprintf(stream, "dry_multiplier: %.1f # default: 0.0\n", sparams.dry_multiplier);
2020+
fprintf(stream, "dry_penalty_last_n: %d # default: -1 (0 = disable, -1 = context size)\n", sparams.dry_penalty_last_n);
20302021
fprintf(stream, "escape: %s # default: false\n", params.escape ? "true" : "false");
20312022
fprintf(stream, "file: # never logged, see prompt instead. Can still be specified for input.\n");
20322023
fprintf(stream, "frequency_penalty: %f # default: 0.0 \n", sparams.penalty_freq);
@@ -2107,7 +2098,6 @@ void yaml_dump_non_result_info(FILE * stream, const common_params & params, cons
21072098
const std::vector<float> tensor_split_vector(params.tensor_split, params.tensor_split + llama_max_devices());
21082099
yaml_dump_vector_float(stream, "tensor_split", tensor_split_vector);
21092100

2110-
fprintf(stream, "tfs: %f # default: 1.0\n", sparams.tfs_z);
21112101
fprintf(stream, "threads: %d # default: %u\n", params.cpuparams.n_threads, std::thread::hardware_concurrency());
21122102
fprintf(stream, "top_k: %d # default: 40\n", sparams.top_k);
21132103
fprintf(stream, "top_p: %f # default: 0.95\n", sparams.top_p);

cpp/common.h

Lines changed: 55 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -95,14 +95,15 @@ enum llama_example {
9595

9696
enum common_sampler_type {
9797
COMMON_SAMPLER_TYPE_NONE = 0,
98-
COMMON_SAMPLER_TYPE_TOP_K = 1,
99-
COMMON_SAMPLER_TYPE_TOP_P = 2,
100-
COMMON_SAMPLER_TYPE_MIN_P = 3,
101-
COMMON_SAMPLER_TYPE_TFS_Z = 4,
102-
COMMON_SAMPLER_TYPE_TYPICAL_P = 5,
103-
COMMON_SAMPLER_TYPE_TEMPERATURE = 6,
104-
COMMON_SAMPLER_TYPE_XTC = 7,
105-
COMMON_SAMPLER_TYPE_INFILL = 8,
98+
COMMON_SAMPLER_TYPE_DRY = 1,
99+
COMMON_SAMPLER_TYPE_TOP_K = 2,
100+
COMMON_SAMPLER_TYPE_TOP_P = 3,
101+
COMMON_SAMPLER_TYPE_MIN_P = 4,
102+
//COMMON_SAMPLER_TYPE_TFS_Z = 5,
103+
COMMON_SAMPLER_TYPE_TYPICAL_P = 6,
104+
COMMON_SAMPLER_TYPE_TEMPERATURE = 7,
105+
COMMON_SAMPLER_TYPE_XTC = 8,
106+
COMMON_SAMPLER_TYPE_INFILL = 9,
106107
};
107108

108109
// dimensionality reduction methods, used by cvector-generator
@@ -115,34 +116,39 @@ enum dimre_method {
115116
struct common_sampler_params {
116117
uint32_t seed = LLAMA_DEFAULT_SEED; // the seed used to initialize llama_sampler
117118

118-
int32_t n_prev = 64; // number of previous tokens to remember
119-
int32_t n_probs = 0; // if greater than 0, output the probabilities of top n_probs tokens.
120-
int32_t min_keep = 0; // 0 = disabled, otherwise samplers should return at least min_keep tokens
121-
int32_t top_k = 40; // <= 0 to use vocab size
122-
float top_p = 0.95f; // 1.0 = disabled
123-
float min_p = 0.05f; // 0.0 = disabled
124-
float xtc_probability = 0.00f; // 0.0 = disabled
125-
float xtc_threshold = 0.10f; // > 0.5 disables XTC
126-
float tfs_z = 1.00f; // 1.0 = disabled
127-
float typ_p = 1.00f; // typical_p, 1.0 = disabled
128-
float temp = 0.80f; // <= 0.0 to sample greedily, 0.0 to not output probabilities
129-
float dynatemp_range = 0.00f; // 0.0 = disabled
130-
float dynatemp_exponent = 1.00f; // controls how entropy maps to temperature in dynamic temperature sampler
131-
int32_t penalty_last_n = 64; // last n tokens to penalize (0 = disable penalty, -1 = context size)
132-
float penalty_repeat = 1.00f; // 1.0 = disabled
133-
float penalty_freq = 0.00f; // 0.0 = disabled
134-
float penalty_present = 0.00f; // 0.0 = disabled
135-
int32_t mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
136-
float mirostat_tau = 5.00f; // target entropy
137-
float mirostat_eta = 0.10f; // learning rate
138-
bool penalize_nl = false; // consider newlines as a repeatable token
139-
bool ignore_eos = false;
140-
bool no_perf = false; // disable performance metrics
119+
int32_t n_prev = 64; // number of previous tokens to remember
120+
int32_t n_probs = 0; // if greater than 0, output the probabilities of top n_probs tokens.
121+
int32_t min_keep = 0; // 0 = disabled, otherwise samplers should return at least min_keep tokens
122+
int32_t top_k = 40; // <= 0 to use vocab size
123+
float top_p = 0.95f; // 1.0 = disabled
124+
float min_p = 0.05f; // 0.0 = disabled
125+
float xtc_probability = 0.00f; // 0.0 = disabled
126+
float xtc_threshold = 0.10f; // > 0.5 disables XTC
127+
float typ_p = 1.00f; // typical_p, 1.0 = disabled
128+
float temp = 0.80f; // <= 0.0 to sample greedily, 0.0 to not output probabilities
129+
float dynatemp_range = 0.00f; // 0.0 = disabled
130+
float dynatemp_exponent = 1.00f; // controls how entropy maps to temperature in dynamic temperature sampler
131+
int32_t penalty_last_n = 64; // last n tokens to penalize (0 = disable penalty, -1 = context size)
132+
float penalty_repeat = 1.00f; // 1.0 = disabled
133+
float penalty_freq = 0.00f; // 0.0 = disabled
134+
float penalty_present = 0.00f; // 0.0 = disabled
135+
float dry_multiplier = 0.0f; // 0.0 = disabled; DRY repetition penalty for tokens extending repetition:
136+
float dry_base = 1.75f; // 0.0 = disabled; multiplier * base ^ (length of sequence before token - allowed length)
137+
int32_t dry_allowed_length = 2; // tokens extending repetitions beyond this receive penalty
138+
int32_t dry_penalty_last_n = -1; // how many tokens to scan for repetitions (0 = disable penalty, -1 = context size)
139+
int32_t mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
140+
float mirostat_tau = 5.00f; // target entropy
141+
float mirostat_eta = 0.10f; // learning rate
142+
bool penalize_nl = false; // consider newlines as a repeatable token
143+
bool ignore_eos = false;
144+
bool no_perf = false; // disable performance metrics
145+
146+
std::vector<std::string> dry_sequence_breakers = {"\n", ":", "\"", "*"}; // default sequence breakers for DRY
141147

142148

143149
std::vector<enum common_sampler_type> samplers = {
150+
COMMON_SAMPLER_TYPE_DRY,
144151
COMMON_SAMPLER_TYPE_TOP_K,
145-
COMMON_SAMPLER_TYPE_TFS_Z,
146152
COMMON_SAMPLER_TYPE_TYPICAL_P,
147153
COMMON_SAMPLER_TYPE_TOP_P,
148154
COMMON_SAMPLER_TYPE_MIN_P,
@@ -392,15 +398,14 @@ bool set_process_priority(enum lm_ggml_sched_priority prio);
392398
LLAMA_COMMON_ATTRIBUTE_FORMAT(1, 2)
393399
std::string string_format(const char * fmt, ...);
394400

395-
std::vector<std::string> string_split(std::string input, char separator);
396-
397401
std::string string_strip(const std::string & str);
398402
std::string string_get_sortable_timestamp();
399403

400404
void string_replace_all(std::string & s, const std::string & search, const std::string & replace);
401405

402406
template<class T>
403407
static std::vector<T> string_split(const std::string & str, char delim) {
408+
static_assert(!std::is_same<T, std::string>::value, "Please use the specialized version for std::string");
404409
std::vector<T> values;
405410
std::istringstream str_stream(str);
406411
std::string token;
@@ -413,6 +418,22 @@ static std::vector<T> string_split(const std::string & str, char delim) {
413418
return values;
414419
}
415420

421+
// Explicit specialization of string_split for std::string.
//
// Splits `input` at every occurrence of `separator` and returns the pieces in order.
// The separator characters themselves are not included in the pieces.
// Empty pieces are kept: leading, trailing, or adjacent separators each produce an
// empty string, and an empty `input` yields a vector holding one empty string, so the
// result is never empty.
// `input` is taken by const reference and scanned in place using a moving start offset.
template<>
422+
std::vector<std::string> string_split<std::string>(const std::string & input, char separator)
423+
{
424+
std::vector<std::string> parts;
425+
size_t begin_pos = 0; // start offset of the piece currently being scanned
426+
size_t separator_pos = input.find(separator);
427+
while (separator_pos != std::string::npos) {
428+
std::string part = input.substr(begin_pos, separator_pos - begin_pos); // piece between the previous separator and this one
429+
parts.emplace_back(part);
430+
begin_pos = separator_pos + 1; // skip over the separator itself
431+
separator_pos = input.find(separator, begin_pos);
432+
}
433+
parts.emplace_back(input.substr(begin_pos, separator_pos - begin_pos)); // separator_pos is npos here; substr clamps the count, so this takes the remainder of input
434+
return parts;
435+
}
436+
416437
bool string_parse_kv_override(const char * data, std::vector<llama_model_kv_override> & overrides);
417438
void string_process_escapes(std::string & input);
418439

0 commit comments

Comments
 (0)