// Sampler types that can be chained by the common sampling code.
// The numeric ids are part of the external interface (CLI args / serialized
// configs compare against them), so ids of removed samplers are kept reserved
// rather than reused: TFS_Z was dropped but its slot (5) stays vacant.
enum common_sampler_type {
    COMMON_SAMPLER_TYPE_NONE        = 0,
    COMMON_SAMPLER_TYPE_DRY         = 1,
    COMMON_SAMPLER_TYPE_TOP_K       = 2,
    COMMON_SAMPLER_TYPE_TOP_P       = 3,
    COMMON_SAMPLER_TYPE_MIN_P       = 4,
  //COMMON_SAMPLER_TYPE_TFS_Z       = 5, // removed; id reserved (see note above)
    COMMON_SAMPLER_TYPE_TYPICAL_P   = 6,
    COMMON_SAMPLER_TYPE_TEMPERATURE = 7,
    COMMON_SAMPLER_TYPE_XTC         = 8,
    COMMON_SAMPLER_TYPE_INFILL      = 9,
};
// dimensionality reduction methods, used by cvector-generator
@@ -115,34 +116,39 @@ enum dimre_method {
115116struct common_sampler_params {
116117 uint32_t seed = LLAMA_DEFAULT_SEED; // the seed used to initialize llama_sampler
117118
118- int32_t n_prev = 64 ; // number of previous tokens to remember
119- int32_t n_probs = 0 ; // if greater than 0, output the probabilities of top n_probs tokens.
120- int32_t min_keep = 0 ; // 0 = disabled, otherwise samplers should return at least min_keep tokens
121- int32_t top_k = 40 ; // <= 0 to use vocab size
122- float top_p = 0 .95f ; // 1.0 = disabled
123- float min_p = 0 .05f ; // 0.0 = disabled
124- float xtc_probability = 0 .00f ; // 0.0 = disabled
125- float xtc_threshold = 0 .10f ; // > 0.5 disables XTC
126- float tfs_z = 1 .00f ; // 1.0 = disabled
127- float typ_p = 1 .00f ; // typical_p, 1.0 = disabled
128- float temp = 0 .80f ; // <= 0.0 to sample greedily, 0.0 to not output probabilities
129- float dynatemp_range = 0 .00f ; // 0.0 = disabled
130- float dynatemp_exponent = 1 .00f ; // controls how entropy maps to temperature in dynamic temperature sampler
131- int32_t penalty_last_n = 64 ; // last n tokens to penalize (0 = disable penalty, -1 = context size)
132- float penalty_repeat = 1 .00f ; // 1.0 = disabled
133- float penalty_freq = 0 .00f ; // 0.0 = disabled
134- float penalty_present = 0 .00f ; // 0.0 = disabled
135- int32_t mirostat = 0 ; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
136- float mirostat_tau = 5 .00f ; // target entropy
137- float mirostat_eta = 0 .10f ; // learning rate
138- bool penalize_nl = false ; // consider newlines as a repeatable token
139- bool ignore_eos = false ;
140- bool no_perf = false ; // disable performance metrics
119+ int32_t n_prev = 64 ; // number of previous tokens to remember
120+ int32_t n_probs = 0 ; // if greater than 0, output the probabilities of top n_probs tokens.
121+ int32_t min_keep = 0 ; // 0 = disabled, otherwise samplers should return at least min_keep tokens
122+ int32_t top_k = 40 ; // <= 0 to use vocab size
123+ float top_p = 0 .95f ; // 1.0 = disabled
124+ float min_p = 0 .05f ; // 0.0 = disabled
125+ float xtc_probability = 0 .00f ; // 0.0 = disabled
126+ float xtc_threshold = 0 .10f ; // > 0.5 disables XTC
127+ float typ_p = 1 .00f ; // typical_p, 1.0 = disabled
128+ float temp = 0 .80f ; // <= 0.0 to sample greedily, 0.0 to not output probabilities
129+ float dynatemp_range = 0 .00f ; // 0.0 = disabled
130+ float dynatemp_exponent = 1 .00f ; // controls how entropy maps to temperature in dynamic temperature sampler
131+ int32_t penalty_last_n = 64 ; // last n tokens to penalize (0 = disable penalty, -1 = context size)
132+ float penalty_repeat = 1 .00f ; // 1.0 = disabled
133+ float penalty_freq = 0 .00f ; // 0.0 = disabled
134+ float penalty_present = 0 .00f ; // 0.0 = disabled
135+ float dry_multiplier = 0 .0f ; // 0.0 = disabled; DRY repetition penalty for tokens extending repetition:
136+ float dry_base = 1 .75f ; // 0.0 = disabled; multiplier * base ^ (length of sequence before token - allowed length)
137+ int32_t dry_allowed_length = 2 ; // tokens extending repetitions beyond this receive penalty
138+ int32_t dry_penalty_last_n = -1 ; // how many tokens to scan for repetitions (0 = disable penalty, -1 = context size)
139+ int32_t mirostat = 0 ; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
140+ float mirostat_tau = 5 .00f ; // target entropy
141+ float mirostat_eta = 0 .10f ; // learning rate
142+ bool penalize_nl = false ; // consider newlines as a repeatable token
143+ bool ignore_eos = false ;
144+ bool no_perf = false ; // disable performance metrics
145+
146+ std::vector<std::string> dry_sequence_breakers = {" \n " , " :" , " \" " , " *" }; // default sequence breakers for DRY
141147
142148
143149 std::vector<enum common_sampler_type> samplers = {
150+ COMMON_SAMPLER_TYPE_DRY,
144151 COMMON_SAMPLER_TYPE_TOP_K,
145- COMMON_SAMPLER_TYPE_TFS_Z,
146152 COMMON_SAMPLER_TYPE_TYPICAL_P,
147153 COMMON_SAMPLER_TYPE_TOP_P,
148154 COMMON_SAMPLER_TYPE_MIN_P,
@@ -392,15 +398,14 @@ bool set_process_priority(enum lm_ggml_sched_priority prio);
392398LLAMA_COMMON_ATTRIBUTE_FORMAT (1 , 2 )
393399std::string string_format(const char * fmt, ...);
394400
395- std::vector<std::string> string_split (std::string input, char separator);
396-
397401std::string string_strip (const std::string & str);
398402std::string string_get_sortable_timestamp ();
399403
400404void string_replace_all (std::string & s, const std::string & search, const std::string & replace);
401405
// Split `str` on `delim` and stream-extract each piece into a T.
// Deliberately rejects T = std::string at compile time: operator>> stops at
// whitespace inside a piece, so strings must use the specialization below.
template<class T>
static std::vector<T> string_split(const std::string & str, char delim) {
    static_assert(!std::is_same<T, std::string>::value, "Please use the specialized version for std::string");
    std::vector<T> values;
    std::istringstream str_stream(str);
    std::string token;
    // NOTE(review): this loop body falls between diff hunks in the source I was
    // given; reconstructed from the visible scaffolding — verify against upstream.
    while (std::getline(str_stream, token, delim)) {
        T value;
        std::istringstream token_stream(token);
        token_stream >> value;
        values.push_back(value);
    }
    return values;
}

// Specialization for std::string: plain substring split on `separator`,
// preserving empty pieces and interior whitespace (no stream extraction).
// Always returns at least one element; "" splits to { "" }.
template<>
std::vector<std::string> string_split<std::string>(const std::string & input, char separator)
{
    std::vector<std::string> parts;
    size_t begin_pos     = 0;
    size_t separator_pos = input.find(separator);
    while (separator_pos != std::string::npos) {
        std::string part = input.substr(begin_pos, separator_pos - begin_pos);
        parts.emplace_back(part);
        begin_pos     = separator_pos + 1;
        separator_pos = input.find(separator, begin_pos);
    }
    // separator_pos == npos here, so substr takes everything after the last separator
    parts.emplace_back(input.substr(begin_pos, separator_pos - begin_pos));
    return parts;
}
436+
416437bool string_parse_kv_override (const char * data, std::vector<llama_model_kv_override> & overrides);
417438void string_process_escapes (std::string & input);
418439
0 commit comments