@@ -84,14 +84,15 @@ enum llama_example {
8484
// The individual sampler stages that can be chained together.
// NOTE: the numeric values are part of the public API (they are serialized in
// configs / CLI args), so new entries must be appended rather than reordered.
// DRY was inserted as 1, shifting every later sampler up by one.
enum common_sampler_type {
    COMMON_SAMPLER_TYPE_NONE        = 0,
    COMMON_SAMPLER_TYPE_DRY         = 1, // DRY (don't-repeat-yourself) repetition penalty
    COMMON_SAMPLER_TYPE_TOP_K       = 2,
    COMMON_SAMPLER_TYPE_TOP_P       = 3,
    COMMON_SAMPLER_TYPE_MIN_P       = 4,
    COMMON_SAMPLER_TYPE_TFS_Z       = 5, // tail-free sampling
    COMMON_SAMPLER_TYPE_TYPICAL_P   = 6,
    COMMON_SAMPLER_TYPE_TEMPERATURE = 7,
    COMMON_SAMPLER_TYPE_XTC         = 8, // exclude-top-choices sampling
    COMMON_SAMPLER_TYPE_INFILL      = 9,
};
9697
9798// dimensionality reduction methods, used by cvector-generator
@@ -104,32 +105,39 @@ enum dimre_method {
104105struct common_sampler_params {
105106 uint32_t seed = LLAMA_DEFAULT_SEED; // the seed used to initialize llama_sampler
106107
107- int32_t n_prev = 64 ; // number of previous tokens to remember
108- int32_t n_probs = 0 ; // if greater than 0, output the probabilities of top n_probs tokens.
109- int32_t min_keep = 0 ; // 0 = disabled, otherwise samplers should return at least min_keep tokens
110- int32_t top_k = 40 ; // <= 0 to use vocab size
111- float top_p = 0 .95f ; // 1.0 = disabled
112- float min_p = 0 .05f ; // 0.0 = disabled
113- float xtc_probability = 0 .00f ; // 0.0 = disabled
114- float xtc_threshold = 0 .10f ; // > 0.5 disables XTC
115- float tfs_z = 1 .00f ; // 1.0 = disabled
116- float typ_p = 1 .00f ; // typical_p, 1.0 = disabled
117- float temp = 0 .80f ; // <= 0.0 to sample greedily, 0.0 to not output probabilities
118- float dynatemp_range = 0 .00f ; // 0.0 = disabled
119- float dynatemp_exponent = 1 .00f ; // controls how entropy maps to temperature in dynamic temperature sampler
120- int32_t penalty_last_n = 64 ; // last n tokens to penalize (0 = disable penalty, -1 = context size)
121- float penalty_repeat = 1 .00f ; // 1.0 = disabled
122- float penalty_freq = 0 .00f ; // 0.0 = disabled
123- float penalty_present = 0 .00f ; // 0.0 = disabled
124- int32_t mirostat = 0 ; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
125- float mirostat_tau = 5 .00f ; // target entropy
126- float mirostat_eta = 0 .10f ; // learning rate
127- bool penalize_nl = false ; // consider newlines as a repeatable token
128- bool ignore_eos = false ;
129- bool no_perf = false ; // disable performance metrics
108+ int32_t n_prev = 64 ; // number of previous tokens to remember
109+ int32_t n_probs = 0 ; // if greater than 0, output the probabilities of top n_probs tokens.
110+ int32_t min_keep = 0 ; // 0 = disabled, otherwise samplers should return at least min_keep tokens
111+ int32_t top_k = 40 ; // <= 0 to use vocab size
112+ float top_p = 0 .95f ; // 1.0 = disabled
113+ float min_p = 0 .05f ; // 0.0 = disabled
114+ float xtc_probability = 0 .00f ; // 0.0 = disabled
115+ float xtc_threshold = 0 .10f ; // > 0.5 disables XTC
116+ float tfs_z = 1 .00f ; // 1.0 = disabled
117+ float typ_p = 1 .00f ; // typical_p, 1.0 = disabled
118+ float temp = 0 .80f ; // <= 0.0 to sample greedily, 0.0 to not output probabilities
119+ float dynatemp_range = 0 .00f ; // 0.0 = disabled
120+ float dynatemp_exponent = 1 .00f ; // controls how entropy maps to temperature in dynamic temperature sampler
121+ int32_t penalty_last_n = 64 ; // last n tokens to penalize (0 = disable penalty, -1 = context size)
122+ float penalty_repeat = 1 .00f ; // 1.0 = disabled
123+ float penalty_freq = 0 .00f ; // 0.0 = disabled
124+ float penalty_present = 0 .00f ; // 0.0 = disabled
125+ float dry_multiplier = 0 .0f ; // 0.0 = disabled; DRY repetition penalty for tokens extending repetition:
126+ float dry_base = 1 .75f ; // 0.0 = disabled; multiplier * base ^ (length of sequence before token - allowed length)
127+ int32_t dry_allowed_length = 2 ; // tokens extending repetitions beyond this receive penalty
128+ int32_t dry_penalty_last_n = -1 ; // how many tokens to scan for repetitions (0 = disable penalty, -1 = context size)
129+ int32_t mirostat = 0 ; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
130+ float mirostat_tau = 5 .00f ; // target entropy
131+ float mirostat_eta = 0 .10f ; // learning rate
132+ bool penalize_nl = false ; // consider newlines as a repeatable token
133+ bool ignore_eos = false ;
134+ bool no_perf = false ; // disable performance metrics
135+
136+ std::vector<std::string> dry_sequence_breakers = {" \n " , " :" , " \" " , " *" }; // default sequence breakers for DRY
130137
131138
132139 std::vector<enum common_sampler_type> samplers = {
140+ COMMON_SAMPLER_TYPE_DRY,
133141 COMMON_SAMPLER_TYPE_TOP_K,
134142 COMMON_SAMPLER_TYPE_TFS_Z,
135143 COMMON_SAMPLER_TYPE_TYPICAL_P,
@@ -380,15 +388,14 @@ bool set_process_priority(enum ggml_sched_priority prio);
380388LLAMA_COMMON_ATTRIBUTE_FORMAT (1 , 2 )
381389std::string string_format(const char * fmt, ...);
382390
383- std::vector<std::string> string_split (std::string input, char separator);
384-
385391std::string string_strip (const std::string & str);
386392std::string string_get_sortable_timestamp ();
387393
388394void string_replace_all (std::string & s, const std::string & search, const std::string & replace);
389395
// Split `str` on `delim` and stream-parse each piece into a T.
// Empty pieces produce no element (operator>> fails on an empty stream), and a
// piece that fails to parse contributes a value-initialized T — callers are
// expected to pass well-formed input.
// NOTE(review): the loop interior here is reconstructed from the surrounding
// context of the mangled paste (istringstream/token setup + `return values;`);
// confirm against the upstream file.
template <class T>
static std::vector<T> string_split(const std::string & str, char delim) {
    // For T = std::string the naive stream-extraction would split on any
    // whitespace inside a piece; force callers to the specialization below.
    static_assert(!std::is_same<T, std::string>::value, "Please use the specialized version for std::string");
    std::vector<T> values;
    std::istringstream str_stream(str);
    std::string token;
    while (std::getline(str_stream, token, delim)) {
        T value;
        std::istringstream token_stream(token);
        token_stream >> value;
        values.push_back(value);
    }
    return values;
}

// Specialization for std::string: split on `separator` only, preserving
// whitespace and empty parts. Always returns at least one element — splitting
// the empty string yields { "" }, and "a," yields { "a", "" }.
template<>
std::vector<std::string> string_split<std::string>(const std::string & input, char separator)
{
    std::vector<std::string> parts;
    size_t begin_pos = 0;
    size_t separator_pos = input.find(separator);
    while (separator_pos != std::string::npos) {
        parts.emplace_back(input.substr(begin_pos, separator_pos - begin_pos));
        begin_pos = separator_pos + 1;
        separator_pos = input.find(separator, begin_pos);
    }
    // Trailing piece: substr to end of string (equivalent to the original
    // substr(begin_pos, npos - begin_pos), spelled without the npos arithmetic).
    parts.emplace_back(input.substr(begin_pos));
    return parts;
}
426+
404427bool string_parse_kv_override (const char * data, std::vector<llama_model_kv_override> & overrides);
405428void string_process_escapes (std::string & input);
406429
0 commit comments