@@ -80,14 +80,15 @@ enum llama_example {
8080
// Samplers that can be chained into the sampling pipeline.
// The explicit values are part of a stable external ordering: value 5 is the
// former TFS_Z sampler (now removed) and is deliberately left unused so that
// the remaining values do not shift. Do not renumber existing entries.
enum common_sampler_type {
    COMMON_SAMPLER_TYPE_NONE        = 0,
    COMMON_SAMPLER_TYPE_DRY         = 1,
    COMMON_SAMPLER_TYPE_TOP_K       = 2,
    COMMON_SAMPLER_TYPE_TOP_P       = 3,
    COMMON_SAMPLER_TYPE_MIN_P       = 4,
  //COMMON_SAMPLER_TYPE_TFS_Z       = 5, // removed; value reserved to keep numbering stable
    COMMON_SAMPLER_TYPE_TYPICAL_P   = 6,
    COMMON_SAMPLER_TYPE_TEMPERATURE = 7,
    COMMON_SAMPLER_TYPE_XTC         = 8,
    COMMON_SAMPLER_TYPE_INFILL      = 9,
};
9293
9394// dimensionality reduction methods, used by cvector-generator
@@ -100,34 +101,39 @@ enum dimre_method {
100101struct common_sampler_params {
101102 uint32_t seed = LLAMA_DEFAULT_SEED; // the seed used to initialize llama_sampler
102103
103- int32_t n_prev = 64 ; // number of previous tokens to remember
104- int32_t n_probs = 0 ; // if greater than 0, output the probabilities of top n_probs tokens.
105- int32_t min_keep = 0 ; // 0 = disabled, otherwise samplers should return at least min_keep tokens
106- int32_t top_k = 40 ; // <= 0 to use vocab size
107- float top_p = 0 .95f ; // 1.0 = disabled
108- float min_p = 0 .05f ; // 0.0 = disabled
109- float xtc_probability = 0 .00f ; // 0.0 = disabled
110- float xtc_threshold = 0 .10f ; // > 0.5 disables XTC
111- float tfs_z = 1 .00f ; // 1.0 = disabled
112- float typ_p = 1 .00f ; // typical_p, 1.0 = disabled
113- float temp = 0 .80f ; // <= 0.0 to sample greedily, 0.0 to not output probabilities
114- float dynatemp_range = 0 .00f ; // 0.0 = disabled
115- float dynatemp_exponent = 1 .00f ; // controls how entropy maps to temperature in dynamic temperature sampler
116- int32_t penalty_last_n = 64 ; // last n tokens to penalize (0 = disable penalty, -1 = context size)
117- float penalty_repeat = 1 .00f ; // 1.0 = disabled
118- float penalty_freq = 0 .00f ; // 0.0 = disabled
119- float penalty_present = 0 .00f ; // 0.0 = disabled
120- int32_t mirostat = 0 ; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
121- float mirostat_tau = 5 .00f ; // target entropy
122- float mirostat_eta = 0 .10f ; // learning rate
123- bool penalize_nl = false ; // consider newlines as a repeatable token
124- bool ignore_eos = false ;
125- bool no_perf = false ; // disable performance metrics
104+ int32_t n_prev = 64 ; // number of previous tokens to remember
105+ int32_t n_probs = 0 ; // if greater than 0, output the probabilities of top n_probs tokens.
106+ int32_t min_keep = 0 ; // 0 = disabled, otherwise samplers should return at least min_keep tokens
107+ int32_t top_k = 40 ; // <= 0 to use vocab size
108+ float top_p = 0 .95f ; // 1.0 = disabled
109+ float min_p = 0 .05f ; // 0.0 = disabled
110+ float xtc_probability = 0 .00f ; // 0.0 = disabled
111+ float xtc_threshold = 0 .10f ; // > 0.5 disables XTC
112+ float typ_p = 1 .00f ; // typical_p, 1.0 = disabled
113+ float temp = 0 .80f ; // <= 0.0 to sample greedily, 0.0 to not output probabilities
114+ float dynatemp_range = 0 .00f ; // 0.0 = disabled
115+ float dynatemp_exponent = 1 .00f ; // controls how entropy maps to temperature in dynamic temperature sampler
116+ int32_t penalty_last_n = 64 ; // last n tokens to penalize (0 = disable penalty, -1 = context size)
117+ float penalty_repeat = 1 .00f ; // 1.0 = disabled
118+ float penalty_freq = 0 .00f ; // 0.0 = disabled
119+ float penalty_present = 0 .00f ; // 0.0 = disabled
120+ float dry_multiplier = 0 .0f ; // 0.0 = disabled; DRY repetition penalty for tokens extending repetition:
121+ float dry_base = 1 .75f ; // 0.0 = disabled; multiplier * base ^ (length of sequence before token - allowed length)
122+ int32_t dry_allowed_length = 2 ; // tokens extending repetitions beyond this receive penalty
123+ int32_t dry_penalty_last_n = -1 ; // how many tokens to scan for repetitions (0 = disable penalty, -1 = context size)
124+ int32_t mirostat = 0 ; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
125+ float mirostat_tau = 5 .00f ; // target entropy
126+ float mirostat_eta = 0 .10f ; // learning rate
127+ bool penalize_nl = false ; // consider newlines as a repeatable token
128+ bool ignore_eos = false ;
129+ bool no_perf = false ; // disable performance metrics
130+
131+ std::vector<std::string> dry_sequence_breakers = {" \n " , " :" , " \" " , " *" }; // default sequence breakers for DRY
126132
127133
128134 std::vector<enum common_sampler_type> samplers = {
135+ COMMON_SAMPLER_TYPE_DRY,
129136 COMMON_SAMPLER_TYPE_TOP_K,
130- COMMON_SAMPLER_TYPE_TFS_Z,
131137 COMMON_SAMPLER_TYPE_TYPICAL_P,
132138 COMMON_SAMPLER_TYPE_TOP_P,
133139 COMMON_SAMPLER_TYPE_MIN_P,
@@ -376,15 +382,14 @@ bool set_process_priority(enum ggml_sched_priority prio);
376382LLAMA_COMMON_ATTRIBUTE_FORMAT (1 , 2 )
377383std::string string_format(const char * fmt, ...);
378384
379- std::vector<std::string> string_split (std::string input, char separator);
380-
381385std::string string_strip (const std::string & str);
382386std::string string_get_sortable_timestamp ();
383387
384388void string_replace_all (std::string & s, const std::string & search, const std::string & replace);
385389
386390template <class T >
387391static std::vector<T> string_split (const std::string & str, char delim) {
392+ static_assert (!std::is_same<T, std::string>::value, " Please use the specialized version for std::string" );
388393 std::vector<T> values;
389394 std::istringstream str_stream (str);
390395 std::string token;
@@ -397,6 +402,22 @@ static std::vector<T> string_split(const std::string & str, char delim) {
397402 return values;
398403}
399404
405+ template <>
406+ std::vector<std::string> string_split<std::string>(const std::string & input, char separator)
407+ {
408+ std::vector<std::string> parts;
409+ size_t begin_pos = 0 ;
410+ size_t separator_pos = input.find (separator);
411+ while (separator_pos != std::string::npos) {
412+ std::string part = input.substr (begin_pos, separator_pos - begin_pos);
413+ parts.emplace_back (part);
414+ begin_pos = separator_pos + 1 ;
415+ separator_pos = input.find (separator, begin_pos);
416+ }
417+ parts.emplace_back (input.substr (begin_pos, separator_pos - begin_pos));
418+ return parts;
419+ }
420+
400421bool string_parse_kv_override (const char * data, std::vector<llama_model_kv_override> & overrides);
401422void string_process_escapes (std::string & input);
402423
0 commit comments