@@ -171,56 +171,43 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co
171171                params.penalize_nl ,
172172                params.ignore_eos ));
173173
174-     if  (params.temp  >= 0 .0f ) {
175-         if  (params.mirostat  == 0 ) {
176-             for  (const  auto  & cnstr : params.samplers ) {
177-                 switch  (cnstr) {
178-                     case  COMMON_SAMPLER_TYPE_TOP_K:
179-                         llama_sampler_chain_add (result->chain , llama_sampler_init_top_k     (params.top_k ));
180-                         break ;
181-                     case  COMMON_SAMPLER_TYPE_TOP_P:
182-                         llama_sampler_chain_add (result->chain , llama_sampler_init_top_p     (params.top_p , params.min_keep ));
183-                         break ;
184-                     case  COMMON_SAMPLER_TYPE_MIN_P:
185-                         llama_sampler_chain_add (result->chain , llama_sampler_init_min_p     (params.min_p , params.min_keep ));
186-                         break ;
187-                     case  COMMON_SAMPLER_TYPE_XTC:
188-                         llama_sampler_chain_add (result->chain , llama_sampler_init_xtc       (params.xtc_probability , params.xtc_threshold , params.min_keep , params.seed ));
189-                         break ;
190-                     case  COMMON_SAMPLER_TYPE_TFS_Z:
191-                         llama_sampler_chain_add (result->chain , llama_sampler_init_tail_free (params.tfs_z , params.min_keep ));
192-                         break ;
193-                     case  COMMON_SAMPLER_TYPE_TYPICAL_P:
194-                         llama_sampler_chain_add (result->chain , llama_sampler_init_typical   (params.typ_p , params.min_keep ));
195-                         break ;
196-                     case  COMMON_SAMPLER_TYPE_TEMPERATURE:
197-                         llama_sampler_chain_add (result->chain , llama_sampler_init_temp_ext  (params.temp , params.dynatemp_range , params.dynatemp_exponent ));
198-                         break ;
199-                     default :
200-                         GGML_ASSERT (false  && " unknown sampler type"  );
201-                 }
174+     if  (params.mirostat  == 0 ) {
175+         for  (const  auto  & cnstr : params.samplers ) {
176+             switch  (cnstr) {
177+                 case  COMMON_SAMPLER_TYPE_TOP_K:
178+                     llama_sampler_chain_add (result->chain , llama_sampler_init_top_k     (params.top_k ));
179+                     break ;
180+                 case  COMMON_SAMPLER_TYPE_TOP_P:
181+                     llama_sampler_chain_add (result->chain , llama_sampler_init_top_p     (params.top_p , params.min_keep ));
182+                     break ;
183+                 case  COMMON_SAMPLER_TYPE_MIN_P:
184+                     llama_sampler_chain_add (result->chain , llama_sampler_init_min_p     (params.min_p , params.min_keep ));
185+                     break ;
186+                 case  COMMON_SAMPLER_TYPE_XTC:
187+                     llama_sampler_chain_add (result->chain , llama_sampler_init_xtc       (params.xtc_probability , params.xtc_threshold , params.min_keep , params.seed ));
188+                     break ;
189+                 case  COMMON_SAMPLER_TYPE_TFS_Z:
190+                     llama_sampler_chain_add (result->chain , llama_sampler_init_tail_free (params.tfs_z , params.min_keep ));
191+                     break ;
192+                 case  COMMON_SAMPLER_TYPE_TYPICAL_P:
193+                     llama_sampler_chain_add (result->chain , llama_sampler_init_typical   (params.typ_p , params.min_keep ));
194+                     break ;
195+                 case  COMMON_SAMPLER_TYPE_TEMPERATURE:
196+                     llama_sampler_chain_add (result->chain , llama_sampler_init_temp_ext  (params.temp , params.dynatemp_range , params.dynatemp_exponent ));
197+                     break ;
198+                 default :
199+                     GGML_ASSERT (false  && " unknown sampler type"  );
202200            }
203-             llama_sampler_chain_add (result->chain , llama_sampler_init_dist (params.seed ));
204-         } else  if  (params.mirostat  == 1 ) {
205-             llama_sampler_chain_add (result->chain , llama_sampler_init_temp (params.temp ));
206-             llama_sampler_chain_add (result->chain , llama_sampler_init_mirostat (llama_n_vocab (model), params.seed , params.mirostat_tau , params.mirostat_eta , 100 ));
207-         } else  if  (params.mirostat  == 2 ) {
208-             llama_sampler_chain_add (result->chain , llama_sampler_init_temp (params.temp ));
209-             llama_sampler_chain_add (result->chain , llama_sampler_init_mirostat_v2 (params.seed , params.mirostat_tau , params.mirostat_eta ));
210-         } else  {
211-             GGML_ASSERT (false  && " unknown mirostat version"  );
212201        }
202+         llama_sampler_chain_add (result->chain , llama_sampler_init_dist (params.seed ));
203+     } else  if  (params.mirostat  == 1 ) {
204+         llama_sampler_chain_add (result->chain , llama_sampler_init_temp (params.temp ));
205+         llama_sampler_chain_add (result->chain , llama_sampler_init_mirostat (llama_n_vocab (model), params.seed , params.mirostat_tau , params.mirostat_eta , 100 ));
206+     } else  if  (params.mirostat  == 2 ) {
207+         llama_sampler_chain_add (result->chain , llama_sampler_init_temp (params.temp ));
208+         llama_sampler_chain_add (result->chain , llama_sampler_init_mirostat_v2 (params.seed , params.mirostat_tau , params.mirostat_eta ));
213209    } else  {
214-         //  negative temperatures will trigger "greedy" sampling: simply take the most likely token each time
215-         if  (params.n_probs  > 0 ) {
216-             //  some use cases require to sample greedily, but still obtain the probabilities of the top tokens
217-             //  ref: https://github.com/ggerganov/llama.cpp/pull/9605
218-             // 
219-             //  the following will not produce exactly the same probs as applying softmax to the full vocabulary, but
220-             //  it is much faster, since we avoid sorting all tokens and should give a good approximation
221-             llama_sampler_chain_add (result->chain , llama_sampler_init_top_k (params.n_probs ));
222-         }
223-         llama_sampler_chain_add (result->chain , llama_sampler_init_greedy ());
210+         GGML_ASSERT (false  && " unknown mirostat version"  );
224211    }
225212
226213    return  result;
0 commit comments