@@ -267,19 +267,6 @@ public void AddMirostat2Sampler(uint seed, float tau, float eta)
         static extern IntPtr llama_sampler_init_mirostat_v2(uint seed, float tau, float eta);
     }
 
-
-    /// <summary>
-    /// Sorts candidate tokens by their logits in descending order and calculates probabilities based on the logits.
-    /// </summary>
-    /// <returns></returns>
-    public void AddSoftmax()
-    {
-        llama_sampler_chain_add(this, llama_sampler_init_softmax());
-
-        [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
-        static extern IntPtr llama_sampler_init_softmax();
-    }
-
     /// <summary>
     /// Top-K sampling described in the academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751
     /// </summary>
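
For intuition, top-k in isolation just keeps the k highest-scoring tokens and renormalizes over the survivors. A minimal standalone sketch of that idea (illustration only; `TopKSketch` and its names are invented here, not LLamaSharp or llama.cpp API):

using System;
using System.Linq;

static class TopKSketch
{
    // Keep only the k largest logits, then softmax over the survivors.
    public static (int Token, float Prob)[] TopK(float[] logits, int k)
    {
        var kept = logits
            .Select((logit, token) => (Token: token, Logit: logit))
            .OrderByDescending(c => c.Logit)
            .Take(k)
            .ToArray();

        float max = kept.Max(c => c.Logit);   // subtract max for numerical stability
        float[] exp = kept.Select(c => MathF.Exp(c.Logit - max)).ToArray();
        float sum = exp.Sum();
        return kept.Select((c, i) => (c.Token, exp[i] / sum)).ToArray();
    }
}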
@@ -309,7 +296,6 @@ public void AddTopP(float p, nint minKeep)
     /// <summary>
     /// Minimum P sampling as described in https://github.com/ggerganov/llama.cpp/pull/3841
     /// </summary>
-    /// <returns></returns>
     public void AddMinP(float p, nint minKeep)
     {
         llama_sampler_chain_add(this, llama_sampler_init_min_p(p, minKeep));
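
Per the linked PR, min-p keeps only tokens whose probability is at least p times that of the single most likely token, with minKeep as a floor. A rough standalone illustration (invented names, not the native implementation):

using System.Linq;

static class MinPSketch
{
    // Keep tokens with prob >= p * max(prob); never return fewer than minKeep.
    public static (int Token, float Prob)[] MinP((int Token, float Prob)[] candidates, float p, int minKeep)
    {
        float threshold = p * candidates.Max(c => c.Prob);
        var kept = candidates.Where(c => c.Prob >= threshold).ToArray();
        return kept.Length >= minKeep
            ? kept
            : candidates.OrderByDescending(c => c.Prob).Take(minKeep).ToArray();
    }
}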
@@ -323,7 +309,6 @@ public void AddMinP(float p, nint minKeep)
     /// <summary>
     /// Tail Free sampling as described in https://www.trentonbricken.com/Tail-Free-Sampling/
     /// </summary>
-    /// <returns></returns>
    public void AddTailFree(float z, nint minKeep)
     {
         llama_sampler_chain_add(this, llama_sampler_init_tail_free(z, minKeep));
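
Tail-free sampling cuts the distribution where its sorted probabilities stop "curving", i.e. where the normalized absolute second differences have accounted for a share z of their total. A sketch of my understanding of the method; treat the details as an approximation rather than a mirror of the native code:

using System;
using System.Linq;

static class TailFreeSketch
{
    // Sort probs descending, take absolute second differences, normalize them,
    // and keep tokens until their cumulative weight exceeds z.
    public static float[] TailFree(float[] probs, float z, int minKeep)
    {
        float[] sorted = probs.OrderByDescending(p => p).ToArray();
        if (sorted.Length < 3)
            return sorted;

        float[] first = new float[sorted.Length - 1];
        for (int i = 0; i < first.Length; i++)
            first[i] = sorted[i] - sorted[i + 1];

        float[] second = new float[first.Length - 1];
        for (int i = 0; i < second.Length; i++)
            second[i] = Math.Abs(first[i] - first[i + 1]);

        float total = second.Sum();
        if (total <= 0)
            return sorted;   // flat tail: nothing to cut

        float cumulative = 0;
        int keep = sorted.Length;
        for (int i = 0; i < second.Length; i++)
        {
            cumulative += second[i] / total;
            if (cumulative > z)
            {
                keep = Math.Max(i + 1, minKeep);
                break;
            }
        }
        return sorted.Take(keep).ToArray();
    }
}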
@@ -337,7 +322,6 @@ public void AddTailFree(float z, nint minKeep)
     /// <summary>
     /// Locally Typical Sampling implementation described in the paper https://arxiv.org/abs/2202.00666.
     /// </summary>
-    /// <returns></returns>
     public void AddTypical(float p, nint minKeep)
     {
         llama_sampler_chain_add(this, llama_sampler_init_typical(p, minKeep));
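
The idea from the paper: keep the tokens whose surprisal (-log p) is closest to the entropy of the whole distribution, until their cumulative probability reaches p. A hedged sketch of that idea with invented names (not the native implementation):

using System;
using System.Linq;

static class TypicalSketch
{
    // Rank tokens by |surprisal - entropy| and keep the closest ones until
    // their cumulative probability reaches p (and at least minKeep tokens).
    // Assumes all candidate probabilities are > 0.
    public static (int Token, float Prob)[] Typical((int Token, float Prob)[] candidates, float p, int minKeep)
    {
        double entropy = -candidates.Sum(c => c.Prob * Math.Log(c.Prob));
        var ranked = candidates
            .OrderBy(c => Math.Abs(-Math.Log(c.Prob) - entropy))
            .ToArray();

        double cumulative = 0;
        int keep = 0;
        foreach (var c in ranked)
        {
            cumulative += c.Prob;
            keep++;
            if (cumulative >= p && keep >= minKeep)
                break;
        }
        return ranked.Take(keep).ToArray();
    }
}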
@@ -349,14 +333,15 @@ public void AddTypical(float p, nint minKeep)
     }
 
     /// <summary>
-    /// Apply temperature to the logits
+    /// Apply temperature to the logits.
+    /// If temperature is less than or equal to zero, the maximum logit is left unchanged and the rest are set to -infinity.
     /// </summary>
     /// <param name="t"></param>
-    /// <returns></returns>
     public void AddTemperature(float t)
     {
         llama_sampler_chain_add(this, llama_sampler_init_temp(t));
 
+        // #details Updates the logits l_i' = l_i/t. When t <= 0.0f, the maximum logit is kept at its original value, the rest are set to -inf
         [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
         static extern IntPtr llama_sampler_init_temp(float t);
     }
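
The added comment pins down the behavior: l_i' = l_i / t, with t <= 0 collapsing to greedy selection. In plain C# the transform would look roughly like this (illustration only, invented names):

using System;
using System.Linq;

static class TemperatureSketch
{
    // l_i' = l_i / t. For t <= 0, keep the maximum logit unchanged and
    // set everything else to -infinity (greedy selection).
    public static float[] ApplyTemperature(float[] logits, float t)
    {
        if (t <= 0f)
        {
            int argMax = Array.IndexOf(logits, logits.Max());
            return logits
                .Select((l, i) => i == argMax ? l : float.NegativeInfinity)
                .ToArray();
        }
        return logits.Select(l => l / t).ToArray();
    }
}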
@@ -367,7 +352,6 @@ public void AddTemperature(float t)
     /// <param name="t"></param>
     /// <param name="delta"></param>
     /// <param name="exponent"></param>
-    /// <returns></returns>
     public void AddDynamicTemperature(float t, float delta, float exponent)
     {
         llama_sampler_chain_add(this, llama_sampler_init_temp_ext(t, delta, exponent));
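
For context, and as an assumption on my part worth checking against llama.cpp's entropy-based dynamic temperature: the effective temperature is interpolated within [t - delta, t + delta] according to the normalized entropy of the candidate distribution, raised to exponent. A sketch:

using System;
using System.Linq;

static class DynTempSketch
{
    // Pick an effective temperature in [t - delta, t + delta] based on how
    // uncertain the distribution is (entropy normalized to [0, 1]).
    // This mirrors my reading of the entropy-based scheme; the exact native
    // formula should be verified against llama.cpp.
    public static float EffectiveTemperature(float[] probs, float t, float delta, float exponent)
    {
        double entropy = -probs.Where(p => p > 0).Sum(p => p * Math.Log(p));
        double normalized = probs.Length > 1 ? entropy / Math.Log(probs.Length) : 0;
        float minTemp = Math.Max(0f, t - delta);
        float maxTemp = t + delta;
        return minTemp + (maxTemp - minTemp) * (float)Math.Pow(normalized, exponent);
    }
}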
@@ -376,6 +360,51 @@ public void AddDynamicTemperature(float t, float delta, float exponent)
         static extern IntPtr llama_sampler_init_temp_ext(float t, float delta, float exponent);
     }
 
+    /// <summary>
+    /// XTC sampler as described in https://github.com/oobabooga/text-generation-webui/pull/6335
+    /// </summary>
+    /// <param name="p"></param>
+    /// <param name="t"></param>
+    /// <param name="minKeep"></param>
+    /// <param name="seed"></param>
+    public void AddXTC(float p, float t, nint minKeep, uint seed)
+    {
+        llama_sampler_chain_add(this, llama_sampler_init_xtc(p, t, minKeep, seed));
+
+        [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
+        static extern IntPtr llama_sampler_init_xtc(float p, float t, nint minKeep, uint seed);
+    }
+
+    /// <summary>
+    /// This sampler is meant to be used for fill-in-the-middle infilling, after top_k + top_p sampling.
+    /// <br />
+    /// 1. if the sum of the EOG probs times the number of candidates is higher than the sum of the other probs -> pick EOG<br />
+    /// 2. combine probs of tokens that have the same prefix<br />
+    /// <br />
+    /// example:<br />
+    /// <br />
+    /// - before:<br />
+    /// "hel": 0.5<br />
+    /// "hell": 0.2<br />
+    /// "hello": 0.1<br />
+    /// "dummy": 0.1<br />
+    /// <br />
+    /// - after:<br />
+    /// "hel": 0.8<br />
+    /// "dummy": 0.1<br />
+    /// <br />
+    /// 3. discard non-EOG tokens with low prob<br />
+    /// 4. if no tokens are left -> pick EOT
+    /// </summary>
+    /// <param name="model"></param>
+    public void AddFillInMiddleInfill(SafeLlamaModelHandle model)
+    {
+        llama_sampler_chain_add(this, llama_sampler_init_infill(model));
+
+        [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
+        static extern IntPtr llama_sampler_init_infill(SafeLlamaModelHandle model);
+    }
+
     /// <summary>
     /// Create a sampler which makes tokens impossible unless they match the grammar
     /// </summary>