@@ -249,19 +249,6 @@ public void AddMirostat2Sampler(uint seed, float tau, float eta)
        static extern IntPtr llama_sampler_init_mirostat_v2(uint seed, float tau, float eta);
    }

-
-    /// <summary>
-    /// Sorts candidate tokens by their logits in descending order and calculates probabilities based on the logits.
-    /// </summary>
-    /// <returns></returns>
-    public void AddSoftmax()
-    {
-        llama_sampler_chain_add(this, llama_sampler_init_softmax());
-
-        [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
-        static extern IntPtr llama_sampler_init_softmax();
-    }
-
    /// <summary>
    /// Top-K sampling described in the academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751
    /// </summary>
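
For context, the sampler removed above only sorted candidates by logit and normalized them into probabilities. A minimal managed sketch of that computation (the class name and array-based signature are hypothetical, for illustration only):

    using System;

    static class SoftmaxSketch
    {
        // Sort logits in descending order and convert them to probabilities.
        public static float[] SoftmaxDescending(float[] logits)
        {
            var sorted = (float[])logits.Clone();
            Array.Sort(sorted);
            Array.Reverse(sorted);                 // descending by logit

            var max = sorted[0];                   // subtract the max for numerical stability
            var probs = new float[sorted.Length];
            var sum = 0f;
            for (var i = 0; i < sorted.Length; i++)
            {
                probs[i] = MathF.Exp(sorted[i] - max);
                sum += probs[i];
            }
            for (var i = 0; i < probs.Length; i++)
                probs[i] /= sum;                   // probabilities now sum to 1
            return probs;
        }
    }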
@@ -291,7 +278,6 @@ public void AddTopP(float p, nint minKeep)
    /// <summary>
    /// Minimum P sampling as described in https://github.com/ggerganov/llama.cpp/pull/3841
    /// </summary>
-    /// <returns></returns>
    public void AddMinP(float p, nint minKeep)
    {
        llama_sampler_chain_add(this, llama_sampler_init_min_p(p, minKeep));
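
The min-p rule keeps only tokens whose probability is at least `p` times that of the most likely token, never fewer than `minKeep`. A hypothetical managed sketch of the filter (the native sampler does this work itself):

    using System.Linq;

    static class MinPSketch
    {
        // Keep values with prob >= p * max(prob); fall back to the top minKeep values.
        public static float[] MinPFilter(float[] probs, float p, int minKeep)
        {
            var max = probs.Max();
            var kept = probs.Where(x => x >= p * max).ToArray();
            return kept.Length >= minKeep
                ? kept
                : probs.OrderByDescending(x => x).Take(minKeep).ToArray();
        }
    }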
@@ -305,7 +291,6 @@ public void AddMinP(float p, nint minKeep)
    /// <summary>
    /// Tail-free sampling as described in https://www.trentonbricken.com/Tail-Free-Sampling/
    /// </summary>
-    /// <returns></returns>
    public void AddTailFree(float z, nint minKeep)
    {
        llama_sampler_chain_add(this, llama_sampler_init_tail_free(z, minKeep));
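
Tail-free sampling drops the low-probability "tail" where the curvature of the sorted distribution flattens out. A rough sketch of the idea, assuming probabilities arrive sorted in descending order (names hypothetical):

    using System;
    using System.Linq;

    static class TailFreeSketch
    {
        // probs must be sorted descending. Returns how many tokens to keep.
        public static int TailFreeCutoff(float[] probs, float z, int minKeep)
        {
            if (probs.Length <= 2) return probs.Length;

            // Absolute second differences approximate the curvature of the distribution.
            var curvature = new float[probs.Length - 2];
            for (var i = 0; i < curvature.Length; i++)
                curvature[i] = Math.Abs(probs[i] - 2 * probs[i + 1] + probs[i + 2]);

            var total = curvature.Sum();
            if (total <= 0) return probs.Length;

            // Keep tokens until the cumulative normalized curvature exceeds z.
            var cumSum = 0f;
            var keep = probs.Length;
            for (var i = 0; i < curvature.Length; i++)
            {
                cumSum += curvature[i] / total;
                if (cumSum > z) { keep = i + 1; break; }
            }
            return Math.Max(keep, minKeep);
        }
    }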
@@ -319,7 +304,6 @@ public void AddTailFree(float z, nint minKeep)
    /// <summary>
    /// Locally Typical Sampling implementation described in the paper https://arxiv.org/abs/2202.00666.
    /// </summary>
-    /// <returns></returns>
    public void AddTypical(float p, nint minKeep)
    {
        llama_sampler_chain_add(this, llama_sampler_init_typical(p, minKeep));
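
Locally typical sampling prefers tokens whose surprisal (-log prob) is close to the entropy of the whole distribution. A hypothetical managed sketch of the selection step:

    using System;
    using System.Collections.Generic;
    using System.Linq;

    static class TypicalSketch
    {
        // Keep the tokens closest to the distribution's entropy in surprisal,
        // until their cumulative probability reaches p (and at least minKeep).
        public static int[] TypicalIndices(float[] probs, float p, int minKeep)
        {
            var entropy = -probs.Sum(x => x * MathF.Log(x));
            var ordered = Enumerable.Range(0, probs.Length)
                .OrderBy(i => MathF.Abs(-MathF.Log(probs[i]) - entropy));

            var kept = new List<int>();
            var cumSum = 0f;
            foreach (var i in ordered)
            {
                kept.Add(i);
                cumSum += probs[i];
                if (cumSum >= p && kept.Count >= minKeep) break;
            }
            return kept.ToArray();
        }
    }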
@@ -331,14 +315,15 @@ public void AddTypical(float p, nint minKeep)
    }

    /// <summary>
-    /// Apply temperature to the logits
+    /// Apply temperature to the logits.
+    /// If temperature is less than or equal to zero, the maximum logit is left unchanged and the rest are set to -infinity.
    /// </summary>
    /// <param name="t"></param>
-    /// <returns></returns>
    public void AddTemperature(float t)
    {
        llama_sampler_chain_add(this, llama_sampler_init_temp(t));

+        // #details Updates the logits l_i' = l_i/t. When t <= 0.0f, the maximum logit is kept at its original value and the rest are set to -inf.
        [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
        static extern IntPtr llama_sampler_init_temp(float t);
    }
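
A minimal managed sketch of the documented update l_i' = l_i/t (names hypothetical; the native sampler performs this itself):

    using System;
    using System.Linq;

    static class TemperatureSketch
    {
        // For t <= 0, only the maximum logit survives (greedy sampling).
        public static void ApplyTemperature(float[] logits, float t)
        {
            if (t <= 0f)
            {
                var maxIndex = Array.IndexOf(logits, logits.Max());
                for (var i = 0; i < logits.Length; i++)
                    if (i != maxIndex)
                        logits[i] = float.NegativeInfinity;
                return;
            }

            for (var i = 0; i < logits.Length; i++)
                logits[i] /= t;
        }
    }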
@@ -349,7 +334,6 @@ public void AddTemperature(float t)
    /// <param name="t"></param>
    /// <param name="delta"></param>
    /// <param name="exponent"></param>
-    /// <returns></returns>
    public void AddDynamicTemperature(float t, float delta, float exponent)
    {
        llama_sampler_chain_add(this, llama_sampler_init_temp_ext(t, delta, exponent));
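
A hypothetical usage fragment (the `chain` variable is assumed, not part of this diff). In llama.cpp's extended temperature sampler, `delta` defines a range around `t` that is scaled by the normalized entropy of the candidates, with `exponent` shaping the mapping:

    // Entropy-adaptive temperature, roughly within [t - delta, t + delta].
    chain.AddMinP(0.05f, 1);
    chain.AddDynamicTemperature(0.8f, 0.5f, 1.0f);  // t = 0.8, delta = 0.5, exponent = 1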
@@ -358,6 +342,51 @@ public void AddDynamicTemperature(float t, float delta, float exponent)
        static extern IntPtr llama_sampler_init_temp_ext(float t, float delta, float exponent);
    }

+    /// <summary>
+    /// XTC sampler as described in https://github.com/oobabooga/text-generation-webui/pull/6335
+    /// </summary>
+    /// <param name="p"></param>
+    /// <param name="t"></param>
+    /// <param name="minKeep"></param>
+    /// <param name="seed"></param>
+    public void AddXTC(float p, float t, int minKeep, uint seed)
+    {
+        llama_sampler_chain_add(this, llama_sampler_init_xtc(p, t, minKeep, seed));
+
+        [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
+        static extern IntPtr llama_sampler_init_xtc(float p, float t, nint minKeep, uint seed);
+    }
+
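XTC ("exclude top choices") fires only with probability `p`; when it does, it removes every candidate above the threshold `t` except the least likely of them, so minor candidates get a chance. A rough sketch of that idea, assuming probabilities arrive sorted in descending order (names hypothetical):

    using System;
    using System.Linq;

    static class XtcSketch
    {
        // probs must be sorted descending.
        public static float[] ExcludeTopChoices(float[] probs, float p, float t, Random rng)
        {
            if (rng.NextDouble() >= p)
                return probs;                               // apply XTC only with probability p

            var aboveThreshold = probs.Count(x => x >= t);
            return aboveThreshold > 1
                ? probs.Skip(aboveThreshold - 1).ToArray()  // keep only the last "top" token
                : probs;
        }
    }
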
+    /// <summary>
+    /// This sampler is meant to be used for fill-in-the-middle infilling, after top_k + top_p sampling.
+    /// <br />
+    /// 1. If the sum of the EOG probabilities times the number of candidates is higher than the sum of the other probabilities, pick EOG.<br />
+    /// 2. Combine the probabilities of tokens that have the same prefix.<br />
+    /// <br />
+    /// Example:<br />
+    /// <br />
+    /// - before:<br />
+    ///   "hel": 0.5<br />
+    ///   "hell": 0.2<br />
+    ///   "hello": 0.1<br />
+    ///   "dummy": 0.1<br />
+    /// <br />
+    /// - after:<br />
+    ///   "hel": 0.8<br />
+    ///   "dummy": 0.1<br />
+    /// <br />
+    /// 3. Discard non-EOG tokens with low probability.<br />
+    /// 4. If no tokens are left, pick EOT.
+    /// </summary>
+    /// <param name="model"></param>
+    public void AddFillInMiddleInfill(SafeLlamaModelHandle model)
+    {
+        llama_sampler_chain_add(this, llama_sampler_init_infill(model));
+
+        [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
+        static extern IntPtr llama_sampler_init_infill(SafeLlamaModelHandle model);
+    }
+
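Step 2 above (merging probabilities of candidates that share a prefix) can be pictured with a small sketch. This dictionary-based version is purely illustrative, not the native implementation, and reproduces the "hel"/"dummy" example from the comment:

    using System.Collections.Generic;
    using System.Linq;

    static class InfillSketch
    {
        // Fold each token's probability into the shortest candidate that prefixes it.
        public static Dictionary<string, float> MergeByPrefix(Dictionary<string, float> probs)
        {
            var merged = new Dictionary<string, float>();
            foreach (var (token, p) in probs)
            {
                var target = probs.Keys
                    .Where(k => token.StartsWith(k))
                    .OrderBy(k => k.Length)
                    .First();
                merged[target] = merged.GetValueOrDefault(target) + p;
            }
            return merged;
        }
    }

    // { "hel": 0.5, "hell": 0.2, "hello": 0.1, "dummy": 0.1 }
    //   => { "hel": 0.8, "dummy": 0.1 }
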
    /// <summary>
    /// Create a sampler which makes tokens impossible unless they match the grammar
    /// </summary>