Commit b321839: merge pull request #962 from martindevans/nov_binaries ("November Binary Update")
2 parents: 079410c + 5b8906b

14 files changed, +108 −258 lines

.github/_typos.toml (4 additions, 0 deletions)

@@ -14,3 +14,7 @@ extend-exclude = [
     "LLama.Benchmark/Assets/",
     "LLama.Examples/Assets/"
 ]
+
+[default.extend-words]
+# Used in a comment in SafeLLamaSamplerHandle.cs, as a prefix of "hello"
+hel = "hel"

(Mapping "hel" to itself tells the typos checker to accept the word as-is.)

LLama.Examples/Examples/CustomSampler.cs (0 additions, 1 deletion)

@@ -60,7 +60,6 @@ protected override SafeLLamaSamplerChainHandle CreateChain(SafeLLamaContextHandl
         chain.AddCustom(new RemoveMostLikelyToken());

         // Select from the distribution
-        chain.AddSoftmax();
         chain.AddDistributionSampler(42);

         return chain;
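
The explicit softmax step is dropped because the distribution sampler in the updated binaries normalizes the candidate logits itself. A minimal sketch of the resulting chain, assuming the surrounding example class from this file (the SafeLLamaSamplerChainHandle.Create / LLamaSamplerChainParams.Default factory calls are an assumption, not shown in this diff):

using LLama.Native;

sealed class CustomSamplerDemo
{
    // Sketch only: remove the most likely token, then sample from the
    // remaining (implicitly normalized) distribution.
    static SafeLLamaSamplerChainHandle CreateChain()
    {
        var chain = SafeLLamaSamplerChainHandle.Create(LLamaSamplerChainParams.Default());

        // RemoveMostLikelyToken is the custom sampler defined in this example file.
        chain.AddCustom(new RemoveMostLikelyToken());

        // No AddSoftmax() needed any more: the distribution sampler
        // normalizes candidates before drawing (seed = 42).
        chain.AddDistributionSampler(42);

        return chain;
    }
}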

LLama/Extensions/LLamaExecutorExtensions.cs (0 additions, 1 deletion)

@@ -150,7 +150,6 @@ private string CreatePrompt(IList<ChatMessage> messages)
     MinKeep = options?.AdditionalProperties?.TryGetValue(nameof(DefaultSamplingPipeline.MinKeep), out int mk) is true ? mk : s_defaultPipeline.MinKeep,
     MinP = options?.AdditionalProperties?.TryGetValue(nameof(DefaultSamplingPipeline.MinP), out float mp) is true ? mp : s_defaultPipeline.MinP,
     Seed = options?.AdditionalProperties?.TryGetValue(nameof(DefaultSamplingPipeline.Seed), out uint seed) is true ? seed : (uint)(t_random ??= new()).Next(),
-    TailFreeZ = options?.AdditionalProperties?.TryGetValue(nameof(DefaultSamplingPipeline.TailFreeZ), out float tfz) is true ? tfz : s_defaultPipeline.TailFreeZ,
     Temperature = options?.Temperature ?? 0,
     TopP = options?.TopP ?? 0,
     TopK = options?.TopK ?? s_defaultPipeline.TopK,
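
The TailFreeZ mapping goes away because tail-free sampling was removed from llama.cpp itself in this binary update, so DefaultSamplingPipeline no longer exposes it. The remaining pipeline options can still be overridden per request through ChatOptions.AdditionalProperties; a minimal sketch, assuming the Microsoft.Extensions.AI types this extension consumes:

using LLama.Sampling;
using Microsoft.Extensions.AI;

// Sketch only: keys must match the DefaultSamplingPipeline property names,
// and values must have the exact types the TryGetValue calls above expect.
var options = new ChatOptions
{
    Temperature = 0.7f,
    AdditionalProperties = new AdditionalPropertiesDictionary
    {
        [nameof(DefaultSamplingPipeline.Seed)] = 42u,   // uint, else a random seed is used
        [nameof(DefaultSamplingPipeline.MinP)] = 0.05f, // float
    },
};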

LLama/LLamaSharp.csproj (1 addition, 1 deletion)

@@ -56,7 +56,7 @@
     </ItemGroup>

     <PropertyGroup>
-        <BinaryReleaseId>c35e586ea5722184</BinaryReleaseId>
+        <BinaryReleaseId>958367bf530d943a90</BinaryReleaseId>
     </PropertyGroup>

     <PropertyGroup>

LLama/LLavaWeights.cs (2 additions, 1 deletion)

@@ -9,7 +9,8 @@ namespace LLama;
 /// <summary>
 /// A set of llava model weights (mmproj), loaded into memory.
 /// </summary>
-public sealed class LLavaWeights : IDisposable
+public sealed class LLavaWeights
+    : IDisposable
 {
     /// <summary>
     /// The native handle, which is used in the native APIs

LLama/Native/LLamaNativeBatch.cs (3 additions, 7 deletions)

@@ -25,6 +25,7 @@ public unsafe struct LLamaNativeBatch

     /// <summary>
     /// the positions of the respective token in the sequence
+    /// (if set to NULL, the token position will be tracked automatically by llama_decode)
     /// </summary>
     public LLamaPos* pos;

@@ -35,18 +36,13 @@ public unsafe struct LLamaNativeBatch

     /// <summary>
     /// the sequence to which the respective token belongs
+    /// (if set to NULL, the sequence ID will be assumed to be 0)
     /// </summary>
     public LLamaSeqId** seq_id;

     /// <summary>
     /// if zero, the logits for the respective token will not be output
+    /// (if set to NULL, only the logits for last token will be returned)
     /// </summary>
     public byte* logits;
-
-    // Note from llama.cpp:
-    // > helpers for smooth API transition - can be deprecated in the future
-    // > for future-proof code, use the above fields instead and ignore everything below
-    private LLamaPos _all_pos_0;
-    private LLamaPos _all_pos_1;
-    private LLamaSeqId _all_seq_id;
 }
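
The three _all_* fields were llama.cpp's transitional helpers and have been removed upstream, so the struct now mirrors the native llama_batch exactly. To illustrate the NULL-default semantics documented above (sketch only; the n_tokens and tokens field names are assumed from the native llama_batch layout and do not appear in this diff):

using LLama.Native;

// Sketch only: most code should use the managed LLamaBatch wrapper rather
// than filling the native struct by hand.
unsafe
{
    LLamaToken token = default;
    var batch = new LLamaNativeBatch
    {
        n_tokens = 1,
        tokens = &token,  // one token ID
        pos = null,       // positions tracked automatically by llama_decode
        seq_id = null,    // every token assumed to belong to sequence 0
        logits = null,    // logits returned only for the last token
    };
}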

LLama/Native/LLamaPoolingType.cs (5 additions, 0 deletions)

@@ -29,4 +29,9 @@ public enum LLamaPoolingType
     CLS = 2,

     Last = 3,
+
+    /// <summary>
+    /// Used by reranking models to attach the classification head to the graph
+    /// </summary>
+    Rank,
 }
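
Rank takes the next implicit value (4, after Last = 3) and mirrors LLAMA_POOLING_TYPE_RANK in llama.cpp. A minimal sketch of requesting it when loading a reranking model (the PoolingType and Embeddings properties on ModelParams are assumptions; the path is a placeholder):

using LLama;
using LLama.Common;
using LLama.Native;

// Sketch only: load a reranker with Rank pooling so the classification
// head is attached to the graph.
var parameters = new ModelParams("path/to/reranker.gguf")
{
    PoolingType = LLamaPoolingType.Rank,
    Embeddings = true,
};

using var weights = LLamaWeights.LoadFromFile(parameters);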

LLama/Native/LLamaVocabPreType.cs (1 addition, 0 deletions)

@@ -33,4 +33,5 @@ internal enum LLamaVocabPreType
     BLOOM = 23,
     GPT3_FINNISH = 24,
     EXAONE = 25,
+    CHAMELEON = 26,
 }

LLama/Native/NativeApi.Sampling.cs (0 additions, 186 deletions)

This file was deleted. It held the old P/Invoke sampling bindings, which are superseded by the sampler-chain API (SafeLLamaSamplerChainHandle) used in the CustomSampler example above.

LLama/Native/NativeApi.cs (8 additions, 0 deletions)

@@ -49,6 +49,14 @@ public static void llama_empty_call()
     [return: MarshalAs(UnmanagedType.U1)]
     public static extern bool llama_supports_gpu_offload();

+    /// <summary>
+    /// Check if RPC offload is supported
+    /// </summary>
+    /// <returns></returns>
+    [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
+    [return: MarshalAs(UnmanagedType.U1)]
+    public static extern bool llama_supports_rpc();
+
     /// <summary>
     /// Initialize the llama + ggml backend. Call once at the start of the program.
     ///
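
Both capability checks are thin P/Invoke wrappers over llama.cpp. A minimal usage sketch:

using System;
using LLama.Native;

// Probe backend capabilities once at startup.
bool gpu = NativeApi.llama_supports_gpu_offload();
bool rpc = NativeApi.llama_supports_rpc();
Console.WriteLine($"GPU offload: {gpu}, RPC offload: {rpc}");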
