From ac29c34e9bfc630944b28b81605375dbe5836050 Mon Sep 17 00:00:00 2001 From: Martin Evans Date: Tue, 3 Dec 2024 16:42:39 +0000 Subject: [PATCH 01/22] code changes for december update (not working yet) --- LLama.Examples/Program.cs | 6 +- LLama/Extensions/IModelParamsExtensions.cs | 3 +- LLama/LLamaSharp.csproj | 2 +- LLama/Native/GPUSplitMode.cs | 2 +- LLama/Native/LLamaModelParams.cs | 8 ++- LLama/Native/SafeLLamaContextHandle.cs | 17 ++++- LLama/Native/SafeLLamaSamplerHandle.cs | 73 +++++++++++++++++++++- LLama/Sampling/DefaultSamplingPipeline.cs | 2 +- 8 files changed, 101 insertions(+), 12 deletions(-) diff --git a/LLama.Examples/Program.cs b/LLama.Examples/Program.cs index 63114120d..ec4e20b03 100644 --- a/LLama.Examples/Program.cs +++ b/LLama.Examples/Program.cs @@ -1,4 +1,4 @@ -using LLama.Native; +using LLama.Native; using Spectre.Console; using System.Runtime.InteropServices; @@ -30,8 +30,8 @@ __ __ ____ __ // Configure native library to use. This must be done before any other llama.cpp methods are called! NativeLibraryConfig .All - .WithCuda() - //.WithAutoDownload() // An experimental feature + .WithCuda(false) + .WithVulkan(false) .DryRun(out var loadedllamaLibrary, out var loadedLLavaLibrary); // Calling this method forces loading to occur now. diff --git a/LLama/Extensions/IModelParamsExtensions.cs b/LLama/Extensions/IModelParamsExtensions.cs index 523ec737a..d704b2e8c 100644 --- a/LLama/Extensions/IModelParamsExtensions.cs +++ b/LLama/Extensions/IModelParamsExtensions.cs @@ -1,4 +1,4 @@ -using System.IO; +using System.IO; using System; using System.Text; using LLama.Abstractions; @@ -30,6 +30,7 @@ public static IDisposable ToLlamaModelParams(this IModelParams @params, out LLam result = LLamaModelParams.Default(); + result.devices = IntPtr.Zero; result.main_gpu = @params.MainGpu; result.split_mode = @params.SplitMode; result.n_gpu_layers = @params.GpuLayerCount < 0 ? 
int.MaxValue : @params.GpuLayerCount; diff --git a/LLama/LLamaSharp.csproj b/LLama/LLamaSharp.csproj index 735fa81a5..189c3d94a 100644 --- a/LLama/LLamaSharp.csproj +++ b/LLama/LLamaSharp.csproj @@ -56,7 +56,7 @@ - 958367bf530d943a90 + c9b00a70b080d diff --git a/LLama/Native/GPUSplitMode.cs b/LLama/Native/GPUSplitMode.cs index 54fa095c1..27ee7ae49 100644 --- a/LLama/Native/GPUSplitMode.cs +++ b/LLama/Native/GPUSplitMode.cs @@ -17,7 +17,7 @@ public enum GPUSplitMode Layer = 1, /// - /// split rows across GPUs + /// split layers and KV across GPUs, use tensor parallelism if supported /// Row = 2, } \ No newline at end of file diff --git a/LLama/Native/LLamaModelParams.cs b/LLama/Native/LLamaModelParams.cs index c0437d9db..e3394892e 100644 --- a/LLama/Native/LLamaModelParams.cs +++ b/LLama/Native/LLamaModelParams.cs @@ -8,6 +8,12 @@ namespace LLama.Native [StructLayout(LayoutKind.Sequential)] public unsafe struct LLamaModelParams { + /// + /// NULL-terminated list of devices to use for offloading (if NULL, all available devices are used) + /// + //ggml_backend_dev_t* devices; + public IntPtr devices; + /// /// // number of layers to store in VRAM /// @@ -19,7 +25,7 @@ public unsafe struct LLamaModelParams public GPUSplitMode split_mode; /// - /// the GPU that is used for scratch and small tensors + /// the GPU that is used for the entire model when split_mode is LLAMA_SPLIT_MODE_NONE /// public int main_gpu; diff --git a/LLama/Native/SafeLLamaContextHandle.cs b/LLama/Native/SafeLLamaContextHandle.cs index 450f4998a..19187ded9 100644 --- a/LLama/Native/SafeLLamaContextHandle.cs +++ b/LLama/Native/SafeLLamaContextHandle.cs @@ -333,6 +333,14 @@ static SafeLLamaContextHandle() [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] private static extern void llama_kv_cache_update(SafeLLamaContextHandle ctx); + /// + /// Check if the context supports KV cache shifting + /// + /// + /// + [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] + private static extern bool llama_kv_cache_can_shift(SafeLLamaContextHandle ctx); + [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] private static extern LLamaPerfContextTimings llama_perf_context(SafeLLamaContextHandle ctx); @@ -566,7 +574,7 @@ public void Synchronize() /// internally for later use by the decoder cross-attention layers. /// /// - /// 0 = success
< 0 = error
+ /// 0 = success
< 0 = error (the KV cache state is restored to the state before this call)
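A side note on the llama_kv_cache_can_shift binding declared a little earlier in this file's diff (and surfaced further down as a KvCacheCanShift property): a caller might use it to guard context shifting. This is only a sketch, assuming an existing SafeLLamaContextHandle named context:

    // Only attempt a KV cache shift when the backend reports support for it.
    if (context.KvCacheCanShift)
    {
        // shift / discard old tokens, e.g. via llama_kv_cache_seq_rm + llama_kv_cache_seq_add
    }
    else
    {
        // fall back to truncating or fully re-evaluating the prompt
    }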
public DecodeResult Encode(LLamaBatch batch) { if (batch.TokenCount == 0) @@ -583,7 +591,7 @@ public DecodeResult Encode(LLamaBatch batch) /// Positive return values does not mean a fatal error, but rather a warning:
/// - 0: success
/// - 1: could not find a KV slot for the batch (try reducing the size of the batch or increasing the context)
- /// - < 0: error
+ /// - < 0: error (the KV cache state is restored to the state before this call)
///
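Given the return values documented above for the Decode method that follows, a minimal handling sketch (context and batch are assumed to already exist; the numeric values from the comment are used rather than enum member names):

    var result = context.Decode(batch);
    if ((int)result == 1)
    {
        // No KV slot found: retry with a smaller batch or a larger context.
    }
    else if ((int)result < 0)
    {
        // Hard error; per the note above, the KV cache was restored to its previous state.
    }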
public DecodeResult Decode(LLamaBatch batch) { @@ -746,6 +754,11 @@ public void ResetTimings() #endregion #region KV Cache Management + /// + /// Check if the context supports KV cache shifting + /// + public bool KvCacheCanShift => llama_kv_cache_can_shift(this); + /// /// Apply KV cache updates (such as K-shifts, defragmentation, etc.) /// diff --git a/LLama/Native/SafeLLamaSamplerHandle.cs b/LLama/Native/SafeLLamaSamplerHandle.cs index ef6a7ae30..3518d182e 100644 --- a/LLama/Native/SafeLLamaSamplerHandle.cs +++ b/LLama/Native/SafeLLamaSamplerHandle.cs @@ -1,5 +1,5 @@ using System; -using System.Runtime.CompilerServices; +using System.Collections.Generic; using System.Text; namespace LLama.Native; @@ -426,7 +426,20 @@ public void AddPenalties( int vocabSize, LLamaToken? eos, LLamaToken newline, int penaltyCount, float repeat, float freq, float presence, bool penalizeNewline, bool ignoreEOS ) { - llama_sampler_chain_add(this, llama_sampler_init_penalties(vocabSize, eos ?? LLamaToken.InvalidToken, newline, penaltyCount, repeat, freq, presence, penalizeNewline, ignoreEOS)); + llama_sampler_chain_add( + this, + llama_sampler_init_penalties( + vocabSize, + eos ?? LLamaToken.InvalidToken, + newline, + penaltyCount, + repeat, + freq, + presence, + penalizeNewline, + ignoreEOS + ) + ); // ReSharper disable InconsistentNaming [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] @@ -444,6 +457,62 @@ bool ignore_eos // ignore the end-of-sequence token // ReSharper restore InconsistentNaming } + /// + /// DRY sampler, designed by p-e-w, as described in: https://github.com/oobabooga/text-generation-webui/pull/5677. + /// Porting Koboldcpp implementation authored by pi6am: https://github.com/LostRuins/koboldcpp/pull/982 + /// + /// The model this sampler will be used with + /// + /// penalty multiplier, 0.0 = disabled + /// exponential base + /// repeated sequences longer than this are penalized + /// how many tokens to scan for repetitions (0 = entire context) + public void AddDry(SafeLlamaModelHandle model, ReadOnlySpan sequenceBreakers, float multiplier = 0.8f, float @base = 1.75f, int allowedLength = 2, int penaltyLastN = 0) + { + unsafe + { + // Convert strings, fix memory in place, build array of pointers + var handles = new List(); + var breakers = stackalloc byte*[sequenceBreakers.Length]; + for (var i = 0; i < sequenceBreakers.Length; i++) + { + var chars = Encoding.Default.GetBytes(sequenceBreakers[i]); + handles.Add(chars.AsMemory().Pin()); + + breakers[i] = (byte*)handles[i].Pointer; + } + + llama_sampler_chain_add( + this, + llama_sampler_init_dry( + model, + multiplier, + @base, + allowedLength, + penaltyLastN, + breakers, + (nuint)sequenceBreakers.Length + ) + ); + + // Clear up all the handles fixing the memory in place + for (var i = 0; i < handles.Count; i++) + handles[i].Dispose(); + } + + // ReSharper disable InconsistentNaming + [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] + static extern unsafe IntPtr llama_sampler_init_dry( + SafeLlamaModelHandle model, + float dry_multiplier, + float dry_base, + int dry_allowed_length, + int dry_penalty_last_n, + byte** seq_breakers, + nuint num_breakers + ); + } + /// /// Create a sampler that applies a bias directly to the logits /// diff --git a/LLama/Sampling/DefaultSamplingPipeline.cs b/LLama/Sampling/DefaultSamplingPipeline.cs index 3d166f0c6..2e3395db7 100644 --- a/LLama/Sampling/DefaultSamplingPipeline.cs +++ b/LLama/Sampling/DefaultSamplingPipeline.cs @@ -158,7 +158,7 @@ 
public float PresencePenalty public uint Seed { get; set; } = GetRandomSeed(); - private static Random RandomSeedGenerator = new(); + private static readonly Random RandomSeedGenerator = new(); private static uint GetRandomSeed() { lock (RandomSeedGenerator) From e4f4feda1cfb99ac3601165022143ccac07fc493 Mon Sep 17 00:00:00 2001 From: Martin Evans Date: Fri, 20 Dec 2024 01:08:43 +0000 Subject: [PATCH 02/22] Changes to support up to https://github.com/ggerganov/llama.cpp/commit/d408bb9268a988c5a60a5746d3a6430386e7604d --- LLama/Native/LLamaFtype.cs | 24 +++++++++---------- LLama/Native/LLamaRopeType.cs | 2 ++ LLama/Native/NativeApi.cs | 9 +++++++ LLama/Native/RopeScalingType.cs | 7 +++++- LLama/Native/SafeLLamaSamplerHandle.cs | 33 +++++++------------------- LLama/Native/SafeLlamaModelHandle.cs | 3 --- 6 files changed, 38 insertions(+), 40 deletions(-) diff --git a/LLama/Native/LLamaFtype.cs b/LLama/Native/LLamaFtype.cs index 6970a4728..705f8032e 100644 --- a/LLama/Native/LLamaFtype.cs +++ b/LLama/Native/LLamaFtype.cs @@ -177,20 +177,20 @@ public enum LLamaFtype /// MOSTLY_BF16 = 32, - /// - /// except 1d tensors - /// - MOSTLY_Q4_0_4_4 = 33, + ///// + ///// except 1d tensors (no longer supported by llama.cpp) + ///// + //MOSTLY_Q4_0_4_4 = 33, - /// - /// except 1d tensors - /// - MOSTLY_Q4_0_4_8 = 34, + ///// + ///// except 1d tensors (no longer supported by llama.cpp) + ///// + //MOSTLY_Q4_0_4_8 = 34, - /// - /// except 1d tensors - /// - MOSTLY_Q4_0_8_8 = 35, + ///// + ///// except 1d tensors (no longer supported by llama.cpp) + ///// + //MOSTLY_Q4_0_8_8 = 35, /// /// except 1d tensors diff --git a/LLama/Native/LLamaRopeType.cs b/LLama/Native/LLamaRopeType.cs index ebad9e77b..3f1188112 100644 --- a/LLama/Native/LLamaRopeType.cs +++ b/LLama/Native/LLamaRopeType.cs @@ -9,4 +9,6 @@ public enum LLamaRopeType None = -1, Norm = 0, NEOX = 2,//GGML_ROPE_TYPE_NEOX, + //todo:LLAMA_ROPE_TYPE_MROPE = GGML_ROPE_TYPE_MROPE, + //todo:LLAMA_ROPE_TYPE_VISION = GGML_ROPE_TYPE_VISION, } \ No newline at end of file diff --git a/LLama/Native/NativeApi.cs b/LLama/Native/NativeApi.cs index 041cc0dd5..0d6bc1984 100644 --- a/LLama/Native/NativeApi.cs +++ b/LLama/Native/NativeApi.cs @@ -172,6 +172,15 @@ public static unsafe int llama_chat_apply_template(SafeLlamaModelHandle? model, static extern int internal_llama_chat_apply_template(IntPtr model, byte* tmpl, LLamaChatMessage* chat, nuint n_msg, [MarshalAs(UnmanagedType.U1)] bool add_ass, byte* buf, int length); } + /// + /// Get list of built-in chat templates + /// + /// + /// + /// + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] + public static extern unsafe int llama_chat_builtin_templates(char** output, nuint len); + [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)] [return: MarshalAs(UnmanagedType.U1)] public static extern bool llama_add_bos_token(SafeLlamaModelHandle model); diff --git a/LLama/Native/RopeScalingType.cs b/LLama/Native/RopeScalingType.cs index 8d4552b80..61ae82942 100644 --- a/LLama/Native/RopeScalingType.cs +++ b/LLama/Native/RopeScalingType.cs @@ -1,4 +1,4 @@ -namespace LLama.Native +namespace LLama.Native { /// /// RoPE scaling type. 
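For reference, the AddDry sampler introduced in the previous commit could be wired into a chain roughly as follows. This is an illustrative sketch only: the sequence breakers are common choices rather than values taken from this PR, and context is assumed to be an existing SafeLLamaContextHandle:

    var chain = SafeLLamaSamplerChainHandle.Create(LLamaSamplerChainParams.Default());
    chain.AddDry(
        context.ModelHandle,
        new[] { "\n", ":", "\"", "*" },   // sequence breakers (assumed, not from this PR)
        multiplier: 0.8f,                 // documented default; 0.0 disables the sampler
        @base: 1.75f,
        allowedLength: 2,
        penaltyLastN: 0                   // 0 = scan the entire context
    );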
@@ -26,5 +26,10 @@ public enum RopeScalingType /// YaRN scaling: https://arxiv.org/pdf/2309.00071.pdf /// Yarn = 2, + + /// + /// LongRope scaling + /// + LongRope = 3, } } diff --git a/LLama/Native/SafeLLamaSamplerHandle.cs b/LLama/Native/SafeLLamaSamplerHandle.cs index 3518d182e..9099c2f32 100644 --- a/LLama/Native/SafeLLamaSamplerHandle.cs +++ b/LLama/Native/SafeLLamaSamplerHandle.cs @@ -410,49 +410,34 @@ public void AddGrammar(SafeLlamaModelHandle model, string grammar, string root) } /// - /// Create a sampler that applies various repetition penalties + /// Create a sampler that applies various repetition penalties. + /// + /// Avoid using on the full vocabulary as searching for repeated tokens can become slow. For example, apply top-k or top-p sampling first. /// - /// Vocab size - /// EOS token (if this model has one) - /// Newline token /// How many tokens of history to consider when calculating penalties /// Repetition penalty /// Frequency penalty /// Presence penalty - /// Whether or not to penalize the newline token - /// Whether or not to ignore EOS token /// - public void AddPenalties( - int vocabSize, LLamaToken? eos, LLamaToken newline, int penaltyCount, float repeat, float freq, float presence, bool penalizeNewline, bool ignoreEOS - ) + public void AddPenalties(int penaltyCount, float repeat, float freq, float presence) { llama_sampler_chain_add( this, llama_sampler_init_penalties( - vocabSize, - eos ?? LLamaToken.InvalidToken, - newline, penaltyCount, repeat, freq, - presence, - penalizeNewline, - ignoreEOS + presence ) ); // ReSharper disable InconsistentNaming [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] static extern IntPtr llama_sampler_init_penalties( - int n_vocab, // llama_n_vocab() - LLamaToken special_eos_id, // llama_token_eos() - LLamaToken linefeed_id, // llama_token_nl() - int penalty_last_n, // last n tokens to penalize (0 = disable penalty, -1 = context size) - float penalty_repeat, // 1.0 = disabled - float penalty_freq, // 0.0 = disabled - float penalty_present, // 0.0 = disabled - bool penalize_nl, // consider newlines as a repeatable token - bool ignore_eos // ignore the end-of-sequence token + int penalty_last_n, // last n tokens to penalize (0 = disable penalty, -1 = context size) + float penalty_repeat, // 1.0 = disabled + float penalty_freq, // 0.0 = disabled + float penalty_present // 0.0 = disabled ); // ReSharper restore InconsistentNaming } diff --git a/LLama/Native/SafeLlamaModelHandle.cs b/LLama/Native/SafeLlamaModelHandle.cs index 718b81809..303ae3352 100644 --- a/LLama/Native/SafeLlamaModelHandle.cs +++ b/LLama/Native/SafeLlamaModelHandle.cs @@ -441,9 +441,6 @@ private static int llama_model_meta_val_str(SafeLlamaModelHandle model, string k [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] private static extern LLamaTokenAttr llama_token_get_attr(SafeLlamaModelHandle model, LLamaToken token); - //[DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)] - //private static extern GGMLTensor llama_get_model_tensor(SafeLlamaModelHandle model, string name); - /// /// Returns true if the model contains an encoder that requires llama_encode() call /// From c90ddd989a47eb0c36f5e49c8d9f415b7681056d Mon Sep 17 00:00:00 2001 From: Martin Evans Date: Fri, 27 Dec 2024 21:08:35 +0000 Subject: [PATCH 03/22] Updated to latest llama.cpp binaries, this works on Windows CPU but needs more changes for other backends --- LLama.Examples/Program.cs | 6 +- 
LLama.Web/Common/ModelOptions.cs | 5 +- LLama/Abstractions/IModelParams.cs | 7 ++- LLama/Common/ModelParams.cs | 5 +- LLama/Extensions/IModelParamsExtensions.cs | 6 +- LLama/Extensions/LLamaExecutorExtensions.cs | 2 +- LLama/LLamaQuantizer.cs | 3 - LLama/LLamaSharp.Runtime.targets | 36 ++++++++++++ LLama/LLamaSharp.csproj | 2 +- LLama/LLamaStatelessExecutor.cs | 12 ++-- LLama/Native/LLamaModelParams.cs | 14 ++--- LLama/Native/Load/NativeLibraryConfig.cs | 4 +- LLama/Sampling/DefaultSamplingPipeline.cs | 56 +------------------ .../build/LLamaSharp.Backend.Cpu.nuspec | 15 +++-- 14 files changed, 87 insertions(+), 86 deletions(-) diff --git a/LLama.Examples/Program.cs b/LLama.Examples/Program.cs index ec4e20b03..d4c3bae15 100644 --- a/LLama.Examples/Program.cs +++ b/LLama.Examples/Program.cs @@ -1,6 +1,5 @@ using LLama.Native; using Spectre.Console; -using System.Runtime.InteropServices; AnsiConsole.MarkupLineInterpolated( $""" @@ -30,9 +29,8 @@ __ __ ____ __ // Configure native library to use. This must be done before any other llama.cpp methods are called! NativeLibraryConfig .All - .WithCuda(false) - .WithVulkan(false) - .DryRun(out var loadedllamaLibrary, out var loadedLLavaLibrary); + .WithCuda() + .WithVulkan(); // Calling this method forces loading to occur now. NativeApi.llama_empty_call(); diff --git a/LLama.Web/Common/ModelOptions.cs b/LLama.Web/Common/ModelOptions.cs index 4e002c93f..a67a11a96 100644 --- a/LLama.Web/Common/ModelOptions.cs +++ b/LLama.Web/Common/ModelOptions.cs @@ -24,7 +24,7 @@ public class ModelOptions public int MainGpu { get; set; } = 0; /// - public GPUSplitMode SplitMode { get; set; } = GPUSplitMode.None; + public GPUSplitMode? SplitMode { get; set; } /// public int GpuLayerCount { get; set; } = 20; @@ -59,6 +59,9 @@ public class ModelOptions /// public TensorSplitsCollection TensorSplits { get; set; } = new(); + /// + public bool CheckTensors { get; } + /// public List MetadataOverrides { get; } = new(); diff --git a/LLama/Abstractions/IModelParams.cs b/LLama/Abstractions/IModelParams.cs index 7dc28f671..cbbacafe5 100644 --- a/LLama/Abstractions/IModelParams.cs +++ b/LLama/Abstractions/IModelParams.cs @@ -36,7 +36,7 @@ public interface IModelParams /// /// How to split the model across multiple GPUs /// - GPUSplitMode SplitMode { get; } + GPUSplitMode? SplitMode { get; } /// /// Number of layers to run in VRAM / GPU memory (n_gpu_layers) @@ -68,6 +68,11 @@ public interface IModelParams /// bool VocabOnly { get; } + /// + /// Validate model tensor data before loading + /// + bool CheckTensors { get; } + /// /// Override specific metadata items in the model /// diff --git a/LLama/Common/ModelParams.cs b/LLama/Common/ModelParams.cs index b276ed73a..7e4b1a967 100644 --- a/LLama/Common/ModelParams.cs +++ b/LLama/Common/ModelParams.cs @@ -19,7 +19,7 @@ public record ModelParams public int MainGpu { get; set; } = 0; /// - public GPUSplitMode SplitMode { get; set; } = GPUSplitMode.None; + public GPUSplitMode? 
SplitMode { get; set; } /// public int GpuLayerCount { get; set; } = 20; @@ -54,6 +54,9 @@ public record ModelParams /// public TensorSplitsCollection TensorSplits { get; set; } = new(); + /// + public bool CheckTensors { get; } + /// public List MetadataOverrides { get; set; } = new(); diff --git a/LLama/Extensions/IModelParamsExtensions.cs b/LLama/Extensions/IModelParamsExtensions.cs index d704b2e8c..588564e33 100644 --- a/LLama/Extensions/IModelParamsExtensions.cs +++ b/LLama/Extensions/IModelParamsExtensions.cs @@ -30,13 +30,15 @@ public static IDisposable ToLlamaModelParams(this IModelParams @params, out LLam result = LLamaModelParams.Default(); - result.devices = IntPtr.Zero; result.main_gpu = @params.MainGpu; - result.split_mode = @params.SplitMode; result.n_gpu_layers = @params.GpuLayerCount < 0 ? int.MaxValue : @params.GpuLayerCount; + if (@params.SplitMode.HasValue) + result.split_mode = @params.SplitMode.Value; + result.use_mlock = @params.UseMemoryLock; result.use_mmap = @params.UseMemorymap; result.vocab_only = @params.VocabOnly; + result.check_tensors = @params.CheckTensors; unsafe { diff --git a/LLama/Extensions/LLamaExecutorExtensions.cs b/LLama/Extensions/LLamaExecutorExtensions.cs index 19c8d33df..e38ccf98d 100644 --- a/LLama/Extensions/LLamaExecutorExtensions.cs +++ b/LLama/Extensions/LLamaExecutorExtensions.cs @@ -147,7 +147,7 @@ private string CreatePrompt(IList messages) PreventEOS = options?.AdditionalProperties?.TryGetValue(nameof(DefaultSamplingPipeline.PreventEOS), out bool eos) is true ? eos : s_defaultPipeline.PreventEOS, PenalizeNewline = options?.AdditionalProperties?.TryGetValue(nameof(DefaultSamplingPipeline.PenalizeNewline), out bool pnl) is true ? pnl : s_defaultPipeline.PenalizeNewline, RepeatPenalty = options?.AdditionalProperties?.TryGetValue(nameof(DefaultSamplingPipeline.RepeatPenalty), out float rp) is true ? rp : s_defaultPipeline.RepeatPenalty, - RepeatPenaltyCount = options?.AdditionalProperties?.TryGetValue(nameof(DefaultSamplingPipeline.RepeatPenaltyCount), out int rpc) is true ? rpc : s_defaultPipeline.RepeatPenaltyCount, + PenaltyCount = options?.AdditionalProperties?.TryGetValue(nameof(DefaultSamplingPipeline.PenaltyCount), out int rpc) is true ? rpc : s_defaultPipeline.PenaltyCount, Grammar = options?.AdditionalProperties?.TryGetValue(nameof(DefaultSamplingPipeline.Grammar), out Grammar? g) is true ? g : s_defaultPipeline.Grammar, MinKeep = options?.AdditionalProperties?.TryGetValue(nameof(DefaultSamplingPipeline.MinKeep), out int mk) is true ? mk : s_defaultPipeline.MinKeep, MinP = options?.AdditionalProperties?.TryGetValue(nameof(DefaultSamplingPipeline.MinP), out float mp) is true ? 
mp : s_defaultPipeline.MinP, diff --git a/LLama/LLamaQuantizer.cs b/LLama/LLamaQuantizer.cs index 23f0f8b4e..9e90b732e 100644 --- a/LLama/LLamaQuantizer.cs +++ b/LLama/LLamaQuantizer.cs @@ -106,9 +106,6 @@ private static bool ValidateFtype(LLamaFtype ftype) case LLamaFtype.MOSTLY_IQ3_S: case LLamaFtype.MOSTLY_IQ3_M: - case LLamaFtype.MOSTLY_Q4_0_4_4: - case LLamaFtype.MOSTLY_Q4_0_4_8: - case LLamaFtype.MOSTLY_Q4_0_8_8: return true; case LLamaFtype.GUESSED: diff --git a/LLama/LLamaSharp.Runtime.targets b/LLama/LLamaSharp.Runtime.targets index 6466a1204..fd3b3061e 100644 --- a/LLama/LLamaSharp.Runtime.targets +++ b/LLama/LLamaSharp.Runtime.targets @@ -12,6 +12,15 @@ PreserveNewest runtimes/win-x64/native/noavx/ggml.dll + + PreserveNewest + runtimes/win-x64/native/noavx/ggml-base.dll + + + PreserveNewest + runtimes/win-x64/native/noavx/ggml-cpu.dll + + PreserveNewest runtimes/win-x64/native/avx/llama.dll @@ -20,22 +29,49 @@ PreserveNewest runtimes/win-x64/native/avx/ggml.dll + + PreserveNewest + runtimes/win-x64/native/avx/ggml-base.dll + + + PreserveNewest + runtimes/win-x64/native/avx/ggml-cpu.dll + + PreserveNewest runtimes/win-x64/native/avx2/llama.dll + + PreserveNewest + runtimes/win-x64/native/avx2/ggml-base.dll + PreserveNewest runtimes/win-x64/native/avx2/ggml.dll + + PreserveNewest + runtimes/win-x64/native/avx2/ggml-cpu.dll + + PreserveNewest runtimes/win-x64/native/avx512/llama.dll + + PreserveNewest + runtimes/win-x64/native/avx512/ggml-base.dll + PreserveNewest runtimes/win-x64/native/avx512/ggml.dll + + PreserveNewest + runtimes/win-x64/native/avx512/ggml-cpu.dll + + PreserveNewest runtimes/win-x64/native/cuda11/llama.dll diff --git a/LLama/LLamaSharp.csproj b/LLama/LLamaSharp.csproj index 189c3d94a..0ce3714ca 100644 --- a/LLama/LLamaSharp.csproj +++ b/LLama/LLamaSharp.csproj @@ -56,7 +56,7 @@ - c9b00a70b080d + d79d8f39b4da6 diff --git a/LLama/LLamaStatelessExecutor.cs b/LLama/LLamaStatelessExecutor.cs index a4b9920dc..393bebc3f 100644 --- a/LLama/LLamaStatelessExecutor.cs +++ b/LLama/LLamaStatelessExecutor.cs @@ -23,14 +23,14 @@ public class StatelessExecutor private readonly ILogger? _logger; private readonly LLamaBatch _batch; - // LLava Section + /// public bool IsMultiModal => false; /// - public LLavaWeights? ClipModel { get; } + public LLavaWeights? ClipModel => default; /// - public List Images { get; set; } + public List Images { get; } /// /// The context used by the executor when running the inference. 
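With SplitMode now optional (leaving it null keeps llama.cpp's own default), typical model setup barely changes; a hedged sketch using the types touched above, with a placeholder model path:

    var parameters = new ModelParams("path/to/model.gguf")   // placeholder path
    {
        GpuLayerCount = 20,
        SplitMode = GPUSplitMode.Layer,   // or omit to let llama.cpp decide
    };
    using var weights = LLamaWeights.LoadFromFile(parameters);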
@@ -68,7 +68,7 @@ public async IAsyncEnumerable InferAsync(string prompt, IInferenceParams Context = context; // Reset the sampling pipeline (if there is one) - inferenceParams?.SamplingPipeline?.Reset(); + inferenceParams?.SamplingPipeline.Reset(); // Sanity check inference params inferenceParams ??= new InferenceParams(); @@ -134,8 +134,8 @@ public async IAsyncEnumerable InferAsync(string prompt, IInferenceParams var n_left = n_past - tokensKeep; var n_discard = n_left / 2; - NativeApi.llama_kv_cache_seq_rm(Context.NativeHandle, (LLamaSeqId)0, tokensKeep , tokensKeep + n_discard); - NativeApi.llama_kv_cache_seq_add(Context.NativeHandle, (LLamaSeqId)0, tokensKeep + n_discard, n_past, -n_discard); + NativeApi.llama_kv_cache_seq_rm(Context.NativeHandle, LLamaSeqId.Zero, tokensKeep , tokensKeep + n_discard); + NativeApi.llama_kv_cache_seq_add(Context.NativeHandle, LLamaSeqId.Zero, tokensKeep + n_discard, n_past, -n_discard); n_past -= n_discard; } diff --git a/LLama/Native/LLamaModelParams.cs b/LLama/Native/LLamaModelParams.cs index e3394892e..e16e3263e 100644 --- a/LLama/Native/LLamaModelParams.cs +++ b/LLama/Native/LLamaModelParams.cs @@ -10,9 +10,9 @@ public unsafe struct LLamaModelParams { /// /// NULL-terminated list of devices to use for offloading (if NULL, all available devices are used) + /// todo: add support for llama_model_params.devices /// - //ggml_backend_dev_t* devices; - public IntPtr devices; + private IntPtr devices; /// /// // number of layers to store in VRAM @@ -32,12 +32,12 @@ public unsafe struct LLamaModelParams /// /// how to split layers across multiple GPUs (size: ) /// - public float* tensor_split; - - /// - /// comma separated list of RPC servers to use for offloading + public float* tensor_split; + + /// + /// comma separated list of RPC servers to use for offloading /// - public byte* rpc_servers; + public byte* rpc_servers; /// /// called with a progress value between 0 and 1, pass NULL to disable. If the provided progress_callback diff --git a/LLama/Native/Load/NativeLibraryConfig.cs b/LLama/Native/Load/NativeLibraryConfig.cs index 02e47b695..2bfa0554b 100644 --- a/LLama/Native/Load/NativeLibraryConfig.cs +++ b/LLama/Native/Load/NativeLibraryConfig.cs @@ -178,7 +178,7 @@ internal Description CheckAndGatherDescription() _avxLevel, _allowFallback, _skipCheck, - _searchDirectories.Concat(new[] { "./" }).ToArray() + _searchDirectories.Concat([ "./" ]).ToArray() ); } @@ -186,7 +186,7 @@ internal static string AvxLevelToString(AvxLevel level) { return level switch { - AvxLevel.None => string.Empty, + AvxLevel.None => "noavx", AvxLevel.Avx => "avx", AvxLevel.Avx2 => "avx2", AvxLevel.Avx512 => "avx512", diff --git a/LLama/Sampling/DefaultSamplingPipeline.cs b/LLama/Sampling/DefaultSamplingPipeline.cs index 2e3395db7..76404bc95 100644 --- a/LLama/Sampling/DefaultSamplingPipeline.cs +++ b/LLama/Sampling/DefaultSamplingPipeline.cs @@ -20,44 +20,6 @@ public sealed class DefaultSamplingPipeline /// public float RepeatPenalty { get; init; } = 1; - /// - /// Frequency penalty as described by OpenAI: https://platform.openai.com/docs/api-reference/chat/create
- /// Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text - /// so far, decreasing the model's likelihood to repeat the same line verbatim. - ///
- [Obsolete($"Use {nameof(FrequencyPenalty)} instead.")] - public float AlphaFrequency - { - get => _frequencyPenalty; - init - { - if (value < -2) - throw new ArgumentOutOfRangeException(nameof(value), $"{nameof(AlphaFrequency)} must be greater than -2"); - if (value > 2) - throw new ArgumentOutOfRangeException(nameof(value), $"{nameof(AlphaFrequency)} must be less than 2"); - _frequencyPenalty = value; - } - } - - /// - /// Presence penalty as described by OpenAI: https://platform.openai.com/docs/api-reference/chat/create
- /// Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the - /// text so far, increasing the model's likelihood to talk about new topics. - ///
- [Obsolete($"Use {nameof(PresencePenalty)} instead.")] - public float AlphaPresence - { - get => _presencePenalty; - init - { - if (value < -2) - throw new ArgumentOutOfRangeException(nameof(value), $"{nameof(AlphaPresence)} must be greater than -2"); - if (value > 2) - throw new ArgumentOutOfRangeException(nameof(value), $"{nameof(AlphaPresence)} must be less than 2"); - _presencePenalty = value; - } - } - /// /// Frequency penalty as described by OpenAI: https://platform.openai.com/docs/api-reference/chat/create
/// Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text @@ -97,21 +59,15 @@ public float PresencePenalty private readonly float _presencePenalty; /// - /// How many tokens should be considered for penalizing repetition + /// How many tokens should be considered for penalties /// - public int RepeatPenaltyCount { get; init; } = 64; + public int PenaltyCount { get; init; } = 64; /// /// Whether the newline token should be protected from being modified by penalty /// public bool PenalizeNewline { get; init; } = false; - /// - /// Whether the EOS token should be protected from being modified by penalty - /// - [Obsolete($"This doesn't do what the name implies. If you're sure you want to use it, use {nameof(PreventEOS)}.")] - public bool PenalizeEOS { get; init; } = false; - /// /// Whether the EOS token should be suppressed. Setting this to 'true' prevents EOS from being sampled /// @@ -196,13 +152,7 @@ protected override SafeLLamaSamplerChainHandle CreateChain(SafeLLamaContextHandl if (Grammar != null) chain.AddGrammar(context.ModelHandle, Grammar.Gbnf, Grammar.Root); - chain.AddPenalties( - context.VocabCount, - context.ModelHandle.Tokens.EOS, context.ModelHandle.Tokens.Newline ?? 0, - RepeatPenaltyCount, RepeatPenalty, - FrequencyPenalty, PresencePenalty, - PenalizeNewline, PreventEOS - ); + chain.AddPenalties(PenaltyCount, RepeatPenalty, FrequencyPenalty, PresencePenalty); chain.AddTopK(TopK); chain.AddTypical(TypicalP, MinKeep); diff --git a/LLama/runtimes/build/LLamaSharp.Backend.Cpu.nuspec b/LLama/runtimes/build/LLamaSharp.Backend.Cpu.nuspec index 0203aad2b..ab8d11c04 100644 --- a/LLama/runtimes/build/LLamaSharp.Backend.Cpu.nuspec +++ b/LLama/runtimes/build/LLamaSharp.Backend.Cpu.nuspec @@ -18,13 +18,20 @@ - + + - + + + - + + + - + + + From c27cfde13d44a22a3a2022a2bfd19fdf62b8d889 Mon Sep 17 00:00:00 2001 From: Martin Evans Date: Sat, 4 Jan 2025 00:46:22 +0000 Subject: [PATCH 04/22] Updated to latest deps, fixed kernel memory failing to load --- .../LLamaSharpTextEmbeddingGenerator.cs | 4 +--- LLama.KernelMemory/LlamaSharpTextGenerator.cs | 2 -- .../LLamaSharpTextEmbeddingGeneratorTests.cs | 14 ++++---------- .../LlamaSharpTextGeneratorTests.cs | 18 ++++-------------- LLama.Unittest/SamplingTests.cs | 6 +----- LLama/LLamaSharp.csproj | 2 +- 6 files changed, 11 insertions(+), 35 deletions(-) diff --git a/LLama.KernelMemory/LLamaSharpTextEmbeddingGenerator.cs b/LLama.KernelMemory/LLamaSharpTextEmbeddingGenerator.cs index 7f9ae1e4d..6efd44f7b 100644 --- a/LLama.KernelMemory/LLamaSharpTextEmbeddingGenerator.cs +++ b/LLama.KernelMemory/LLamaSharpTextEmbeddingGenerator.cs @@ -31,11 +31,9 @@ public LLamaSharpTextEmbeddingGenerator(LLamaSharpConfig config) var @params = new ModelParams(config.ModelPath) { - ContextSize = config.ContextSize ?? 2048, + ContextSize = config.ContextSize, GpuLayerCount = config.GpuLayerCount ?? 20, Embeddings = true, - MainGpu = config.MainGpu, - SplitMode = config.SplitMode, PoolingType = LLamaPoolingType.Mean, }; _weights = LLamaWeights.LoadFromFile(@params); diff --git a/LLama.KernelMemory/LlamaSharpTextGenerator.cs b/LLama.KernelMemory/LlamaSharpTextGenerator.cs index adfc89317..3fc96db9a 100644 --- a/LLama.KernelMemory/LlamaSharpTextGenerator.cs +++ b/LLama.KernelMemory/LlamaSharpTextGenerator.cs @@ -33,8 +33,6 @@ public LlamaSharpTextGenerator(LLamaSharpConfig config) { ContextSize = config.ContextSize ?? 2048, GpuLayerCount = config.GpuLayerCount ?? 
20, - MainGpu = config.MainGpu, - SplitMode = config.SplitMode }; _weights = LLamaWeights.LoadFromFile(parameters); _context = _weights.CreateContext(parameters); diff --git a/LLama.Unittest/KernelMemory/LLamaSharpTextEmbeddingGeneratorTests.cs b/LLama.Unittest/KernelMemory/LLamaSharpTextEmbeddingGeneratorTests.cs index 91161b72c..5c7b4213d 100644 --- a/LLama.Unittest/KernelMemory/LLamaSharpTextEmbeddingGeneratorTests.cs +++ b/LLama.Unittest/KernelMemory/LLamaSharpTextEmbeddingGeneratorTests.cs @@ -1,21 +1,15 @@ -using LLama.Common; using LLamaSharp.KernelMemory; -using Microsoft.KernelMemory.AI; -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Text.RegularExpressions; -using System.Threading.Tasks; using Xunit.Abstractions; namespace LLama.Unittest.KernelMemory { - public class LLamaSharpTextEmbeddingGeneratorTests : ITextTokenizerTests, IDisposable + public class LLamaSharpTextEmbeddingGeneratorTests + : ITextTokenizerTests, IDisposable { private readonly LLamaSharpTextEmbeddingGenerator _embeddingGenerator; - public LLamaSharpTextEmbeddingGeneratorTests(ITestOutputHelper testOutputHelper) : base(testOutputHelper) + public LLamaSharpTextEmbeddingGeneratorTests(ITestOutputHelper testOutputHelper) + : base(testOutputHelper) { _embeddingGenerator = new LLamaSharpTextEmbeddingGenerator(_lsConfig); diff --git a/LLama.Unittest/KernelMemory/LlamaSharpTextGeneratorTests.cs b/LLama.Unittest/KernelMemory/LlamaSharpTextGeneratorTests.cs index 02001f8cf..d21d7f959 100644 --- a/LLama.Unittest/KernelMemory/LlamaSharpTextGeneratorTests.cs +++ b/LLama.Unittest/KernelMemory/LlamaSharpTextGeneratorTests.cs @@ -1,25 +1,15 @@ -using LLama.Common; using LLamaSharp.KernelMemory; -using Microsoft.KernelMemory.AI; -using System; -using System.Collections.Generic; -using System.Diagnostics; -using System.Linq; -using System.Reflection.Emit; -using System.Text; -using System.Text.RegularExpressions; -using System.Threading.Tasks; using Xunit.Abstractions; -using Xunit.Sdk; -using static System.Net.Mime.MediaTypeNames; namespace LLama.Unittest.KernelMemory { - public class LlamaSharpTextGeneratorTests : ITextTokenizerTests, IDisposable + public class LlamaSharpTextGeneratorTests + : ITextTokenizerTests, IDisposable { private readonly LlamaSharpTextGenerator _textGenerator; - public LlamaSharpTextGeneratorTests(ITestOutputHelper testOutputHelper) : base(testOutputHelper) + public LlamaSharpTextGeneratorTests(ITestOutputHelper testOutputHelper) + : base(testOutputHelper) { _textGenerator = new LlamaSharpTextGenerator(_lsConfig); diff --git a/LLama.Unittest/SamplingTests.cs b/LLama.Unittest/SamplingTests.cs index f322bc250..bae7e3dea 100644 --- a/LLama.Unittest/SamplingTests.cs +++ b/LLama.Unittest/SamplingTests.cs @@ -167,11 +167,7 @@ private static SafeLLamaSamplerChainHandle CreateChain(SafeLLamaContextHandle co var chain = SafeLLamaSamplerChainHandle.Create(LLamaSamplerChainParams.Default()); chain.AddPenalties( - vocabSize: context.VocabCount, - eos: context.ModelHandle.Tokens.EOS, - newline: context.ModelHandle.Tokens.Newline ?? 
0, - penaltyCount: 60, repeat: 1, freq: 0, presence: 0, - penalizeNewline: false, ignoreEOS: false + penaltyCount: 60, repeat: 1, freq: 0, presence: 0 ); if (logit_bias != null) { chain.AddLogitBias(context.VocabCount, logit_bias); } diff --git a/LLama/LLamaSharp.csproj b/LLama/LLamaSharp.csproj index 0ce3714ca..0e51077e4 100644 --- a/LLama/LLamaSharp.csproj +++ b/LLama/LLamaSharp.csproj @@ -56,7 +56,7 @@ - d79d8f39b4da6 + 0827b2c1da From a5c97594c9f4f1ba3f685e5a0435b7daec3056df Mon Sep 17 00:00:00 2001 From: SignalRT Date: Sat, 4 Jan 2025 15:28:54 +0100 Subject: [PATCH 05/22] Copy missing Mac flibraries libggml-base and libggml-cpu --- LLama/LLamaSharp.Runtime.targets | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/LLama/LLamaSharp.Runtime.targets b/LLama/LLamaSharp.Runtime.targets index fd3b3061e..e58b9e89f 100644 --- a/LLama/LLamaSharp.Runtime.targets +++ b/LLama/LLamaSharp.Runtime.targets @@ -154,6 +154,14 @@ runtimes/linux-x64/native/vulkan/libggml.so + + PreserveNewest + runtimes/osx-arm64/native/libggml-base.dylib + + + PreserveNewest + runtimes/osx-arm64/native/libggml-cpu.dylib + PreserveNewest runtimes/osx-arm64/native/libggml.dylib @@ -170,7 +178,15 @@ PreserveNewest runtimes/osx-arm64/native/ggml-metal.metal - + + + PreserveNewest + runtimes/osx-x64/native/libggml-base.dylib + + + PreserveNewest + runtimes/osx-x64/native/libggml-cpu.dylib + PreserveNewest runtimes/osx-x64/native/libggml.dylib @@ -184,6 +200,14 @@ runtimes/osx-x64/native/libllava_shared.dylib + + PreserveNewest + runtimes/osx-x64/native/rosetta2/libggml-base.dylib + + + PreserveNewest + runtimes/osx-x64/native/rosetta2/libggml-cpu.dylib + PreserveNewest runtimes/osx-x64/native/rosetta2/libggml.dylib From 34198f901a341286374a7d25fc71956166883c45 Mon Sep 17 00:00:00 2001 From: Martin Evans Date: Sat, 4 Jan 2025 21:18:59 +0000 Subject: [PATCH 06/22] Removed any mention of AVX in MacOS loading --- .../DefaultNativeLibrarySelectingPolicy.cs | 4 +-- .../Load/NativeLibraryWithMacOrFallback.cs | 31 ++++++------------- 2 files changed, 10 insertions(+), 25 deletions(-) diff --git a/LLama/Native/Load/DefaultNativeLibrarySelectingPolicy.cs b/LLama/Native/Load/DefaultNativeLibrarySelectingPolicy.cs index 6f5ad35fe..497902ba1 100644 --- a/LLama/Native/Load/DefaultNativeLibrarySelectingPolicy.cs +++ b/LLama/Native/Load/DefaultNativeLibrarySelectingPolicy.cs @@ -10,8 +10,6 @@ public class DefaultNativeLibrarySelectingPolicy: INativeLibrarySelectingPolicy /// public IEnumerable Apply(NativeLibraryConfig.Description description, SystemInfo systemInfo, NativeLogConfig.LLamaLogCallback? 
logCallback) { - List results = new(); - // Show the configuration we're working with Log(description.ToString(), LLamaLogLevel.Info, logCallback); @@ -56,7 +54,7 @@ public IEnumerable Apply(NativeLibraryConfig.Description descrip if(systemInfo.OSPlatform == OSPlatform.OSX || description.AllowFallback) { - yield return new NativeLibraryWithMacOrFallback(description.Library, description.SkipCheck); + yield return new NativeLibraryWithMacOrFallback(description.Library); } } } diff --git a/LLama/Native/Load/NativeLibraryWithMacOrFallback.cs b/LLama/Native/Load/NativeLibraryWithMacOrFallback.cs index 6bcd55049..59754be03 100644 --- a/LLama/Native/Load/NativeLibraryWithMacOrFallback.cs +++ b/LLama/Native/Load/NativeLibraryWithMacOrFallback.cs @@ -1,5 +1,5 @@ -using LLama.Abstractions; using System.Collections.Generic; +using LLama.Abstractions; namespace LLama.Native { @@ -7,39 +7,30 @@ namespace LLama.Native /// /// A native library compiled on Mac, or fallbacks from all other libraries in the selection. /// - public class NativeLibraryWithMacOrFallback : INativeLibrary + public class NativeLibraryWithMacOrFallback + : INativeLibrary { - private NativeLibraryName _libraryName; - private bool _skipCheck; + private readonly NativeLibraryName _libraryName; /// - public NativeLibraryMetadata? Metadata - { - get - { - return new NativeLibraryMetadata(_libraryName, false, false, AvxLevel.None); - } - } + public NativeLibraryMetadata Metadata => new(_libraryName, false, false, AvxLevel.None); /// /// /// /// - /// - public NativeLibraryWithMacOrFallback(NativeLibraryName libraryName, bool skipCheck) + public NativeLibraryWithMacOrFallback(NativeLibraryName libraryName) { _libraryName = libraryName; - _skipCheck = skipCheck; } /// public IEnumerable Prepare(SystemInfo systemInfo, NativeLogConfig.LLamaLogCallback? logCallback) { - var path = GetPath(systemInfo, AvxLevel.None, logCallback); - return path is null ?[] : [path]; + yield return GetPath(systemInfo); } - private string? GetPath(SystemInfo systemInfo, AvxLevel avxLevel, NativeLogConfig.LLamaLogCallback? 
logCallback) + private string GetPath(SystemInfo systemInfo) { NativeLibraryUtils.GetPlatformPathParts(systemInfo.OSPlatform, out var os, out var fileExtension, out var libPrefix); string relativePath; @@ -50,11 +41,7 @@ public IEnumerable Prepare(SystemInfo systemInfo, NativeLogConfig.LLamaL } else { - var avxStr = NativeLibraryConfig.AvxLevelToString(AvxLevel.None); - if (!string.IsNullOrEmpty(avxStr)) - avxStr += "/"; - - relativePath = $"runtimes/{os}/native/{avxStr}{libPrefix}{_libraryName.GetLibraryName()}{fileExtension}"; + relativePath = $"runtimes/{os}/native/{libPrefix}{_libraryName.GetLibraryName()}{fileExtension}"; } return relativePath; From 3d931749774be66d58bfa451af104ae1aae79c31 Mon Sep 17 00:00:00 2001 From: Martin Evans Date: Sat, 11 Jan 2025 00:37:07 +0000 Subject: [PATCH 07/22] Added file copying for some more targets (still missing macos) --- LLama/LLamaSharp.Runtime.targets | 103 ++++++++++++++++++++++++++++++- 1 file changed, 102 insertions(+), 1 deletion(-) diff --git a/LLama/LLamaSharp.Runtime.targets b/LLama/LLamaSharp.Runtime.targets index e58b9e89f..5a49ccc76 100644 --- a/LLama/LLamaSharp.Runtime.targets +++ b/LLama/LLamaSharp.Runtime.targets @@ -21,6 +21,7 @@ runtimes/win-x64/native/noavx/ggml-cpu.dll + PreserveNewest runtimes/win-x64/native/avx/llama.dll @@ -38,6 +39,7 @@ runtimes/win-x64/native/avx/ggml-cpu.dll + PreserveNewest runtimes/win-x64/native/avx2/llama.dll @@ -55,6 +57,7 @@ runtimes/win-x64/native/avx2/ggml-cpu.dll + PreserveNewest runtimes/win-x64/native/avx512/llama.dll @@ -72,30 +75,60 @@ runtimes/win-x64/native/avx512/ggml-cpu.dll + PreserveNewest runtimes/win-x64/native/cuda11/llama.dll + + PreserveNewest + runtimes/win-x64/native/cuda11/ggml-base.dll + PreserveNewest runtimes/win-x64/native/cuda11/ggml.dll + + PreserveNewest + runtimes/win-x64/native/cuda11/ggml-cuda.dll + + + PreserveNewest runtimes/win-x64/native/cuda12/llama.dll + + PreserveNewest + runtimes/win-x64/native/cuda12/ggml-base.dll + PreserveNewest - runtimes/win-x64/native/cuda12/ggml.dll + runtimes/win-x64/native/cuda11/ggml.dll + + + PreserveNewest + runtimes/win-x64/native/cuda11/ggml-cuda.dll + + PreserveNewest runtimes/win-x64/native/vulkan/llama.dll + + PreserveNewest + runtimes/win-x64/native/vulkan/ggml-base.dll + PreserveNewest runtimes/win-x64/native/vulkan/ggml.dll + + PreserveNewest + runtimes/win-x64/native/vulkan/ggml-vulkan.dll + + PreserveNewest @@ -105,6 +138,16 @@ PreserveNewest runtimes/linux-x64/native/noavx/libggml.so + + PreserveNewest + runtimes/linux-x64/native/noavx/libggml-base.so + + + PreserveNewest + runtimes/linux-x64/native/noavx/libggml-cpu.so + + + PreserveNewest runtimes/linux-x64/native/avx/libllama.so @@ -113,6 +156,17 @@ PreserveNewest runtimes/linux-x64/native/avx/libggml.so + + PreserveNewest + runtimes/linux-x64/native/avx/libggml-base.so + + + PreserveNewest + runtimes/linux-x64/native/avx/libggml-cpu.so + + + + PreserveNewest runtimes/linux-x64/native/avx2/libllama.so @@ -121,6 +175,15 @@ PreserveNewest runtimes/linux-x64/native/avx2/libggml.so + + PreserveNewest + runtimes/linux-x64/native/avx2/libggml-base.so + + + PreserveNewest + runtimes/linux-x64/native/avx2/libggml-cpu.so + + PreserveNewest runtimes/linux-x64/native/avx512/libllama.so @@ -129,6 +192,15 @@ PreserveNewest runtimes/linux-x64/native/avx512/libggml.so + + PreserveNewest + runtimes/linux-x64/native/avx512/libggml-base.so + + + PreserveNewest + runtimes/linux-x64/native/avx512/libggml-cpu.so + + PreserveNewest runtimes/linux-x64/native/cuda11/libllama.so @@ -137,6 
+209,16 @@ PreserveNewest runtimes/linux-x64/native/cuda11/libggml.so + + PreserveNewest + runtimes/linux-x64/native/cuda11/libggml-base.so + + + PreserveNewest + runtimes/linux-x64/native/cuda11/libggml-cuda.so + + + PreserveNewest runtimes/linux-x64/native/cuda12/libllama.so @@ -145,6 +227,16 @@ PreserveNewest runtimes/linux-x64/native/cuda12/libggml.so + + PreserveNewest + runtimes/linux-x64/native/cuda12/libggml-base.so + + + PreserveNewest + runtimes/linux-x64/native/cuda12/libggml-cuda.so + + + PreserveNewest runtimes/linux-x64/native/vulkan/libllama.so @@ -153,6 +245,15 @@ PreserveNewest runtimes/linux-x64/native/vulkan/libggml.so + + PreserveNewest + runtimes/linux-x64/native/vulkan/libggml-base.so + + + PreserveNewest + runtimes/linux-x64/native/vulkan/libggml-vulkan.so + + PreserveNewest From 0647df9b1e06df73beb636e0e0807392ae33a6a7 Mon Sep 17 00:00:00 2001 From: Martin Evans Date: Sat, 11 Jan 2025 21:46:06 +0000 Subject: [PATCH 08/22] Updated to latest set of binaries --- LLama.Examples/Program.cs | 2 +- LLama/LLamaSharp.csproj | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/LLama.Examples/Program.cs b/LLama.Examples/Program.cs index d4c3bae15..f8ef7d5aa 100644 --- a/LLama.Examples/Program.cs +++ b/LLama.Examples/Program.cs @@ -17,7 +17,7 @@ __ __ ____ __ """); // Configure logging. Change this to `true` to see log messages from llama.cpp -var showLLamaCppLogs = false; +var showLLamaCppLogs = true; NativeLibraryConfig .All .WithLogCallback((level, message) => diff --git a/LLama/LLamaSharp.csproj b/LLama/LLamaSharp.csproj index 0e51077e4..4d0d19eb3 100644 --- a/LLama/LLamaSharp.csproj +++ b/LLama/LLamaSharp.csproj @@ -56,7 +56,7 @@ - 0827b2c1da + 0827b2c1da-v2 From 756a88f439e022744ff7322b9dedaee4a1d2203e Mon Sep 17 00:00:00 2001 From: Martin Evans Date: Sat, 11 Jan 2025 21:52:24 +0000 Subject: [PATCH 09/22] Fixed copy path for CUDA12 DLLs --- LLama/LLamaSharp.Runtime.targets | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/LLama/LLamaSharp.Runtime.targets b/LLama/LLamaSharp.Runtime.targets index 5a49ccc76..10140cc06 100644 --- a/LLama/LLamaSharp.Runtime.targets +++ b/LLama/LLamaSharp.Runtime.targets @@ -104,11 +104,11 @@ PreserveNewest - runtimes/win-x64/native/cuda11/ggml.dll + runtimes/win-x64/native/cuda12/ggml.dll PreserveNewest - runtimes/win-x64/native/cuda11/ggml-cuda.dll + runtimes/win-x64/native/cuda12/ggml-cuda.dll From 4950e0da66ae492ccdf263fada92c90aa78896ed Mon Sep 17 00:00:00 2001 From: m0nsky Date: Fri, 17 Jan 2025 23:34:12 +0100 Subject: [PATCH 10/22] Compatibility with llama.cpp backend split (PR #10256) on all platforms --- LLama/Native/Load/NativeLibraryUtils.cs | 90 +++++++++++++++++++------ 1 file changed, 68 insertions(+), 22 deletions(-) diff --git a/LLama/Native/Load/NativeLibraryUtils.cs b/LLama/Native/Load/NativeLibraryUtils.cs index d5b014ce0..8227e8cc1 100644 --- a/LLama/Native/Load/NativeLibraryUtils.cs +++ b/LLama/Native/Load/NativeLibraryUtils.cs @@ -43,35 +43,81 @@ internal static IntPtr TryLoadLibrary(NativeLibraryConfig config, out INativeLib foreach (var path in paths) { - Log($"Got relative library path '{path}' from local with {library.Metadata}, trying to load it...", LLamaLogLevel.Debug, config.LogCallback); + // After the llama.cpp binaries have been split up (PR #10256), we need to load the dependencies manually. 
+ // It can't be done automatically on Windows, because the dependencies can be in different folders (for example, ggml-cuda.dll from the cuda12 folder, and ggml-cpu.dll from the avx2 folder) + // It can't be done automatically on Linux, because Linux uses the environment variable "LD_LIBRARY_PATH" to automatically load dependencies, and LD_LIBRARY_PATH can only be + // set before running LLamaSharp, but we only know which folders to search in when running LLamaSharp (decided by the NativeLibrary). - // If we are on Linux / OSX, we need to manually load the GGML dependency - if (systemInfo.OSPlatform == OSPlatform.Linux || systemInfo.OSPlatform == OSPlatform.OSX) + // Get the directory of the current runtime + string? currentRuntimeDirectory = Path.GetDirectoryName(path); + + // If we failed to get the directory of the current runtime, log it and continue on to the next library + if (currentRuntimeDirectory == null) { - // Get the directory of the library - string? libraryDirectory = Path.GetDirectoryName(path); - - if (libraryDirectory != null) + Log($"Failed to get the directory of the current runtime from path '{path}'", LLamaLogLevel.Error, config.LogCallback); + continue; + } + + // List which will hold all paths to dependencies to load + var dependencyPaths = new List(); + + // We should always load ggml-base from the current runtime directory + dependencyPaths.Add(Path.Combine(currentRuntimeDirectory, $"{libPrefix}ggml-base{ext}")); + + // If the library has metadata, we can check if we need to load additional dependencies + if (library.Metadata != null) + { + if (systemInfo.OSPlatform == OSPlatform.OSX) { - // Construct the dependency (libggml) path - string dependencyPath = Path.Combine(libraryDirectory, $"{libPrefix}ggml{ext}"); - - // Try to load the dependency - var dependencyResult = TryLoad(dependencyPath, description.SearchDirectories, config.LogCallback); + // // ggml-metal (uncomment if needed, requires testing) + // if (os == "osx-arm64") + // dependencyPaths.Add(Path.Combine(currentRuntimeDirectory, $"{libPrefix}ggml-metal{ext}")); + + // ggml-cpu + // On OSX, we should load the CPU backend from the current directory + dependencyPaths.Add(Path.Combine(currentRuntimeDirectory, $"{libPrefix}ggml-cpu{ext}")); + } + else + { + // ggml-cuda + if (library.Metadata.UseCuda) + dependencyPaths.Add(Path.Combine(currentRuntimeDirectory, $"{libPrefix}ggml-cuda{ext}")); + + // ggml-vulkan + if (library.Metadata.UseVulkan) + dependencyPaths.Add(Path.Combine(currentRuntimeDirectory, $"{libPrefix}ggml-vulkan{ext}")); + + // ggml-cpu + // On other platforms (Windows, Linux), we need to load the CPU backend from the specified AVX level directory + // We are using the AVX level supplied by NativeLibraryConfig, which automatically detects the highest supported AVX level for us + dependencyPaths.Add(Path.Combine( + $"runtimes/{os}/native/{NativeLibraryConfig.AvxLevelToString(library.Metadata.AvxLevel)}", + $"{libPrefix}ggml-cpu{ext}" + )); + } + } + + // And finally, we can add ggml + dependencyPaths.Add(Path.Combine(currentRuntimeDirectory, $"{libPrefix}ggml{ext}")); + + // Now, we will loop through our dependencyPaths and try to load them one by one + foreach (var dependencyPath in dependencyPaths) + { + // Try to load the dependency + var dependencyResult = TryLoad(dependencyPath, description.SearchDirectories, config.LogCallback); - // If we successfully loaded the library, log it - if (dependencyResult != IntPtr.Zero) - { - Log($"Successfully loaded dependency '{dependencyPath}'", 
LLamaLogLevel.Info, config.LogCallback); - } - else - { - Log($"Failed loading dependency '{dependencyPath}'", LLamaLogLevel.Info, config.LogCallback); - } + // If we successfully loaded the library, log it + if (dependencyResult != IntPtr.Zero) + { + Log($"Successfully loaded dependency '{dependencyPath}'", LLamaLogLevel.Info, config.LogCallback); + } + else + { + Log($"Failed loading dependency '{dependencyPath}'", LLamaLogLevel.Info, config.LogCallback); } } - // Try to load the library + // Try to load the main library var result = TryLoad(path, description.SearchDirectories, config.LogCallback); // If we successfully loaded the library, return the handle From 40a8c6cf94c0cfcac24911f93d04cdc0a3222d95 Mon Sep 17 00:00:00 2001 From: m0nsky Date: Fri, 17 Jan 2025 23:38:24 +0100 Subject: [PATCH 11/22] Restore original comment --- LLama/Native/Load/NativeLibraryUtils.cs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/LLama/Native/Load/NativeLibraryUtils.cs b/LLama/Native/Load/NativeLibraryUtils.cs index 8227e8cc1..0414e1fde 100644 --- a/LLama/Native/Load/NativeLibraryUtils.cs +++ b/LLama/Native/Load/NativeLibraryUtils.cs @@ -43,6 +43,8 @@ internal static IntPtr TryLoadLibrary(NativeLibraryConfig config, out INativeLib foreach (var path in paths) { + Log($"Got relative library path '{path}' from local with {library.Metadata}, trying to load it...", LLamaLogLevel.Debug, config.LogCallback); + // After the llama.cpp binaries have been split up (PR #10256), we need to load the dependencies manually. // It can't be done automatically on Windows, because the dependencies can be in different folders (for example, ggml-cuda.dll from the cuda12 folder, and ggml-cpu.dll from the avx2 folder) // It can't be done automatically on Linux, because Linux uses the environment variable "LD_LIBRARY_PATH" to automatically load dependencies, and LD_LIBRARY_PATH can only be From dc3dff15e9112dadd267647fc5251cf67e4cf09a Mon Sep 17 00:00:00 2001 From: m0nsky Date: Sun, 19 Jan 2025 00:43:46 +0100 Subject: [PATCH 12/22] Update the dependency loader for ggml-metal and ggml-blas --- LLama/Native/Load/NativeLibraryUtils.cs | 29 +++++++++++++++---------- 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/LLama/Native/Load/NativeLibraryUtils.cs b/LLama/Native/Load/NativeLibraryUtils.cs index 0414e1fde..d0d853f63 100644 --- a/LLama/Native/Load/NativeLibraryUtils.cs +++ b/LLama/Native/Load/NativeLibraryUtils.cs @@ -71,16 +71,29 @@ internal static IntPtr TryLoadLibrary(NativeLibraryConfig config, out INativeLib { if (systemInfo.OSPlatform == OSPlatform.OSX) { - // // ggml-metal (uncomment if needed, requires testing) - // if (os == "osx-arm64") - // dependencyPaths.Add(Path.Combine(currentRuntimeDirectory, $"{libPrefix}ggml-metal{ext}")); + // On OSX, we should load the CPU backend from the current directory // ggml-cpu - // On OSX, we should load the CPU backend from the current directory dependencyPaths.Add(Path.Combine(currentRuntimeDirectory, $"{libPrefix}ggml-cpu{ext}")); + + // ggml-metal (only supported on osx-arm64) + if (os == "osx-arm64") + dependencyPaths.Add(Path.Combine(currentRuntimeDirectory, $"{libPrefix}ggml-metal{ext}")); + + // ggml-blas (osx-x64, osx-x64-rosetta2 and osx-arm64 all have blas) + dependencyPaths.Add(Path.Combine(currentRuntimeDirectory, $"{libPrefix}ggml-blas{ext}")); } else { + // On other platforms (Windows, Linux), we need to load the CPU backend from the specified AVX level directory + // We are using the AVX level supplied by NativeLibraryConfig, which automatically 
detects the highest supported AVX level for us + + // ggml-cpu + dependencyPaths.Add(Path.Combine( + $"runtimes/{os}/native/{NativeLibraryConfig.AvxLevelToString(library.Metadata.AvxLevel)}", + $"{libPrefix}ggml-cpu{ext}" + )); + // ggml-cuda if (library.Metadata.UseCuda) dependencyPaths.Add(Path.Combine(currentRuntimeDirectory, $"{libPrefix}ggml-cuda{ext}")); @@ -88,14 +101,6 @@ internal static IntPtr TryLoadLibrary(NativeLibraryConfig config, out INativeLib // ggml-vulkan if (library.Metadata.UseVulkan) dependencyPaths.Add(Path.Combine(currentRuntimeDirectory, $"{libPrefix}ggml-vulkan{ext}")); - - // ggml-cpu - // On other platforms (Windows, Linux), we need to load the CPU backend from the specified AVX level directory - // We are using the AVX level supplied by NativeLibraryConfig, which automatically detects the highest supported AVX level for us - dependencyPaths.Add(Path.Combine( - $"runtimes/{os}/native/{NativeLibraryConfig.AvxLevelToString(library.Metadata.AvxLevel)}", - $"{libPrefix}ggml-cpu{ext}" - )); } } From 7b558ceb7d1182c7dcc834b1b2b1b262f49e9f38 Mon Sep 17 00:00:00 2001 From: m0nsky Date: Sun, 19 Jan 2025 00:47:54 +0100 Subject: [PATCH 13/22] Update the runtime targets for ggml-metal and ggml-blas --- LLama/LLamaSharp.Runtime.targets | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/LLama/LLamaSharp.Runtime.targets b/LLama/LLamaSharp.Runtime.targets index 10140cc06..2523e55c1 100644 --- a/LLama/LLamaSharp.Runtime.targets +++ b/LLama/LLamaSharp.Runtime.targets @@ -263,6 +263,14 @@ PreserveNewest runtimes/osx-arm64/native/libggml-cpu.dylib + + PreserveNewest + runtimes/osx-arm64/native/libggml-metal.dylib + + + PreserveNewest + runtimes/osx-arm64/native/libggml-blas.dylib + PreserveNewest runtimes/osx-arm64/native/libggml.dylib @@ -288,6 +296,10 @@ PreserveNewest runtimes/osx-x64/native/libggml-cpu.dylib + + PreserveNewest + runtimes/osx-x64/native/libggml-blas.dylib + PreserveNewest runtimes/osx-x64/native/libggml.dylib @@ -309,6 +321,10 @@ PreserveNewest runtimes/osx-x64/native/rosetta2/libggml-cpu.dylib + + PreserveNewest + runtimes/osx-x64/native/rosetta2/libggml-blas.dylib + PreserveNewest runtimes/osx-x64/native/rosetta2/libggml.dylib From 6d0b42122e655cc9d393c10e971adb8017c1035d Mon Sep 17 00:00:00 2001 From: m0nsky Date: Sun, 19 Jan 2025 00:54:18 +0100 Subject: [PATCH 14/22] Add CPU backend (fallback) dependency for the GPU backends --- LLama/runtimes/build/LLamaSharp.Backend.Cuda11.Linux.nuspec | 4 ++++ LLama/runtimes/build/LLamaSharp.Backend.Cuda11.Windows.nuspec | 4 ++++ LLama/runtimes/build/LLamaSharp.Backend.Cuda12.Linux.nuspec | 4 ++++ LLama/runtimes/build/LLamaSharp.Backend.Cuda12.Windows.nuspec | 4 ++++ LLama/runtimes/build/LLamaSharp.Backend.Vulkan.Linux.nuspec | 4 ++++ LLama/runtimes/build/LLamaSharp.Backend.Vulkan.Windows.nuspec | 4 ++++ 6 files changed, 24 insertions(+) diff --git a/LLama/runtimes/build/LLamaSharp.Backend.Cuda11.Linux.nuspec b/LLama/runtimes/build/LLamaSharp.Backend.Cuda11.Linux.nuspec index 7b4f959f4..db7e1c139 100644 --- a/LLama/runtimes/build/LLamaSharp.Backend.Cuda11.Linux.nuspec +++ b/LLama/runtimes/build/LLamaSharp.Backend.Cuda11.Linux.nuspec @@ -12,6 +12,10 @@ Copyright 2023 The llama.cpp Authors. All rights reserved. 
LLamaSharp LLama LLM GPT AI ChatBot SciSharp + + + + diff --git a/LLama/runtimes/build/LLamaSharp.Backend.Cuda11.Windows.nuspec b/LLama/runtimes/build/LLamaSharp.Backend.Cuda11.Windows.nuspec index 34bc6781d..72d7c5774 100644 --- a/LLama/runtimes/build/LLamaSharp.Backend.Cuda11.Windows.nuspec +++ b/LLama/runtimes/build/LLamaSharp.Backend.Cuda11.Windows.nuspec @@ -12,6 +12,10 @@ Copyright 2023 The llama.cpp Authors. All rights reserved. LLamaSharp LLama LLM GPT AI ChatBot SciSharp + + + + diff --git a/LLama/runtimes/build/LLamaSharp.Backend.Cuda12.Linux.nuspec b/LLama/runtimes/build/LLamaSharp.Backend.Cuda12.Linux.nuspec index 8834ae413..643ac1633 100644 --- a/LLama/runtimes/build/LLamaSharp.Backend.Cuda12.Linux.nuspec +++ b/LLama/runtimes/build/LLamaSharp.Backend.Cuda12.Linux.nuspec @@ -12,6 +12,10 @@ Copyright 2023 The llama.cpp Authors. All rights reserved. LLamaSharp LLama LLM GPT AI ChatBot SciSharp + + + + diff --git a/LLama/runtimes/build/LLamaSharp.Backend.Cuda12.Windows.nuspec b/LLama/runtimes/build/LLamaSharp.Backend.Cuda12.Windows.nuspec index 3d37accec..8117cf04f 100644 --- a/LLama/runtimes/build/LLamaSharp.Backend.Cuda12.Windows.nuspec +++ b/LLama/runtimes/build/LLamaSharp.Backend.Cuda12.Windows.nuspec @@ -12,6 +12,10 @@ Copyright 2023 The llama.cpp Authors. All rights reserved. LLamaSharp LLama LLM GPT AI ChatBot SciSharp + + + + diff --git a/LLama/runtimes/build/LLamaSharp.Backend.Vulkan.Linux.nuspec b/LLama/runtimes/build/LLamaSharp.Backend.Vulkan.Linux.nuspec index 725764097..77ae83324 100644 --- a/LLama/runtimes/build/LLamaSharp.Backend.Vulkan.Linux.nuspec +++ b/LLama/runtimes/build/LLamaSharp.Backend.Vulkan.Linux.nuspec @@ -12,6 +12,10 @@ Copyright 2023 The llama.cpp Authors. All rights reserved. LLamaSharp LLama LLM GPT AI ChatBot SciSharp + + + + diff --git a/LLama/runtimes/build/LLamaSharp.Backend.Vulkan.Windows.nuspec b/LLama/runtimes/build/LLamaSharp.Backend.Vulkan.Windows.nuspec index 5c5b83f94..55ee0784c 100644 --- a/LLama/runtimes/build/LLamaSharp.Backend.Vulkan.Windows.nuspec +++ b/LLama/runtimes/build/LLamaSharp.Backend.Vulkan.Windows.nuspec @@ -12,6 +12,10 @@ Copyright 2023 The llama.cpp Authors. All rights reserved. LLamaSharp LLama LLM GPT AI ChatBot SciSharp + + + + From 4dbdc822519790cec562fcdb90d9a8edc8cc4835 Mon Sep 17 00:00:00 2001 From: m0nsky Date: Sun, 19 Jan 2025 00:55:22 +0100 Subject: [PATCH 15/22] Fix icons for the nuget backends --- LLama/runtimes/build/LLamaSharp.Backend.Cuda11.Linux.nuspec | 1 + LLama/runtimes/build/LLamaSharp.Backend.Cuda11.Windows.nuspec | 1 + LLama/runtimes/build/LLamaSharp.Backend.Cuda12.Linux.nuspec | 1 + LLama/runtimes/build/LLamaSharp.Backend.Cuda12.Windows.nuspec | 1 + LLama/runtimes/build/LLamaSharp.Backend.Vulkan.Linux.nuspec | 1 + LLama/runtimes/build/LLamaSharp.Backend.Vulkan.Windows.nuspec | 1 + 6 files changed, 6 insertions(+) diff --git a/LLama/runtimes/build/LLamaSharp.Backend.Cuda11.Linux.nuspec b/LLama/runtimes/build/LLamaSharp.Backend.Cuda11.Linux.nuspec index db7e1c139..c158e9459 100644 --- a/LLama/runtimes/build/LLamaSharp.Backend.Cuda11.Linux.nuspec +++ b/LLama/runtimes/build/LLamaSharp.Backend.Cuda11.Linux.nuspec @@ -7,6 +7,7 @@ llama.cpp Authors false MIT + icon512.png https://github.com/SciSharp/LLamaSharp LLamaSharp.Backend.Cuda11.Linux contains the Linux binaries for LLamaSharp with Cuda11 support. 
diff --git a/LLama/runtimes/build/LLamaSharp.Backend.Cuda11.Windows.nuspec b/LLama/runtimes/build/LLamaSharp.Backend.Cuda11.Windows.nuspec index 72d7c5774..0eaac8d04 100644 --- a/LLama/runtimes/build/LLamaSharp.Backend.Cuda11.Windows.nuspec +++ b/LLama/runtimes/build/LLamaSharp.Backend.Cuda11.Windows.nuspec @@ -7,6 +7,7 @@ llama.cpp Authors false MIT + icon512.png https://github.com/SciSharp/LLamaSharp LLamaSharp.Backend.Cuda11.Windows contains the Windows binaries for LLamaSharp with Cuda11 support. diff --git a/LLama/runtimes/build/LLamaSharp.Backend.Cuda12.Linux.nuspec b/LLama/runtimes/build/LLamaSharp.Backend.Cuda12.Linux.nuspec index 643ac1633..87c58d8a4 100644 --- a/LLama/runtimes/build/LLamaSharp.Backend.Cuda12.Linux.nuspec +++ b/LLama/runtimes/build/LLamaSharp.Backend.Cuda12.Linux.nuspec @@ -7,6 +7,7 @@ llama.cpp Authors false MIT + icon512.png https://github.com/SciSharp/LLamaSharp LLamaSharp.Backend.Cuda12.Linux contains the Linux binaries for LLamaSharp with Cuda12 support. diff --git a/LLama/runtimes/build/LLamaSharp.Backend.Cuda12.Windows.nuspec b/LLama/runtimes/build/LLamaSharp.Backend.Cuda12.Windows.nuspec index 8117cf04f..4ad3ec096 100644 --- a/LLama/runtimes/build/LLamaSharp.Backend.Cuda12.Windows.nuspec +++ b/LLama/runtimes/build/LLamaSharp.Backend.Cuda12.Windows.nuspec @@ -7,6 +7,7 @@ llama.cpp Authors false MIT + icon512.png https://github.com/SciSharp/LLamaSharp LLamaSharp.Backend.Cuda12.Windows contains the Windows binaries for LLamaSharp with Cuda12 support. diff --git a/LLama/runtimes/build/LLamaSharp.Backend.Vulkan.Linux.nuspec b/LLama/runtimes/build/LLamaSharp.Backend.Vulkan.Linux.nuspec index 77ae83324..ce3d74f5b 100644 --- a/LLama/runtimes/build/LLamaSharp.Backend.Vulkan.Linux.nuspec +++ b/LLama/runtimes/build/LLamaSharp.Backend.Vulkan.Linux.nuspec @@ -7,6 +7,7 @@ llama.cpp Authors false MIT + icon512.png https://github.com/SciSharp/LLamaSharp LLamaSharp.Backend.Vulkan.Linux contains the Linux binaries for LLamaSharp with Vulkan support. diff --git a/LLama/runtimes/build/LLamaSharp.Backend.Vulkan.Windows.nuspec b/LLama/runtimes/build/LLamaSharp.Backend.Vulkan.Windows.nuspec index 55ee0784c..f353ce105 100644 --- a/LLama/runtimes/build/LLamaSharp.Backend.Vulkan.Windows.nuspec +++ b/LLama/runtimes/build/LLamaSharp.Backend.Vulkan.Windows.nuspec @@ -7,6 +7,7 @@ llama.cpp Authors false MIT + icon512.png https://github.com/SciSharp/LLamaSharp LLamaSharp.Backend.Vulkan.Windows contains the Windows binaries for LLamaSharp with Vulkan support. 
From 556a7c153ff9e884b584f663cd95cf4ae5a193c0 Mon Sep 17 00:00:00 2001 From: m0nsky Date: Sun, 19 Jan 2025 01:03:59 +0100 Subject: [PATCH 16/22] Update nuspec files for the GPU backends --- .../runtimes/build/LLamaSharp.Backend.Cuda11.Linux.nuspec | 6 +++++- .../build/LLamaSharp.Backend.Cuda11.Windows.nuspec | 6 +++++- LLama/runtimes/build/LLamaSharp.Backend.Cuda11.nuspec | 1 + .../runtimes/build/LLamaSharp.Backend.Cuda12.Linux.nuspec | 7 +++++-- .../build/LLamaSharp.Backend.Cuda12.Windows.nuspec | 7 +++++-- .../runtimes/build/LLamaSharp.Backend.Vulkan.Linux.nuspec | 6 +++++- .../build/LLamaSharp.Backend.Vulkan.Windows.nuspec | 7 ++++++- LLama/runtimes/build/LLamaSharp.Backend.Vulkan.nuspec | 3 ++- 8 files changed, 34 insertions(+), 9 deletions(-) diff --git a/LLama/runtimes/build/LLamaSharp.Backend.Cuda11.Linux.nuspec b/LLama/runtimes/build/LLamaSharp.Backend.Cuda11.Linux.nuspec index c158e9459..6abd16ccc 100644 --- a/LLama/runtimes/build/LLamaSharp.Backend.Cuda11.Linux.nuspec +++ b/LLama/runtimes/build/LLamaSharp.Backend.Cuda11.Linux.nuspec @@ -21,9 +21,13 @@ - + + + + + diff --git a/LLama/runtimes/build/LLamaSharp.Backend.Cuda11.Windows.nuspec b/LLama/runtimes/build/LLamaSharp.Backend.Cuda11.Windows.nuspec index 0eaac8d04..a412e2e6f 100644 --- a/LLama/runtimes/build/LLamaSharp.Backend.Cuda11.Windows.nuspec +++ b/LLama/runtimes/build/LLamaSharp.Backend.Cuda11.Windows.nuspec @@ -21,9 +21,13 @@ - + + + + + diff --git a/LLama/runtimes/build/LLamaSharp.Backend.Cuda11.nuspec b/LLama/runtimes/build/LLamaSharp.Backend.Cuda11.nuspec index 1beeeaafc..5ac473914 100644 --- a/LLama/runtimes/build/LLamaSharp.Backend.Cuda11.nuspec +++ b/LLama/runtimes/build/LLamaSharp.Backend.Cuda11.nuspec @@ -22,6 +22,7 @@ + diff --git a/LLama/runtimes/build/LLamaSharp.Backend.Cuda12.Linux.nuspec b/LLama/runtimes/build/LLamaSharp.Backend.Cuda12.Linux.nuspec index 87c58d8a4..687283221 100644 --- a/LLama/runtimes/build/LLamaSharp.Backend.Cuda12.Linux.nuspec +++ b/LLama/runtimes/build/LLamaSharp.Backend.Cuda12.Linux.nuspec @@ -21,10 +21,13 @@ - - + + + + + diff --git a/LLama/runtimes/build/LLamaSharp.Backend.Cuda12.Windows.nuspec b/LLama/runtimes/build/LLamaSharp.Backend.Cuda12.Windows.nuspec index 4ad3ec096..1fd01edb9 100644 --- a/LLama/runtimes/build/LLamaSharp.Backend.Cuda12.Windows.nuspec +++ b/LLama/runtimes/build/LLamaSharp.Backend.Cuda12.Windows.nuspec @@ -21,10 +21,13 @@ - - + + + + + diff --git a/LLama/runtimes/build/LLamaSharp.Backend.Vulkan.Linux.nuspec b/LLama/runtimes/build/LLamaSharp.Backend.Vulkan.Linux.nuspec index ce3d74f5b..3f2202db4 100644 --- a/LLama/runtimes/build/LLamaSharp.Backend.Vulkan.Linux.nuspec +++ b/LLama/runtimes/build/LLamaSharp.Backend.Vulkan.Linux.nuspec @@ -21,9 +21,13 @@ - + + + + + diff --git a/LLama/runtimes/build/LLamaSharp.Backend.Vulkan.Windows.nuspec b/LLama/runtimes/build/LLamaSharp.Backend.Vulkan.Windows.nuspec index f353ce105..3f7487bcd 100644 --- a/LLama/runtimes/build/LLamaSharp.Backend.Vulkan.Windows.nuspec +++ b/LLama/runtimes/build/LLamaSharp.Backend.Vulkan.Windows.nuspec @@ -21,9 +21,14 @@ - + + + + + + diff --git a/LLama/runtimes/build/LLamaSharp.Backend.Vulkan.nuspec b/LLama/runtimes/build/LLamaSharp.Backend.Vulkan.nuspec index b4f26ec97..c972ad0fc 100644 --- a/LLama/runtimes/build/LLamaSharp.Backend.Vulkan.nuspec +++ b/LLama/runtimes/build/LLamaSharp.Backend.Vulkan.nuspec @@ -22,6 +22,7 @@ + - + \ No newline at end of file From f526cbed04c9ddfb171a7c5e2d354246351f3897 Mon Sep 17 00:00:00 2001 From: m0nsky Date: Sun, 19 Jan 2025 01:04:32 +0100 Subject: [PATCH 17/22] 
Update BinaryReleaseId --- LLama/LLamaSharp.csproj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/LLama/LLamaSharp.csproj b/LLama/LLamaSharp.csproj index 4d0d19eb3..784f77221 100644 --- a/LLama/LLamaSharp.csproj +++ b/LLama/LLamaSharp.csproj @@ -56,7 +56,7 @@ - 0827b2c1da-v2 + 0827b2c1da-v5 From 91effe9d24f3aaa48e9bdf0c6920d97808ff0ef4 Mon Sep 17 00:00:00 2001 From: m0nsky Date: Sun, 19 Jan 2025 01:13:21 +0100 Subject: [PATCH 18/22] Update nuspec for CPU & OSX --- .../build/LLamaSharp.Backend.Cpu.nuspec | 46 ++++++++++++++----- 1 file changed, 35 insertions(+), 11 deletions(-) diff --git a/LLama/runtimes/build/LLamaSharp.Backend.Cpu.nuspec b/LLama/runtimes/build/LLamaSharp.Backend.Cpu.nuspec index ab8d11c04..382eb2ae8 100644 --- a/LLama/runtimes/build/LLamaSharp.Backend.Cpu.nuspec +++ b/LLama/runtimes/build/LLamaSharp.Backend.Cpu.nuspec @@ -18,53 +18,77 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + + + + + - - - - - - - - - - + From 3be20b1ecf5d3628cab9b36ced0f5d66e728862e Mon Sep 17 00:00:00 2001 From: m0nsky Date: Sun, 19 Jan 2025 21:44:06 +0100 Subject: [PATCH 19/22] Update CPU nuspec to use noavx folder --- .../build/LLamaSharp.Backend.Cpu.nuspec | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/LLama/runtimes/build/LLamaSharp.Backend.Cpu.nuspec b/LLama/runtimes/build/LLamaSharp.Backend.Cpu.nuspec index 382eb2ae8..debc99506 100644 --- a/LLama/runtimes/build/LLamaSharp.Backend.Cpu.nuspec +++ b/LLama/runtimes/build/LLamaSharp.Backend.Cpu.nuspec @@ -18,11 +18,11 @@ - - - - - + + + + + @@ -42,11 +42,11 @@ - - - - - + + + + + From 686627c691f2b2cc178b8fd8e487948b869b1e77 Mon Sep 17 00:00:00 2001 From: m0nsky Date: Sun, 19 Jan 2025 21:45:32 +0100 Subject: [PATCH 20/22] Update Runtime.targets to use noavx folder --- LLama/LLamaSharp.Runtime.targets | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/LLama/LLamaSharp.Runtime.targets b/LLama/LLamaSharp.Runtime.targets index 2523e55c1..76292aaf5 100644 --- a/LLama/LLamaSharp.Runtime.targets +++ b/LLama/LLamaSharp.Runtime.targets @@ -4,19 +4,19 @@ - + PreserveNewest runtimes/win-x64/native/noavx/llama.dll - + PreserveNewest runtimes/win-x64/native/noavx/ggml.dll - + PreserveNewest runtimes/win-x64/native/noavx/ggml-base.dll - + PreserveNewest runtimes/win-x64/native/noavx/ggml-cpu.dll @@ -130,19 +130,19 @@ - + PreserveNewest runtimes/linux-x64/native/noavx/libllama.so - + PreserveNewest runtimes/linux-x64/native/noavx/libggml.so - + PreserveNewest runtimes/linux-x64/native/noavx/libggml-base.so - + PreserveNewest runtimes/linux-x64/native/noavx/libggml-cpu.so @@ -338,7 +338,7 @@ runtimes/osx-x64/native/rosetta2/libllava_shared.dylib - + PreserveNewest runtimes/win-x64/native/noavx/llava_shared.dll @@ -367,7 +367,7 @@ runtimes/win-x64/native/vulkan/llava_shared.dll - + PreserveNewest runtimes/linux-x64/native/noavx/libllava_shared.so From 1913966d2a967dbe8385f5cfef06311c74ffd43e Mon Sep 17 00:00:00 2001 From: m0nsky Date: Mon, 20 Jan 2025 00:44:51 +0100 Subject: [PATCH 21/22] Update BinaryReleaseId --- LLama/LLamaSharp.csproj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/LLama/LLamaSharp.csproj b/LLama/LLamaSharp.csproj index 784f77221..b2d81711e 100644 --- a/LLama/LLamaSharp.csproj +++ b/LLama/LLamaSharp.csproj @@ -56,7 +56,7 @@ - 0827b2c1da-v5 + 0827b2c1da-v6 From 014ef7844b1b455f6f877a788828e2a2ec9bbeb8 Mon Sep 17 00:00:00 2001 From: m0nsky Date: Mon, 20 Jan 2025 01:48:57 +0100 Subject: [PATCH 22/22] CUDA & Vulkan 
native libraries now correctly store the detected or user defined AVX level --- LLama/Native/Load/DefaultNativeLibrarySelectingPolicy.cs | 4 ++-- LLama/Native/Load/NativeLibraryUtils.cs | 2 +- LLama/Native/Load/NativeLibraryWithCuda.cs | 4 +++- LLama/Native/Load/NativeLibraryWithVulkan.cs | 4 +++- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/LLama/Native/Load/DefaultNativeLibrarySelectingPolicy.cs b/LLama/Native/Load/DefaultNativeLibrarySelectingPolicy.cs index 497902ba1..36ab0c0c8 100644 --- a/LLama/Native/Load/DefaultNativeLibrarySelectingPolicy.cs +++ b/LLama/Native/Load/DefaultNativeLibrarySelectingPolicy.cs @@ -22,12 +22,12 @@ public IEnumerable Apply(NativeLibraryConfig.Description descrip { if (description.UseCuda) { - yield return new NativeLibraryWithCuda(systemInfo.CudaMajorVersion, description.Library, description.SkipCheck); + yield return new NativeLibraryWithCuda(systemInfo.CudaMajorVersion, description.Library, description.AvxLevel, description.SkipCheck); } if (description.UseVulkan) { - yield return new NativeLibraryWithVulkan(systemInfo.VulkanVersion, description.Library, description.SkipCheck); + yield return new NativeLibraryWithVulkan(systemInfo.VulkanVersion, description.Library, description.AvxLevel, description.SkipCheck); } if((!description.UseCuda || !description.UseVulkan) || description.AllowFallback) diff --git a/LLama/Native/Load/NativeLibraryUtils.cs b/LLama/Native/Load/NativeLibraryUtils.cs index d0d853f63..13e68be4d 100644 --- a/LLama/Native/Load/NativeLibraryUtils.cs +++ b/LLama/Native/Load/NativeLibraryUtils.cs @@ -93,7 +93,7 @@ internal static IntPtr TryLoadLibrary(NativeLibraryConfig config, out INativeLib $"runtimes/{os}/native/{NativeLibraryConfig.AvxLevelToString(library.Metadata.AvxLevel)}", $"{libPrefix}ggml-cpu{ext}" )); - + // ggml-cuda if (library.Metadata.UseCuda) dependencyPaths.Add(Path.Combine(currentRuntimeDirectory, $"{libPrefix}ggml-cuda{ext}")); diff --git a/LLama/Native/Load/NativeLibraryWithCuda.cs b/LLama/Native/Load/NativeLibraryWithCuda.cs index 12da095dc..36dc4ca81 100644 --- a/LLama/Native/Load/NativeLibraryWithCuda.cs +++ b/LLama/Native/Load/NativeLibraryWithCuda.cs @@ -28,11 +28,13 @@ public NativeLibraryMetadata? Metadata ///
/// /// + /// /// - public NativeLibraryWithCuda(int majorCudaVersion, NativeLibraryName libraryName, bool skipCheck) + public NativeLibraryWithCuda(int majorCudaVersion, NativeLibraryName libraryName, AvxLevel avxLevel, bool skipCheck) { _majorCudaVersion = majorCudaVersion; _libraryName = libraryName; + _avxLevel = avxLevel; _skipCheck = skipCheck; } diff --git a/LLama/Native/Load/NativeLibraryWithVulkan.cs b/LLama/Native/Load/NativeLibraryWithVulkan.cs index fe4eef01e..c3fe94de3 100644 --- a/LLama/Native/Load/NativeLibraryWithVulkan.cs +++ b/LLama/Native/Load/NativeLibraryWithVulkan.cs @@ -28,11 +28,13 @@ public NativeLibraryMetadata? Metadata ///
/// /// + /// /// - public NativeLibraryWithVulkan(string? vulkanVersion, NativeLibraryName libraryName, bool skipCheck) + public NativeLibraryWithVulkan(string? vulkanVersion, NativeLibraryName libraryName, AvxLevel avxLevel, bool skipCheck) { _vulkanVersion = vulkanVersion; _libraryName = libraryName; + _avxLevel = avxLevel; _skipCheck = skipCheck; }
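Note on PATCH 22/22: with the AVX level now stored on the CUDA and Vulkan wrappers' metadata, the loader can resolve the CPU fallback backend from the matching AVX-level directory instead of the GPU runtime directory. The C# below is a minimal, hypothetical sketch of the resulting path layout only; the "linux-x64", "lib", ".so", "avx2" and "cuda12" values are illustrative assumptions, not the library's actual loader code.

using System;
using System.IO;

// Example platform values; the real loader detects these at runtime.
var os = "linux-x64";
var libPrefix = "lib";
var ext = ".so";
// The loader derives this folder name from the AvxLevel carried in the
// library metadata (via NativeLibraryConfig.AvxLevelToString); "avx2" is
// just an example value here.
var avxDir = "avx2";

// GPU backend: loaded from the backend-specific runtime directory
// ("cuda12" is an assumed example).
var cudaBackend = Path.Combine($"runtimes/{os}/native/cuda12", $"{libPrefix}ggml-cuda{ext}");

// CPU fallback: loaded from the directory matching the detected or
// user-configured AVX level.
var cpuFallback = Path.Combine($"runtimes/{os}/native/{avxDir}", $"{libPrefix}ggml-cpu{ext}");

Console.WriteLine(cudaBackend); // runtimes/linux-x64/native/cuda12/libggml-cuda.so (on Linux)
Console.WriteLine(cpuFallback); // runtimes/linux-x64/native/avx2/libggml-cpu.so (on Linux)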