Switch the tensor override helper to a local variable

dpmm99 · dpmm99 · commit f4213a295c94 · 2025-05-11T15:34:50.000-05:00
Tested with this configuration in BatchedExecutorSimple:
parameters.GpuLayerCount = 99;
parameters.TensorBufferOverrides = new List<Abstractions.TensorBufferOverride> { new("blk\.(2[6-9]|[3-4][0-9]).*", "CPU") };
I chose that configuration because it sped up Qwen-3-30B-A3B by a factor of 10 on my machine (though the gain would likely be smaller for batching, since it's an MoE model).
diff --git a/LLama/Extensions/IModelParamsExtensions.cs b/LLama/Extensions/IModelParamsExtensions.cs
@@ -11,8 +11,6 @@ namespace LLama.Extensions;
 /// </summary>
 public static class IModelParamsExtensions
 {
-    private static LLamaTensorBufferOverrideHelper bufferOverrideHelper = new();
-
     /// <summary>
     /// Convert the given `IModelParams` into a `LLamaModelParams`
     /// </summary>
@@ -50,6 +48,7 @@ public static IDisposable ToLlamaModelParams(this IModelParams @params, out LLam
         // Add tensor buffer overrides, if any
         if (@params.TensorBufferOverrides.Count > 0)
         {
+            var bufferOverrideHelper = new LLamaTensorBufferOverrideHelper();
             disposer.Add(bufferOverrideHelper);
 
             foreach (var tensorOverride in @params.TensorBufferOverrides)