
Commit da4e62d

Merge branch 'UpdateContextHandling' of https://github.com/zsogitbe/LLamaSharp into UpdateContextHandling

2 parents: f7fdaac + f77f80c


62 files changed (+1709 lines, -128 lines)

.github/_typos.toml
Lines changed: 3 additions & 1 deletion

@@ -17,4 +17,6 @@ extend-exclude = [
 
 [default.extend-words]
 # Used in a comment in SafeLLamaSamplerHandle.cs, as a prefix of "hello"
-teh = "hel"
+teh = "hel"
+# ot is the shorthand version of llama.cpp's override-tensor parameter
+ot = "ot"

.github/workflows/compile.yml
Lines changed: 62 additions & 36 deletions

@@ -28,13 +28,25 @@ jobs:
       include:
         - build: 'noavx'
           defines: '-DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF'
+          os: ubuntu-22.04
+          arch: x64
         - build: 'avx2'
           defines: ''
+          os: ubuntu-22.04
+          arch: x64
         - build: 'avx'
           defines: '-DGGML_AVX2=OFF'
+          os: ubuntu-22.04
+          arch: x64
         - build: 'avx512'
           defines: '-DGGML_AVX512=ON'
-    runs-on: ubuntu-24.04
+          os: ubuntu-22.04
+          arch: x64
+        - build: 'aarch64'
+          defines: '-DGGML_NATIVE=OFF -DGGML_CPU_AARCH64=ON -DGGML_CPU_ARM_ARCH=armv8-a'
+          os: ubuntu-24.04-arm
+          arch: arm64
+    runs-on: ${{ matrix.os }}
     steps:
       - uses: actions/checkout@v4
         with:
@@ -52,28 +64,28 @@ jobs:
       - uses: actions/upload-artifact@v4
         with:
           path: ./build/bin/libllama.so
-          name: llama-bin-linux-${{ matrix.build }}-x64.so
+          name: llama-bin-linux-${{ matrix.build }}-${{ matrix.arch }}.so
           if-no-files-found: error
       - uses: actions/upload-artifact@v4
         with:
           path: ./build/bin/libggml.so
-          name: ggml-bin-linux-${{ matrix.build }}-x64.so
+          name: ggml-bin-linux-${{ matrix.build }}-${{ matrix.arch }}.so
           if-no-files-found: error
       - uses: actions/upload-artifact@v4
         with:
           path: ./build/bin/libggml-base.so
-          name: ggml-base-bin-linux-${{ matrix.build }}-x64.so
+          name: ggml-base-bin-linux-${{ matrix.build }}-${{ matrix.arch }}.so
           if-no-files-found: error
       - uses: actions/upload-artifact@v4
         with:
           path: ./build/bin/libggml-cpu.so
-          name: ggml-cpu-bin-linux-${{ matrix.build }}-x64.so
+          name: ggml-cpu-bin-linux-${{ matrix.build }}-${{ matrix.arch }}.so
           if-no-files-found: error
       - name: Upload Llava
         uses: actions/upload-artifact@v4
         with:
           path: ./build/bin/libllava_shared.so
-          name: llava-bin-linux-${{ matrix.build }}-x64.so
+          name: llava-bin-linux-${{ matrix.build }}-${{ matrix.arch }}.so
           if-no-files-found: error

   compile-musl:
@@ -527,19 +539,15 @@ jobs:
           if-no-files-found: error

   compile-android:
-    # Disable android build
-    if: false
-
+    name: Compile (Android)
     strategy:
       fail-fast: true
       matrix:
         include:
-        - build: 'x86'
-          defines: '-DANDROID_ABI=x86'
         - build: 'x86_64'
-          defines: '-DANDROID_ABI=x86_64'
+          defines: '-DANDROID_ABI=x86_64 -DCMAKE_C_FLAGS=-march=x86-64 -DCMAKE_CXX_FLAGS=-march=x86-64'
         - build: 'arm64-v8a'
-          defines: '-DANDROID_ABI=arm64-v8a'
+          defines: '-DANDROID_ABI=arm64-v8a -DCMAKE_C_FLAGS=-march=armv8.7a -DCMAKE_CXX_FLAGS=-march=armv8.7a'
     runs-on: ubuntu-24.04
     steps:
       - uses: actions/checkout@v4
@@ -555,28 +563,39 @@ jobs:
       - name: Build
         id: cmake_build
         env:
-          CMAKE_FLAGS: '-DCMAKE_TOOLCHAIN_FILE=${{ steps.setup-ndk.outputs.ndk-path }}/build/cmake/android.toolchain.cmake -DANDROID_PLATFORM=android-23'
+          CMAKE_FLAGS: '-DCMAKE_TOOLCHAIN_FILE=${{ steps.setup-ndk.outputs.ndk-path }}/build/cmake/android.toolchain.cmake -DANDROID_PLATFORM=android-23 -DGGML_OPENMP=OFF -DGGML_LLAMAFILE=OFF'
         run: |
-          mkdir build
-          cd build
-          cmake .. ${{ env.COMMON_DEFINE }} ${{ env.CMAKE_FLAGS }} ${{ matrix.defines }}
-          cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
-          cd ..
-          ls -R
+          # export-lora not supported on 32 bit machines hence breaks x86 build
+          sed -i '/add_subdirectory(export-lora)/d' examples/CMakeLists.txt # remove export-lora from examples
+          cmake ${{ env.COMMON_DEFINE }} ${{ env.CMAKE_FLAGS }} ${{ matrix.defines }} -B build
+          cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS}
       - name: Upload Llama
         uses: actions/upload-artifact@v4
         with:
-          path: ./build/src/libllama.so
+          path: ./build/bin/libllama.so
           name: llama-bin-android-${{ matrix.build }}.so
-      - uses: actions/upload-artifact@v4
+      - name: Upload GGML
+        uses: actions/upload-artifact@v4
         with:
-          path: ./build/ggml/src/libggml.so
+          path: ./build/bin/libggml.so
           name: ggml-bin-android-${{ matrix.build }}.so
           if-no-files-found: error
+      - name: Upload GGML Base
+        uses: actions/upload-artifact@v4
+        with:
+          path: ./build/bin/libggml-base.so
+          name: ggml-base-bin-android-${{ matrix.build }}.so
+          if-no-files-found: error
+      - name: Upload GGML CPU
+        uses: actions/upload-artifact@v4
+        with:
+          path: ./build/bin/libggml-cpu.so
+          name: ggml-cpu-bin-android-${{ matrix.build }}.so
+          if-no-files-found: error
       - name: Upload Llava
         uses: actions/upload-artifact@v4
         with:
-          path: ./build/examples/llava/libllava_shared.so
+          path: ./build/bin/libllava_shared.so
           name: llava-bin-android-${{ matrix.build }}.so

   build-deps:
@@ -601,7 +620,7 @@ jobs:
       - name: Rearrange Files
         run: |
           # Make all directories at once
-          mkdir --parents deps/{noavx,avx,avx2,avx512,musl-noavx,musl-avx,musl-avx2,musl-avx512,osx-arm64,osx-x64,osx-x64-rosetta2,cu11.7.1,cu12.2.0,vulkan,android-arm64-v8a,android-x86,android-x86_64}
+          mkdir --parents deps/{noavx,avx,avx2,avx512,linux-arm64,musl-noavx,musl-avx,musl-avx2,musl-avx512,osx-arm64,osx-x64,osx-x64-rosetta2,cu11.7.1,cu12.2.0,vulkan,android-arm64-v8a,android-x86,android-x86_64}

           # Linux
           cp artifacts/ggml-bin-linux-noavx-x64.so/libggml.so deps/noavx/libggml.so
@@ -628,6 +647,13 @@ jobs:
           cp artifacts/llama-bin-linux-avx512-x64.so/libllama.so deps/avx512/libllama.so
           cp artifacts/llava-bin-linux-avx512-x64.so/libllava_shared.so deps/avx512/libllava_shared.so

+          # Arm64
+          cp artifacts/ggml-bin-linux-aarch64-arm64.so/libggml.so deps/linux-arm64/libggml.so
+          cp artifacts/ggml-base-bin-linux-aarch64-arm64.so/libggml-base.so deps/linux-arm64/libggml-base.so
+          cp artifacts/ggml-cpu-bin-linux-aarch64-arm64.so/libggml-cpu.so deps/linux-arm64/libggml-cpu.so
+          cp artifacts/llama-bin-linux-aarch64-arm64.so/libllama.so deps/linux-arm64/libllama.so
+          cp artifacts/llava-bin-linux-aarch64-arm64.so/libllava_shared.so deps/linux-arm64/libllava_shared.so
+
           # Musl
           cp artifacts/ggml-bin-musl-noavx-x64.so/libggml.so deps/musl-noavx/libggml.so
           cp artifacts/ggml-base-bin-musl-noavx-x64.so/libggml-base.so deps/musl-noavx/libggml-base.so
@@ -703,17 +729,17 @@ jobs:
           cp artifacts/llava-bin-osx-x64-rosetta2.dylib/libllava_shared.dylib deps/osx-x64-rosetta2/libllava_shared.dylib

           # Android
-          #cp artifacts/ggml-bin-android-arm64-v8a.so/libggml.so deps/android-arm64-v8a/libggml.so
-          #cp artifacts/llama-bin-android-arm64-v8a.so/libllama.so deps/android-arm64-v8a/libllama.so
-          #cp artifacts/llava-bin-android-arm64-v8a.so/libllava_shared.so deps/android-arm64-v8a/libllava_shared.so
-
-          #cp artifacts/ggml-bin-android-x86.so/libggml.so deps/android-x86/libggml.so
-          #cp artifacts/llama-bin-android-x86.so/libllama.so deps/android-x86/libllama.so
-          #cp artifacts/llava-bin-android-x86.so/libllava_shared.so deps/android-x86/libllava_shared.so
-
-          #cp artifacts/ggml-bin-android-x86_64.so/libggml.so deps/android-x86_64/libggml.so
-          #cp artifacts/llama-bin-android-x86_64.so/libllama.so deps/android-x86_64/libllama.so
-          #cp artifacts/llava-bin-android-x86_64.so/libllava_shared.so deps/android-x86_64/libllava_shared.so
+          cp artifacts/ggml-bin-android-arm64-v8a.so/libggml.so deps/android-arm64-v8a/libggml.so
+          cp artifacts/ggml-base-bin-android-arm64-v8a.so/libggml-base.so deps/android-arm64-v8a/libggml-base.so
+          cp artifacts/ggml-cpu-bin-android-arm64-v8a.so/libggml-cpu.so deps/android-arm64-v8a/libggml-cpu.so
+          cp artifacts/llama-bin-android-arm64-v8a.so/libllama.so deps/android-arm64-v8a/libllama.so
+          cp artifacts/llava-bin-android-arm64-v8a.so/libllava_shared.so deps/android-arm64-v8a/libllava_shared.so
+
+          cp artifacts/ggml-bin-android-x86_64.so/libggml.so deps/android-x86_64/libggml.so
+          cp artifacts/ggml-base-bin-android-x86_64.so/libggml-base.so deps/android-x86_64/libggml-base.so
+          cp artifacts/ggml-cpu-bin-android-x86_64.so/libggml-cpu.so deps/android-x86_64/libggml-cpu.so
+          cp artifacts/llama-bin-android-x86_64.so/libllama.so deps/android-x86_64/libllama.so
+          cp artifacts/llava-bin-android-x86_64.so/libllava_shared.so deps/android-x86_64/libllava_shared.so

           # Windows CUDA
           cp artifacts/ggml-bin-win-cublas-cu11.7.1-x64.dll/ggml.dll deps/cu11.7.1/ggml.dll

.github/workflows/main.yml
Lines changed: 9 additions & 0 deletions

@@ -38,6 +38,15 @@ jobs:
         with:
           dotnet-version: |
             8.0.x
+      - name: Install Mobile Workloads
+        if: ${{ contains(runner.os, 'windows') }}
+        run: |
+          dotnet workload install android --ignore-failed-sources
+          dotnet workload install maui --ignore-failed-sources
+      - name: Remove Mobile Project
+        if: ${{ !contains(runner.os, 'windows') }}
+        run: |
+          dotnet sln LLamaSharp.sln remove Llama.Mobile
       - name: Cache Packages
         uses: actions/cache@v4
         with:

.gitignore
Lines changed: 0 additions & 1 deletion

@@ -337,7 +337,6 @@ test/TensorFlowNET.Examples/mnist
 # training model resources
 .resources
 /redist
-*.xml
 *.xsd

 # docs

LLama.Unittest/ModelsParamsTests.cs
Lines changed: 5 additions & 0 deletions

@@ -41,6 +41,11 @@ public void SerializeRoundTripSystemTextJson()
         actual.MetadataOverrides = null!;
         expected.MetadataOverrides = null!;

+        // Same deal
+        Assert.True(expected.TensorBufferOverrides.SequenceEqual(actual.TensorBufferOverrides));
+        actual.TensorBufferOverrides = null!;
+        expected.TensorBufferOverrides = null!;
+
         // Check encoding is the same
         var b1 = expected.Encoding.GetBytes("Hello");
         var b2 = actual.Encoding.GetBytes("Hello");
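The "Same deal" comment refers to the MetadataOverrides check just above: C# record equality compares a List<T> member by reference, so two lists with identical contents still make the records compare unequal. That is why the test verifies the list contents with SequenceEqual first, then nulls the property on both sides before the final record comparison. A minimal sketch of the underlying behaviour (the int lists are illustrative, not from this commit):

using System;
using System.Collections.Generic;
using System.Linq;

class ListEqualityDemo
{
    static void Main()
    {
        var a = new List<int> { 1, 2, 3 };
        var b = new List<int> { 1, 2, 3 };

        Console.WriteLine(a.Equals(b));        // False: reference equality
        Console.WriteLine(a.SequenceEqual(b)); // True: element-wise equality
    }
}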

LLama.Unittest/Native/SafeLlamaModelHandleTests.cs
Lines changed: 9 additions & 9 deletions

@@ -1,7 +1,7 @@
 using System.Runtime.InteropServices;
 using System.Text;
 using LLama.Common;
-using LLama.Extensions;
+using LLama.Extensions;
 using Xunit;

 namespace LLama.Unittest.Native;
@@ -20,13 +20,13 @@ public SafeLlamaModelHandleTests()
         _model = LLamaWeights.LoadFromFile(@params);
     }

-    // Note: This test is flaky; it appears to often (but not always) fail the first time it is run after downloading the model file, but then succeed every time after!
-    //[SkippableFact]
-    //public void MetadataValByKey_ReturnsCorrectly()
-    //{
-    //    Skip.If(RuntimeInformation.IsOSPlatform(OSPlatform.OSX), "Skipping this test on macOS because for some reason the metadata is incorrect, but the rest of the tests work well on macOS [Check later!].");
-    //    const string key = "general.name";
-    //    var template = _model.NativeHandle.MetadataValueByKey(key);
-    //    var name = Encoding.UTF8.GetStringFromSpan(template!.Value.Span);
+    // Note: This test is flaky; it appears to often (but not always) fail the first time it is run after downloading the model file, but then succeed every time after!
+    //[SkippableFact]
+    //public void MetadataValByKey_ReturnsCorrectly()
+    //{
+    //    Skip.If(RuntimeInformation.IsOSPlatform(OSPlatform.OSX), "Skipping this test on macOS because for some reason the metadata is incorrect, but the rest of the tests work well on macOS [Check later!].");
+    //    const string key = "general.name";
+    //    var template = _model.NativeHandle.MetadataValueByKey(key);
+    //    var name = Encoding.UTF8.GetStringFromSpan(template!.Value.Span);
     //}
 }

LLama.Web/Common/ModelOptions.cs
Lines changed: 3 additions & 0 deletions

@@ -26,6 +26,9 @@ public class ModelOptions
     /// <inheritdoc />
     public GPUSplitMode? SplitMode { get; set; }

+    /// <inheritdoc />
+    public List<TensorBufferOverride> TensorBufferOverrides { get; set; } = new();
+
     /// <inheritdoc />
     public int GpuLayerCount { get; set; } = 20;

LLama/Abstractions/IModelParams.cs
Lines changed: 6 additions & 0 deletions

@@ -38,6 +38,12 @@ public interface IModelParams
     /// </summary>
     GPUSplitMode? SplitMode { get; }

+    /// <summary>
+    /// Buffer type overrides for specific tensor patterns, allowing you to specify hardware devices to use for individual tensors or sets of tensors.
+    /// Equivalent to --override-tensor or -ot on the llama.cpp command line or tensor_buft_overrides internally.
+    /// </summary>
+    List<TensorBufferOverride> TensorBufferOverrides { get; }
+
     /// <summary>
     /// Number of layers to run in VRAM / GPU memory (n_gpu_layers)
     /// </summary>
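For context, llama.cpp's --override-tensor flag takes a "<pattern>=<buffer type>" pair; in LLamaSharp the same split becomes the two constructor arguments of TensorBufferOverride, defined in the new file below. A hedged sketch of the mapping (the pattern and buffer name are illustrative, not taken from this commit):

using LLama.Abstractions;

// llama.cpp CLI:  --override-tensor "ffn_.*_exps=CPU"
// LLamaSharp:     keep tensors whose names match the regex in CPU buffers
var overrideTensor = new TensorBufferOverride(@"ffn_.*_exps", "CPU");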
LLama/Abstractions/TensorBufferOverride.cs (new file)
Lines changed: 36 additions & 0 deletions

@@ -0,0 +1,36 @@
+using System;
+
+namespace LLama.Abstractions
+{
+    /// <summary>
+    /// Represents a mapping between a tensor name pattern and a specific buffer type
+    /// </summary>
+    public class TensorBufferOverride
+    {
+        /// <summary>
+        /// Pattern to match tensor names. This is a regular expression. You can check the tensor names via the model.Metadata.
+        /// </summary>
+        public string Pattern { get; set; }
+
+        /// <summary>
+        /// Buffer type to use for matching tensors. Examples: CPU, GPU0, GPU1
+        /// </summary>
+        public string BufferType { get; set; }
+
+        /// <summary>
+        /// Creates a new tensor buffer override
+        /// </summary>
+        /// <param name="pattern">Pattern to match tensor names</param>
+        /// <param name="bufferType">Buffer type to use for matching tensors</param>
+        public TensorBufferOverride(string pattern, string bufferType)
+        {
+            if (string.IsNullOrEmpty(pattern))
+                throw new ArgumentException("Pattern cannot be null or empty", nameof(pattern));
+            if (string.IsNullOrEmpty(bufferType))
+                throw new ArgumentException("Buffer type cannot be null or empty", nameof(bufferType));
+
+            Pattern = pattern;
+            BufferType = bufferType;
+        }
+    }
+}

LLama/Common/ModelParams.cs
Lines changed: 3 additions & 0 deletions

@@ -21,6 +21,9 @@ public record ModelParams
     /// <inheritdoc />
     public GPUSplitMode? SplitMode { get; set; }

+    /// <inheritdoc />
+    public List<TensorBufferOverride> TensorBufferOverrides { get; set; } = new();
+
     /// <inheritdoc />
     public int GpuLayerCount { get; set; } = 20;
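Putting the pieces together, the new option is set like any other ModelParams property. A minimal usage sketch (the model path, layer count, and pattern are assumptions for illustration, not part of this commit):

using LLama;
using LLama.Abstractions;
using LLama.Common;

// Offload layers to the GPU, but pin tensors whose names match the
// regular expression into CPU buffers instead.
var parameters = new ModelParams("path/to/model.gguf")
{
    GpuLayerCount = 20,
    TensorBufferOverrides =
    {
        new TensorBufferOverride(@"ffn_.*_exps", "CPU"),
    },
};

using var model = LLamaWeights.LoadFromFile(parameters);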
