Skip to content

Commit 7b2ee55

Browse files
committed
Merge remote-tracking branch 'upstream/master'
1 parent 2bcb62e commit 7b2ee55

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

61 files changed

+1696
-114
lines changed

.github/_typos.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,4 +17,6 @@ extend-exclude = [
1717

1818
[default.extend-words]
1919
# Used in a comment in SafeLLamaSamplerHandle.cs, as a prefix of "hello"
20-
teh = "hel"
20+
teh = "hel"
21+
# ot is the shorthand version of llama.cpp's override-tensor parameter
22+
ot = "ot"

.github/workflows/compile.yml

Lines changed: 40 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -28,19 +28,19 @@ jobs:
2828
include:
2929
- build: 'noavx'
3030
defines: '-DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF'
31-
os: ubuntu-24.04
31+
os: ubuntu-22.04
3232
arch: x64
3333
- build: 'avx2'
3434
defines: ''
35-
os: ubuntu-24.04
35+
os: ubuntu-22.04
3636
arch: x64
3737
- build: 'avx'
3838
defines: '-DGGML_AVX2=OFF'
39-
os: ubuntu-24.04
39+
os: ubuntu-22.04
4040
arch: x64
4141
- build: 'avx512'
4242
defines: '-DGGML_AVX512=ON'
43-
os: ubuntu-24.04
43+
os: ubuntu-22.04
4444
arch: x64
4545
- build: 'aarch64'
4646
defines: '-DGGML_NATIVE=OFF -DGGML_CPU_AARCH64=ON -DGGML_CPU_ARM_ARCH=armv8-a'
@@ -539,19 +539,15 @@ jobs:
539539
if-no-files-found: error
540540

541541
compile-android:
542-
# Disable android build
543-
if: false
544-
542+
name: Compile (Android)
545543
strategy:
546544
fail-fast: true
547545
matrix:
548546
include:
549-
- build: 'x86'
550-
defines: '-DANDROID_ABI=x86'
551547
- build: 'x86_64'
552-
defines: '-DANDROID_ABI=x86_64'
548+
defines: '-DANDROID_ABI=x86_64 -DCMAKE_C_FLAGS=-march=x86-64 -DCMAKE_CXX_FLAGS=-march=x86-64'
553549
- build: 'arm64-v8a'
554-
defines: '-DANDROID_ABI=arm64-v8a'
550+
defines: '-DANDROID_ABI=arm64-v8a -DCMAKE_C_FLAGS=-march=armv8.7a -DCMAKE_CXX_FLAGS=-march=armv8.7a'
555551
runs-on: ubuntu-24.04
556552
steps:
557553
- uses: actions/checkout@v4
@@ -567,28 +563,39 @@ jobs:
567563
- name: Build
568564
id: cmake_build
569565
env:
570-
CMAKE_FLAGS: '-DCMAKE_TOOLCHAIN_FILE=${{ steps.setup-ndk.outputs.ndk-path }}/build/cmake/android.toolchain.cmake -DANDROID_PLATFORM=android-23'
566+
CMAKE_FLAGS: '-DCMAKE_TOOLCHAIN_FILE=${{ steps.setup-ndk.outputs.ndk-path }}/build/cmake/android.toolchain.cmake -DANDROID_PLATFORM=android-23 -DGGML_OPENMP=OFF -DGGML_LLAMAFILE=OFF'
571567
run: |
572-
mkdir build
573-
cd build
574-
cmake .. ${{ env.COMMON_DEFINE }} ${{ env.CMAKE_FLAGS }} ${{ matrix.defines }}
575-
cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
576-
cd ..
577-
ls -R
568+
# export-lora not supported on 32 bit machines hence breaks x86 build
569+
sed -i '/add_subdirectory(export-lora)/d' examples/CMakeLists.txt # remove export-lora from examples
570+
cmake ${{ env.COMMON_DEFINE }} ${{ env.CMAKE_FLAGS }} ${{ matrix.defines }} -B build
571+
cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS}
578572
- name: Upload Llama
579573
uses: actions/upload-artifact@v4
580574
with:
581-
path: ./build/src/libllama.so
575+
path: ./build/bin/libllama.so
582576
name: llama-bin-android-${{ matrix.build }}.so
583-
- uses: actions/upload-artifact@v4
577+
- name: Upload GGML
578+
uses: actions/upload-artifact@v4
584579
with:
585-
path: ./build/ggml/src/libggml.so
580+
path: ./build/bin/libggml.so
586581
name: ggml-bin-android-${{ matrix.build }}.so
587582
if-no-files-found: error
583+
- name: Upload GGML Base
584+
uses: actions/upload-artifact@v4
585+
with:
586+
path: ./build/bin/libggml-base.so
587+
name: ggml-base-bin-android-${{ matrix.build }}.so
588+
if-no-files-found: error
589+
- name: Upload GGML CPU
590+
uses: actions/upload-artifact@v4
591+
with:
592+
path: ./build/bin/libggml-cpu.so
593+
name: ggml-cpu-bin-android-${{ matrix.build }}.so
594+
if-no-files-found: error
588595
- name: Upload Llava
589596
uses: actions/upload-artifact@v4
590597
with:
591-
path: ./build/examples/llava/libllava_shared.so
598+
path: ./build/bin/libllava_shared.so
592599
name: llava-bin-android-${{ matrix.build }}.so
593600

594601
build-deps:
@@ -722,17 +729,17 @@ jobs:
722729
cp artifacts/llava-bin-osx-x64-rosetta2.dylib/libllava_shared.dylib deps/osx-x64-rosetta2/libllava_shared.dylib
723730
724731
# Android
725-
#cp artifacts/ggml-bin-android-arm64-v8a.so/libggml.so deps/android-arm64-v8a/libggml.so
726-
#cp artifacts/llama-bin-android-arm64-v8a.so/libllama.so deps/android-arm64-v8a/libllama.so
727-
#cp artifacts/llava-bin-android-arm64-v8a.so/libllava_shared.so deps/android-arm64-v8a/libllava_shared.so
728-
729-
#cp artifacts/ggml-bin-android-x86.so/libggml.so deps/android-x86/libggml.so
730-
#cp artifacts/llama-bin-android-x86.so/libllama.so deps/android-x86/libllama.so
731-
#cp artifacts/llava-bin-android-x86.so/libllava_shared.so deps/android-x86/libllava_shared.so
732-
733-
#cp artifacts/ggml-bin-android-x86_64.so/libggml.so deps/android-x86_64/libggml.so
734-
#cp artifacts/llama-bin-android-x86_64.so/libllama.so deps/android-x86_64/libllama.so
735-
#cp artifacts/llava-bin-android-x86_64.so/libllava_shared.so deps/android-x86_64/libllava_shared.so
732+
cp artifacts/ggml-bin-android-arm64-v8a.so/libggml.so deps/android-arm64-v8a/libggml.so
733+
cp artifacts/ggml-base-bin-android-arm64-v8a.so/libggml-base.so deps/android-arm64-v8a/libggml-base.so
734+
cp artifacts/ggml-cpu-bin-android-arm64-v8a.so/libggml-cpu.so deps/android-arm64-v8a/libggml-cpu.so
735+
cp artifacts/llama-bin-android-arm64-v8a.so/libllama.so deps/android-arm64-v8a/libllama.so
736+
cp artifacts/llava-bin-android-arm64-v8a.so/libllava_shared.so deps/android-arm64-v8a/libllava_shared.so
737+
738+
cp artifacts/ggml-bin-android-x86_64.so/libggml.so deps/android-x86_64/libggml.so
739+
cp artifacts/ggml-base-bin-android-x86_64.so/libggml-base.so deps/android-x86_64/libggml-base.so
740+
cp artifacts/ggml-cpu-bin-android-x86_64.so/libggml-cpu.so deps/android-x86_64/libggml-cpu.so
741+
cp artifacts/llama-bin-android-x86_64.so/libllama.so deps/android-x86_64/libllama.so
742+
cp artifacts/llava-bin-android-x86_64.so/libllava_shared.so deps/android-x86_64/libllava_shared.so
736743
737744
# Windows CUDA
738745
cp artifacts/ggml-bin-win-cublas-cu11.7.1-x64.dll/ggml.dll deps/cu11.7.1/ggml.dll

.github/workflows/main.yml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,15 @@ jobs:
3838
with:
3939
dotnet-version: |
4040
8.0.x
41+
- name: Install Mobile Workloads
42+
if: ${{ contains(runner.os, 'windows') }}
43+
run: |
44+
dotnet workload install android --ignore-failed-sources
45+
dotnet workload install maui --ignore-failed-sources
46+
- name: Remove Mobile Project
47+
if: ${{ !contains(runner.os, 'windows') }}
48+
run: |
49+
dotnet sln LLamaSharp.sln remove Llama.Mobile
4150
- name: Cache Packages
4251
uses: actions/cache@v4
4352
with:

.gitignore

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -337,7 +337,6 @@ test/TensorFlowNET.Examples/mnist
337337
# training model resources
338338
.resources
339339
/redist
340-
*.xml
341340
*.xsd
342341

343342
# docs

LLama.Unittest/ModelsParamsTests.cs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,11 @@ public void SerializeRoundTripSystemTextJson()
4141
actual.MetadataOverrides = null!;
4242
expected.MetadataOverrides = null!;
4343

44+
// Same deal
45+
Assert.True(expected.TensorBufferOverrides.SequenceEqual(actual.TensorBufferOverrides));
46+
actual.TensorBufferOverrides = null!;
47+
expected.TensorBufferOverrides = null!;
48+
4449
// Check encoding is the same
4550
var b1 = expected.Encoding.GetBytes("Hello");
4651
var b2 = actual.Encoding.GetBytes("Hello");
Lines changed: 11 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
using System.Runtime.InteropServices;
22
using System.Text;
33
using LLama.Common;
4-
using LLama.Extensions;
4+
using LLama.Extensions;
55
using Xunit;
66

77
namespace LLama.Unittest.Native;
@@ -19,21 +19,15 @@ public SafeLlamaModelHandleTests()
1919
};
2020
_model = LLamaWeights.LoadFromFile(@params);
2121
}
22+
23+
// Note: This test is flaky, it appears to often (but not always) fail the first time it is run after downloading the model file, but then succeed every time after!
24+
//[SkippableFact]
25+
//public void MetadataValByKey_ReturnsCorrectly()
26+
//{
27+
// Skip.If(RuntimeInformation.IsOSPlatform(OSPlatform.OSX), "Skipping this test on macOS because for some reason the metadata is incorrect, but the rest of tests work well on macOS [Check later!].");
2228

23-
[SkippableFact]
24-
public void MetadataValByKey_ReturnsCorrectly()
25-
{
26-
Skip.If(RuntimeInformation.IsOSPlatform(OSPlatform.OSX), "Skipping this test on macOS because for some reason the meta data is incorrect, but the rest of tests work well on mscOS [Check later!].");
27-
28-
const string key = "general.name";
29-
var template = _model.NativeHandle.MetadataValueByKey(key);
30-
var name = Encoding.UTF8.GetStringFromSpan(template!.Value.Span);
31-
32-
const string expected = "SmolLM 360M";
33-
Assert.Equal(expected, name);
34-
35-
var metadataLookup = _model.Metadata[key];
36-
Assert.Equal(expected, metadataLookup);
37-
Assert.Equal(name, metadataLookup);
38-
}
29+
// const string key = "general.name";
30+
// var template = _model.NativeHandle.MetadataValueByKey(key);
31+
// var name = Encoding.UTF8.GetStringFromSpan(template!.Value.Span);
32+
//}
3933
}

LLama.Web/Common/ModelOptions.cs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,9 @@ public class ModelOptions
2626
/// <inheritdoc />
2727
public GPUSplitMode? SplitMode { get; set; }
2828

29+
/// <inheritdoc />
30+
public List<TensorBufferOverride> TensorBufferOverrides { get; set; } = new();
31+
2932
/// <inheritdoc />
3033
public int GpuLayerCount { get; set; } = 20;
3134

LLama/Abstractions/IModelParams.cs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,12 @@ public interface IModelParams
3838
/// </summary>
3939
GPUSplitMode? SplitMode { get; }
4040

41+
/// <summary>
42+
/// Buffer type overrides for specific tensor patterns, allowing you to specify hardware devices to use for individual tensors or sets of tensors.
43+
/// Equivalent to --override-tensor or -ot on the llama.cpp command line or tensor_buft_overrides internally.
44+
/// </summary>
45+
List<TensorBufferOverride> TensorBufferOverrides { get; }
46+
4147
/// <summary>
4248
/// Number of layers to run in VRAM / GPU memory (n_gpu_layers)
4349
/// </summary>
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
using System;
2+
3+
namespace LLama.Abstractions
4+
{
5+
/// <summary>
6+
/// Represents a mapping between a tensor name pattern and a specific buffer type
7+
/// </summary>
8+
public class TensorBufferOverride
9+
{
10+
/// <summary>
11+
/// Pattern to match tensor names. This is a regular expression. You can check the tensor names via the model.Metadata.
12+
/// </summary>
13+
public string Pattern { get; set; }
14+
15+
/// <summary>
16+
/// Buffer type to use for matching tensors. Examples: CPU, GPU0, GPU1
17+
/// </summary>
18+
public string BufferType { get; set; }
19+
20+
/// <summary>
21+
/// Creates a new tensor buffer override
22+
/// </summary>
23+
/// <param name="pattern">Pattern to match tensor names</param>
24+
/// <param name="bufferType">Buffer type to use for matching tensors</param>
25+
public TensorBufferOverride(string pattern, string bufferType)
26+
{
27+
if (string.IsNullOrEmpty(pattern))
28+
throw new ArgumentException("Pattern cannot be null or empty", nameof(pattern));
29+
if (string.IsNullOrEmpty(bufferType))
30+
throw new ArgumentException("Buffer type cannot be null or empty", nameof(bufferType));
31+
32+
Pattern = pattern;
33+
BufferType = bufferType;
34+
}
35+
}
36+
}

LLama/Common/ModelParams.cs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@ public record ModelParams
2121
/// <inheritdoc />
2222
public GPUSplitMode? SplitMode { get; set; }
2323

24+
/// <inheritdoc />
25+
public List<TensorBufferOverride> TensorBufferOverrides { get; set; } = new();
26+
2427
/// <inheritdoc />
2528
public int GpuLayerCount { get; set; } = 20;
2629

0 commit comments

Comments
 (0)