Skip to content

Commit d905f9d

Browse files
authored
Merge branch 'master' into feat/tensor-override
2 parents 1ac27e2 + 1668e76 commit d905f9d

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

65 files changed

+1699
-206
lines changed

.github/workflows/compile.yml

Lines changed: 62 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,25 @@ jobs:
2828
include:
2929
- build: 'noavx'
3030
defines: '-DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF'
31+
os: ubuntu-22.04
32+
arch: x64
3133
- build: 'avx2'
3234
defines: ''
35+
os: ubuntu-22.04
36+
arch: x64
3337
- build: 'avx'
3438
defines: '-DGGML_AVX2=OFF'
39+
os: ubuntu-22.04
40+
arch: x64
3541
- build: 'avx512'
3642
defines: '-DGGML_AVX512=ON'
37-
runs-on: ubuntu-24.04
43+
os: ubuntu-22.04
44+
arch: x64
45+
- build: 'aarch64'
46+
defines: '-DGGML_NATIVE=OFF -DGGML_CPU_AARCH64=ON -DGGML_CPU_ARM_ARCH=armv8-a'
47+
os: ubuntu-24.04-arm
48+
arch: arm64
49+
runs-on: ${{ matrix.os }}
3850
steps:
3951
- uses: actions/checkout@v4
4052
with:
@@ -52,28 +64,28 @@ jobs:
5264
- uses: actions/upload-artifact@v4
5365
with:
5466
path: ./build/bin/libllama.so
55-
name: llama-bin-linux-${{ matrix.build }}-x64.so
67+
name: llama-bin-linux-${{ matrix.build }}-${{ matrix.arch }}.so
5668
if-no-files-found: error
5769
- uses: actions/upload-artifact@v4
5870
with:
5971
path: ./build/bin/libggml.so
60-
name: ggml-bin-linux-${{ matrix.build }}-x64.so
72+
name: ggml-bin-linux-${{ matrix.build }}-${{ matrix.arch }}.so
6173
if-no-files-found: error
6274
- uses: actions/upload-artifact@v4
6375
with:
6476
path: ./build/bin/libggml-base.so
65-
name: ggml-base-bin-linux-${{ matrix.build }}-x64.so
77+
name: ggml-base-bin-linux-${{ matrix.build }}-${{ matrix.arch }}.so
6678
if-no-files-found: error
6779
- uses: actions/upload-artifact@v4
6880
with:
6981
path: ./build/bin/libggml-cpu.so
70-
name: ggml-cpu-bin-linux-${{ matrix.build }}-x64.so
82+
name: ggml-cpu-bin-linux-${{ matrix.build }}-${{ matrix.arch }}.so
7183
if-no-files-found: error
7284
- name: Upload Llava
7385
uses: actions/upload-artifact@v4
7486
with:
7587
path: ./build/bin/libllava_shared.so
76-
name: llava-bin-linux-${{ matrix.build }}-x64.so
88+
name: llava-bin-linux-${{ matrix.build }}-${{ matrix.arch }}.so
7789
if-no-files-found: error
7890

7991
compile-musl:
@@ -527,19 +539,15 @@ jobs:
527539
if-no-files-found: error
528540

529541
compile-android:
530-
# Disable android build
531-
if: false
532-
542+
name: Compile (Android)
533543
strategy:
534544
fail-fast: true
535545
matrix:
536546
include:
537-
- build: 'x86'
538-
defines: '-DANDROID_ABI=x86'
539547
- build: 'x86_64'
540-
defines: '-DANDROID_ABI=x86_64'
548+
defines: '-DANDROID_ABI=x86_64 -DCMAKE_C_FLAGS=-march=x86-64 -DCMAKE_CXX_FLAGS=-march=x86-64'
541549
- build: 'arm64-v8a'
542-
defines: '-DANDROID_ABI=arm64-v8a'
550+
defines: '-DANDROID_ABI=arm64-v8a -DCMAKE_C_FLAGS=-march=armv8.7a -DCMAKE_CXX_FLAGS=-march=armv8.7a'
543551
runs-on: ubuntu-24.04
544552
steps:
545553
- uses: actions/checkout@v4
@@ -555,28 +563,39 @@ jobs:
555563
- name: Build
556564
id: cmake_build
557565
env:
558-
CMAKE_FLAGS: '-DCMAKE_TOOLCHAIN_FILE=${{ steps.setup-ndk.outputs.ndk-path }}/build/cmake/android.toolchain.cmake -DANDROID_PLATFORM=android-23'
566+
CMAKE_FLAGS: '-DCMAKE_TOOLCHAIN_FILE=${{ steps.setup-ndk.outputs.ndk-path }}/build/cmake/android.toolchain.cmake -DANDROID_PLATFORM=android-23 -DGGML_OPENMP=OFF -DGGML_LLAMAFILE=OFF'
559567
run: |
560-
mkdir build
561-
cd build
562-
cmake .. ${{ env.COMMON_DEFINE }} ${{ env.CMAKE_FLAGS }} ${{ matrix.defines }}
563-
cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
564-
cd ..
565-
ls -R
568+
# export-lora not supported on 32 bit machines hence breaks x86 build
569+
sed -i '/add_subdirectory(export-lora)/d' examples/CMakeLists.txt # remove export-lora from examples
570+
cmake ${{ env.COMMON_DEFINE }} ${{ env.CMAKE_FLAGS }} ${{ matrix.defines }} -B build
571+
cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS}
566572
- name: Upload Llama
567573
uses: actions/upload-artifact@v4
568574
with:
569-
path: ./build/src/libllama.so
575+
path: ./build/bin/libllama.so
570576
name: llama-bin-android-${{ matrix.build }}.so
571-
- uses: actions/upload-artifact@v4
577+
- name: Upload GGML
578+
uses: actions/upload-artifact@v4
572579
with:
573-
path: ./build/ggml/src/libggml.so
580+
path: ./build/bin/libggml.so
574581
name: ggml-bin-android-${{ matrix.build }}.so
575582
if-no-files-found: error
583+
- name: Upload GGML Base
584+
uses: actions/upload-artifact@v4
585+
with:
586+
path: ./build/bin/libggml-base.so
587+
name: ggml-base-bin-android-${{ matrix.build }}.so
588+
if-no-files-found: error
589+
- name: Upload GGML CPU
590+
uses: actions/upload-artifact@v4
591+
with:
592+
path: ./build/bin/libggml-cpu.so
593+
name: ggml-cpu-bin-android-${{ matrix.build }}.so
594+
if-no-files-found: error
576595
- name: Upload Llava
577596
uses: actions/upload-artifact@v4
578597
with:
579-
path: ./build/examples/llava/libllava_shared.so
598+
path: ./build/bin/libllava_shared.so
580599
name: llava-bin-android-${{ matrix.build }}.so
581600

582601
build-deps:
@@ -601,7 +620,7 @@ jobs:
601620
- name: Rearrange Files
602621
run: |
603622
# Make all directories at once
604-
mkdir --parents deps/{noavx,avx,avx2,avx512,musl-noavx,musl-avx,musl-avx2,musl-avx512,osx-arm64,osx-x64,osx-x64-rosetta2,cu11.7.1,cu12.2.0,vulkan,android-arm64-v8a,android-x86,android-x86_64}
623+
mkdir --parents deps/{noavx,avx,avx2,avx512,linux-arm64,musl-noavx,musl-avx,musl-avx2,musl-avx512,osx-arm64,osx-x64,osx-x64-rosetta2,cu11.7.1,cu12.2.0,vulkan,android-arm64-v8a,android-x86,android-x86_64}
605624
606625
# Linux
607626
cp artifacts/ggml-bin-linux-noavx-x64.so/libggml.so deps/noavx/libggml.so
@@ -628,6 +647,13 @@ jobs:
628647
cp artifacts/llama-bin-linux-avx512-x64.so/libllama.so deps/avx512/libllama.so
629648
cp artifacts/llava-bin-linux-avx512-x64.so/libllava_shared.so deps/avx512/libllava_shared.so
630649
650+
# Arm64
651+
cp artifacts/ggml-bin-linux-aarch64-arm64.so/libggml.so deps/linux-arm64/libggml.so
652+
cp artifacts/ggml-base-bin-linux-aarch64-arm64.so/libggml-base.so deps/linux-arm64/libggml-base.so
653+
cp artifacts/ggml-cpu-bin-linux-aarch64-arm64.so/libggml-cpu.so deps/linux-arm64/libggml-cpu.so
654+
cp artifacts/llama-bin-linux-aarch64-arm64.so/libllama.so deps/linux-arm64/libllama.so
655+
cp artifacts/llava-bin-linux-aarch64-arm64.so/libllava_shared.so deps/linux-arm64/libllava_shared.so
656+
631657
# Musl
632658
cp artifacts/ggml-bin-musl-noavx-x64.so/libggml.so deps/musl-noavx/libggml.so
633659
cp artifacts/ggml-base-bin-musl-noavx-x64.so/libggml-base.so deps/musl-noavx/libggml-base.so
@@ -703,17 +729,17 @@ jobs:
703729
cp artifacts/llava-bin-osx-x64-rosetta2.dylib/libllava_shared.dylib deps/osx-x64-rosetta2/libllava_shared.dylib
704730
705731
# Android
706-
#cp artifacts/ggml-bin-android-arm64-v8a.so/libggml.so deps/android-arm64-v8a/libggml.so
707-
#cp artifacts/llama-bin-android-arm64-v8a.so/libllama.so deps/android-arm64-v8a/libllama.so
708-
#cp artifacts/llava-bin-android-arm64-v8a.so/libllava_shared.so deps/android-arm64-v8a/libllava_shared.so
709-
710-
#cp artifacts/ggml-bin-android-x86.so/libggml.so deps/android-x86/libggml.so
711-
#cp artifacts/llama-bin-android-x86.so/libllama.so deps/android-x86/libllama.so
712-
#cp artifacts/llava-bin-android-x86.so/libllava_shared.so deps/android-x86/libllava_shared.so
713-
714-
#cp artifacts/ggml-bin-android-x86_64.so/libggml.so deps/android-x86_64/libggml.so
715-
#cp artifacts/llama-bin-android-x86_64.so/libllama.so deps/android-x86_64/libllama.so
716-
#cp artifacts/llava-bin-android-x86_64.so/libllava_shared.so deps/android-x86_64/libllava_shared.so
732+
cp artifacts/ggml-bin-android-arm64-v8a.so/libggml.so deps/android-arm64-v8a/libggml.so
733+
cp artifacts/ggml-base-bin-android-arm64-v8a.so/libggml-base.so deps/android-arm64-v8a/libggml-base.so
734+
cp artifacts/ggml-cpu-bin-android-arm64-v8a.so/libggml-cpu.so deps/android-arm64-v8a/libggml-cpu.so
735+
cp artifacts/llama-bin-android-arm64-v8a.so/libllama.so deps/android-arm64-v8a/libllama.so
736+
cp artifacts/llava-bin-android-arm64-v8a.so/libllava_shared.so deps/android-arm64-v8a/libllava_shared.so
737+
738+
cp artifacts/ggml-bin-android-x86_64.so/libggml.so deps/android-x86_64/libggml.so
739+
cp artifacts/ggml-base-bin-android-x86_64.so/libggml-base.so deps/android-x86_64/libggml-base.so
740+
cp artifacts/ggml-cpu-bin-android-x86_64.so/libggml-cpu.so deps/android-x86_64/libggml-cpu.so
741+
cp artifacts/llama-bin-android-x86_64.so/libllama.so deps/android-x86_64/libllama.so
742+
cp artifacts/llava-bin-android-x86_64.so/libllava_shared.so deps/android-x86_64/libllava_shared.so
717743
718744
# Windows CUDA
719745
cp artifacts/ggml-bin-win-cublas-cu11.7.1-x64.dll/ggml.dll deps/cu11.7.1/ggml.dll

.github/workflows/main.yml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,15 @@ jobs:
3838
with:
3939
dotnet-version: |
4040
8.0.x
41+
- name: Install Mobile Workloads
42+
if: ${{ contains(runner.os, 'windows') }}
43+
run: |
44+
dotnet workload install android --ignore-failed-sources
45+
dotnet workload install maui --ignore-failed-sources
46+
- name: Remove Mobile Project
47+
if: ${{ !contains(runner.os, 'windows') }}
48+
run: |
49+
dotnet sln LLamaSharp.sln remove Llama.Mobile
4150
- name: Cache Packages
4251
uses: actions/cache@v4
4352
with:

.gitignore

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -337,7 +337,6 @@ test/TensorFlowNET.Examples/mnist
337337
# training model resources
338338
.resources
339339
/redist
340-
*.xml
341340
*.xsd
342341

343342
# docs

LLama.Examples/Examples/KernelMemory.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ and answer questions about them in an interactive chat prompt.
4646

4747
// Ask a predefined question
4848
Console.ForegroundColor = ConsoleColor.Green;
49-
string question1 = "What formats does KM support";
49+
string question1 = "What is Kernel Memory?";
5050
Console.WriteLine($"Question: {question1}");
5151
await AnswerQuestion(memory, question1);
5252

LLama.Examples/Examples/KernelMemorySaveAndLoad.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ Press ENTER to proceed...
5454
await IngestDocuments(memory);
5555
}
5656

57-
await AskSingleQuestion(memory, "What formats does KM support?");
57+
await AskSingleQuestion(memory, "What is Kernel Memory?");
5858
await StartUserChatSession(memory);
5959
}
6060

LLama.Examples/LLama.Examples.csproj

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,15 @@
1515

1616
<ItemGroup>
1717
<PackageReference Include="Microsoft.Extensions.Logging.Console" Version="9.0.3" />
18-
<PackageReference Include="Microsoft.KernelMemory.Core" Version="0.97.250211.1" />
18+
<PackageReference Include="Microsoft.KernelMemory.Core" Version="0.98.250323.1" />
1919
<PackageReference Include="Microsoft.SemanticKernel" Version="1.44.0" />
20-
<PackageReference Include="Microsoft.SemanticKernel.Plugins.Memory" Version="1.6.2-alpha" />
20+
<PackageReference Include="Microsoft.SemanticKernel.Plugins.Memory" Version="1.44.0-alpha" />
2121
<PackageReference Include="NAudio" Version="2.2.1" />
2222
<PackageReference Include="SixLabors.ImageSharp" Version="3.1.7" />
23-
<PackageReference Include="Spectre.Console" Version="0.49.1" />
24-
<PackageReference Include="Spectre.Console.ImageSharp" Version="0.49.1" />
25-
<PackageReference Include="Whisper.net" Version="1.7.4" />
26-
<PackageReference Include="Whisper.net.Runtime" Version="1.7.4" />
23+
<PackageReference Include="Spectre.Console" Version="0.50.0" />
24+
<PackageReference Include="Spectre.Console.ImageSharp" Version="0.50.0" />
25+
<PackageReference Include="Whisper.net" Version="1.8.1" />
26+
<PackageReference Include="Whisper.net.Runtime" Version="1.8.1" />
2727
<PackageReference Include="Whisper.net.Runtime.Clblast" Version="1.5.0" />
2828
<PackageReference Include="Whisper.net.Runtime.CoreML" Version="1.7.4" />
2929
<PackageReference Include="Whisper.net.Runtime.Cublas" Version="1.5.0" />

LLama.KernelMemory/LLamaSharpTextEmbeddingGenerator.cs

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,11 @@ public LLamaSharpTextEmbeddingGenerator(LLamaSharpConfig config)
3131

3232
var @params = new ModelParams(config.ModelPath)
3333
{
34-
ContextSize = config.ContextSize,
35-
GpuLayerCount = config.GpuLayerCount ?? 20,
36-
34+
ContextSize = config?.ContextSize ?? 2048,
35+
GpuLayerCount = config?.GpuLayerCount ?? 20,
36+
//Embeddings = true,
37+
MainGpu = config?.MainGpu ?? 0,
38+
SplitMode = config?.SplitMode ?? LLama.Native.GPUSplitMode.None,
3739
PoolingType = LLamaPoolingType.Mean,
3840
};
3941

@@ -54,11 +56,11 @@ public LLamaSharpTextEmbeddingGenerator(LLamaSharpConfig config, LLamaWeights we
5456

5557
var @params = new ModelParams(config.ModelPath)
5658
{
57-
ContextSize = config.ContextSize ?? 2048,
58-
GpuLayerCount = config.GpuLayerCount ?? 20,
59-
Embeddings = true,
60-
MainGpu = config.MainGpu,
61-
SplitMode = config.SplitMode,
59+
ContextSize = config?.ContextSize ?? 2048,
60+
GpuLayerCount = config?.GpuLayerCount ?? 20,
61+
//Embeddings = true,
62+
MainGpu = config?.MainGpu ?? 0,
63+
SplitMode = config?.SplitMode ?? LLama.Native.GPUSplitMode.None,
6264
PoolingType = LLamaPoolingType.Mean,
6365
};
6466
_weights = weights;

LLama.KernelMemory/LlamaSharpTextGenerator.cs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,10 @@ public LlamaSharpTextGenerator(LLamaSharpConfig config)
3232
{
3333
var parameters = new ModelParams(config.ModelPath)
3434
{
35-
ContextSize = config.ContextSize ?? 2048,
36-
GpuLayerCount = config.GpuLayerCount ?? 20,
35+
ContextSize = config?.ContextSize ?? 2048,
36+
GpuLayerCount = config?.GpuLayerCount ?? 20,
37+
MainGpu = config?.MainGpu ?? 0,
38+
SplitMode = config?.SplitMode ?? LLama.Native.GPUSplitMode.None,
3739
};
3840
_weights = LLamaWeights.LoadFromFile(parameters);
3941
_context = _weights.CreateContext(parameters);

LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
</PropertyGroup>
3535

3636
<ItemGroup>
37-
<PackageReference Include="Microsoft.SemanticKernel.Abstractions" Version="1.44.0" />
37+
<PackageReference Include="Microsoft.SemanticKernel.Abstractions" Version="1.48.0" />
3838
</ItemGroup>
3939

4040
<ItemGroup Condition="'$(TargetFramework)' == 'netstandard2.0'">

LLama.Unittest/Constants.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,15 +20,15 @@ public static int CIGpuLayerCount
2020
{
2121
get
2222
{
23-
if (RuntimeInformation.IsOSPlatform(OSPlatform.OSX))
23+
//if (RuntimeInformation.IsOSPlatform(OSPlatform.OSX))
2424
{
2525
#if DEBUG
2626
return 20;
2727
#else
2828
return 0;
2929
#endif
3030
}
31-
else return 20;
31+
//else return 20;
3232
}
3333
}
3434
}

0 commit comments

Comments
 (0)