Skip to content

Commit 37bb3c3

Browse files
committed
Merge branch 'master' into feature-llamareranker
2 parents 49ae0a8 + 9ed7378 commit 37bb3c3

19 files changed

+274
-95
lines changed

.github/workflows/compile.yml

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,25 @@ jobs:
2828
include:
2929
- build: 'noavx'
3030
defines: '-DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF'
31+
os: ubuntu-24.04
32+
arch: x64
3133
- build: 'avx2'
3234
defines: ''
35+
os: ubuntu-24.04
36+
arch: x64
3337
- build: 'avx'
3438
defines: '-DGGML_AVX2=OFF'
39+
os: ubuntu-24.04
40+
arch: x64
3541
- build: 'avx512'
3642
defines: '-DGGML_AVX512=ON'
37-
runs-on: ubuntu-24.04
43+
os: ubuntu-24.04
44+
arch: x64
45+
- build: 'aarch64'
46+
defines: '-DGGML_NATIVE=OFF -DGGML_CPU_AARCH64=ON -DGGML_CPU_ARM_ARCH=armv8-a'
47+
os: ubuntu-24.04-arm
48+
arch: arm64
49+
runs-on: ${{ matrix.os }}
3850
steps:
3951
- uses: actions/checkout@v4
4052
with:
@@ -52,28 +64,28 @@ jobs:
5264
- uses: actions/upload-artifact@v4
5365
with:
5466
path: ./build/bin/libllama.so
55-
name: llama-bin-linux-${{ matrix.build }}-x64.so
67+
name: llama-bin-linux-${{ matrix.build }}-${{ matrix.arch }}.so
5668
if-no-files-found: error
5769
- uses: actions/upload-artifact@v4
5870
with:
5971
path: ./build/bin/libggml.so
60-
name: ggml-bin-linux-${{ matrix.build }}-x64.so
72+
name: ggml-bin-linux-${{ matrix.build }}-${{ matrix.arch }}.so
6173
if-no-files-found: error
6274
- uses: actions/upload-artifact@v4
6375
with:
6476
path: ./build/bin/libggml-base.so
65-
name: ggml-base-bin-linux-${{ matrix.build }}-x64.so
77+
name: ggml-base-bin-linux-${{ matrix.build }}-${{ matrix.arch }}.so
6678
if-no-files-found: error
6779
- uses: actions/upload-artifact@v4
6880
with:
6981
path: ./build/bin/libggml-cpu.so
70-
name: ggml-cpu-bin-linux-${{ matrix.build }}-x64.so
82+
name: ggml-cpu-bin-linux-${{ matrix.build }}-${{ matrix.arch }}.so
7183
if-no-files-found: error
7284
- name: Upload Llava
7385
uses: actions/upload-artifact@v4
7486
with:
7587
path: ./build/bin/libllava_shared.so
76-
name: llava-bin-linux-${{ matrix.build }}-x64.so
88+
name: llava-bin-linux-${{ matrix.build }}-${{ matrix.arch }}.so
7789
if-no-files-found: error
7890

7991
compile-musl:
@@ -601,7 +613,7 @@ jobs:
601613
- name: Rearrange Files
602614
run: |
603615
# Make all directories at once
604-
mkdir --parents deps/{noavx,avx,avx2,avx512,musl-noavx,musl-avx,musl-avx2,musl-avx512,osx-arm64,osx-x64,osx-x64-rosetta2,cu11.7.1,cu12.2.0,vulkan,android-arm64-v8a,android-x86,android-x86_64}
616+
mkdir --parents deps/{noavx,avx,avx2,avx512,linux-arm64,musl-noavx,musl-avx,musl-avx2,musl-avx512,osx-arm64,osx-x64,osx-x64-rosetta2,cu11.7.1,cu12.2.0,vulkan,android-arm64-v8a,android-x86,android-x86_64}
605617
606618
# Linux
607619
cp artifacts/ggml-bin-linux-noavx-x64.so/libggml.so deps/noavx/libggml.so
@@ -628,6 +640,13 @@ jobs:
628640
cp artifacts/llama-bin-linux-avx512-x64.so/libllama.so deps/avx512/libllama.so
629641
cp artifacts/llava-bin-linux-avx512-x64.so/libllava_shared.so deps/avx512/libllava_shared.so
630642
643+
# Arm64
644+
cp artifacts/ggml-bin-linux-aarch64-arm64.so/libggml.so deps/linux-arm64/libggml.so
645+
cp artifacts/ggml-base-bin-linux-aarch64-arm64.so/libggml-base.so deps/linux-arm64/libggml-base.so
646+
cp artifacts/ggml-cpu-bin-linux-aarch64-arm64.so/libggml-cpu.so deps/linux-arm64/libggml-cpu.so
647+
cp artifacts/llama-bin-linux-aarch64-arm64.so/libllama.so deps/linux-arm64/libllama.so
648+
cp artifacts/llava-bin-linux-aarch64-arm64.so/libllava_shared.so deps/linux-arm64/libllava_shared.so
649+
631650
# Musl
632651
cp artifacts/ggml-bin-musl-noavx-x64.so/libggml.so deps/musl-noavx/libggml.so
633652
cp artifacts/ggml-base-bin-musl-noavx-x64.so/libggml-base.so deps/musl-noavx/libggml-base.so

LLama.Examples/Examples/KernelMemory.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ and answer questions about them in an interactive chat prompt.
4646

4747
// Ask a predefined question
4848
Console.ForegroundColor = ConsoleColor.Green;
49-
string question1 = "What formats does KM support";
49+
string question1 = "What is Kernel Memory";
5050
Console.WriteLine($"Question: {question1}");
5151
await AnswerQuestion(memory, question1);
5252

LLama.Examples/Examples/KernelMemorySaveAndLoad.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ Press ENTER to proceed...
5454
await IngestDocuments(memory);
5555
}
5656

57-
await AskSingleQuestion(memory, "What formats does KM support?");
57+
await AskSingleQuestion(memory, "What is Kernel Memory");
5858
await StartUserChatSession(memory);
5959
}
6060

LLama.Examples/LLama.Examples.csproj

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,15 @@
1515

1616
<ItemGroup>
1717
<PackageReference Include="Microsoft.Extensions.Logging.Console" Version="9.0.3" />
18-
<PackageReference Include="Microsoft.KernelMemory.Core" Version="0.97.250211.1" />
18+
<PackageReference Include="Microsoft.KernelMemory.Core" Version="0.98.250323.1" />
1919
<PackageReference Include="Microsoft.SemanticKernel" Version="1.44.0" />
20-
<PackageReference Include="Microsoft.SemanticKernel.Plugins.Memory" Version="1.6.2-alpha" />
20+
<PackageReference Include="Microsoft.SemanticKernel.Plugins.Memory" Version="1.44.0-alpha" />
2121
<PackageReference Include="NAudio" Version="2.2.1" />
2222
<PackageReference Include="SixLabors.ImageSharp" Version="3.1.7" />
23-
<PackageReference Include="Spectre.Console" Version="0.49.1" />
24-
<PackageReference Include="Spectre.Console.ImageSharp" Version="0.49.1" />
25-
<PackageReference Include="Whisper.net" Version="1.7.4" />
26-
<PackageReference Include="Whisper.net.Runtime" Version="1.7.4" />
23+
<PackageReference Include="Spectre.Console" Version="0.50.0" />
24+
<PackageReference Include="Spectre.Console.ImageSharp" Version="0.50.0" />
25+
<PackageReference Include="Whisper.net" Version="1.8.1" />
26+
<PackageReference Include="Whisper.net.Runtime" Version="1.8.1" />
2727
<PackageReference Include="Whisper.net.Runtime.Clblast" Version="1.5.0" />
2828
<PackageReference Include="Whisper.net.Runtime.CoreML" Version="1.7.4" />
2929
<PackageReference Include="Whisper.net.Runtime.Cublas" Version="1.5.0" />

LLama.KernelMemory/LLamaSharpTextEmbeddingGenerator.cs

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,11 @@ public LLamaSharpTextEmbeddingGenerator(LLamaSharpConfig config)
3131

3232
var @params = new ModelParams(config.ModelPath)
3333
{
34-
ContextSize = config.ContextSize,
35-
GpuLayerCount = config.GpuLayerCount ?? 20,
36-
34+
ContextSize = config?.ContextSize ?? 2048,
35+
GpuLayerCount = config?.GpuLayerCount ?? 20,
36+
//Embeddings = true,
37+
MainGpu = config?.MainGpu ?? 0,
38+
SplitMode = config?.SplitMode ?? LLama.Native.GPUSplitMode.None,
3739
PoolingType = LLamaPoolingType.Mean,
3840
};
3941

@@ -54,11 +56,11 @@ public LLamaSharpTextEmbeddingGenerator(LLamaSharpConfig config, LLamaWeights we
5456

5557
var @params = new ModelParams(config.ModelPath)
5658
{
57-
ContextSize = config.ContextSize ?? 2048,
58-
GpuLayerCount = config.GpuLayerCount ?? 20,
59-
Embeddings = true,
60-
MainGpu = config.MainGpu,
61-
SplitMode = config.SplitMode,
59+
ContextSize = config?.ContextSize ?? 2048,
60+
GpuLayerCount = config?.GpuLayerCount ?? 20,
61+
//Embeddings = true,
62+
MainGpu = config?.MainGpu ?? 0,
63+
SplitMode = config?.SplitMode ?? LLama.Native.GPUSplitMode.None,
6264
PoolingType = LLamaPoolingType.Mean,
6365
};
6466
_weights = weights;

LLama.KernelMemory/LlamaSharpTextGenerator.cs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,10 @@ public LlamaSharpTextGenerator(LLamaSharpConfig config)
3232
{
3333
var parameters = new ModelParams(config.ModelPath)
3434
{
35-
ContextSize = config.ContextSize ?? 2048,
36-
GpuLayerCount = config.GpuLayerCount ?? 20,
35+
ContextSize = config?.ContextSize ?? 2048,
36+
GpuLayerCount = config?.GpuLayerCount ?? 20,
37+
MainGpu = config?.MainGpu ?? 0,
38+
SplitMode = config?.SplitMode ?? LLama.Native.GPUSplitMode.None,
3739
};
3840
_weights = LLamaWeights.LoadFromFile(parameters);
3941
_context = _weights.CreateContext(parameters);

LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
</PropertyGroup>
3535

3636
<ItemGroup>
37-
<PackageReference Include="Microsoft.SemanticKernel.Abstractions" Version="1.44.0" />
37+
<PackageReference Include="Microsoft.SemanticKernel.Abstractions" Version="1.48.0" />
3838
</ItemGroup>
3939

4040
<ItemGroup Condition="'$(TargetFramework)' == 'netstandard2.0'">

LLama.Unittest/Constants.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,15 +21,15 @@ public static int CIGpuLayerCount
2121
{
2222
get
2323
{
24-
if (RuntimeInformation.IsOSPlatform(OSPlatform.OSX))
24+
//if (RuntimeInformation.IsOSPlatform(OSPlatform.OSX))
2525
{
2626
#if DEBUG
2727
return 20;
2828
#else
2929
return 0;
3030
#endif
3131
}
32-
else return 20;
32+
//else return 20;
3333
}
3434
}
3535
}

LLama.Unittest/KernelMemory/ITextTokenizerTests.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ public ITextTokenizerTests(ITestOutputHelper testOutputHelper)
2222
_testOutputHelper = testOutputHelper;
2323

2424
_infParams = new() { AntiPrompts = ["\n\n"] };
25-
_lsConfig = new(Constants.GenerativeModelPath) { DefaultInferenceParams = _infParams, ContextSize = 512 };
25+
_lsConfig = new(Constants.GenerativeModelPath) { DefaultInferenceParams = _infParams, ContextSize = 512, SplitMode = LLama.Native.GPUSplitMode.Layer };
2626

2727
testOutputHelper.WriteLine($"Using model {Path.GetFileName(_lsConfig.ModelPath)}");
2828
}

LLama.Unittest/LLama.Unittest.csproj

Lines changed: 93 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
<Project Sdk="Microsoft.NET.Sdk">
1+
<Project Sdk="Microsoft.NET.Sdk">
22
<Import Project="..\LLama\LLamaSharp.Runtime.targets" />
33
<PropertyGroup>
44
<TargetFramework>net8.0</TargetFramework>
@@ -25,35 +25,105 @@
2525
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
2626
<PrivateAssets>all</PrivateAssets>
2727
</PackageReference>
28+
<PackageReference Include="Xunit.SkippableFact" Version="1.5.23" />
2829
</ItemGroup>
2930

30-
<Target Name="DownloadContentFilesInner">
31-
32-
<DownloadFile SourceUrl="https://huggingface.co/bartowski/Llama-3.2-1B-Instruct-GGUF/resolve/main/Llama-3.2-1B-Instruct-Q4_0.gguf" DestinationFolder="Models" DestinationFileName="Llama-3.2-1B-Instruct-Q4_0.gguf" SkipUnchangedFiles="true">
33-
</DownloadFile>
31+
<!-- Define each file to download.
32+
The Include value is just an identifier.
33+
SourceUrl is the remote URL.
34+
DestinationFolder is where you want it saved.
35+
LocalFileName is the desired file name. -->
36+
<ItemGroup>
37+
<DownloadFileItem Include="Llama-3.2-1B-Instruct-Q4_0">
38+
<SourceUrl>https://huggingface.co/bartowski/Llama-3.2-1B-Instruct-GGUF/resolve/main/Llama-3.2-1B-Instruct-Q4_0.gguf</SourceUrl>
39+
<DestinationFolder>Models</DestinationFolder>
40+
<LocalFileName>Llama-3.2-1B-Instruct-Q4_0.gguf</LocalFileName>
41+
</DownloadFileItem>
3442

35-
<DownloadFile SourceUrl="https://huggingface.co/HuggingFaceTB/smollm-360M-instruct-v0.2-Q8_0-GGUF/resolve/main/smollm-360m-instruct-add-basics-q8_0.gguf" DestinationFolder="Models" DestinationFileName="smollm-360m-instruct-add-basics-q8_0.gguf" SkipUnchangedFiles="true">
36-
</DownloadFile>
43+
<DownloadFileItem Include="smollm-360m-instruct-add-basics-q8_0">
44+
<SourceUrl>https://huggingface.co/HuggingFaceTB/smollm-360M-instruct-v0.2-Q8_0-GGUF/resolve/main/smollm-360m-instruct-add-basics-q8_0.gguf</SourceUrl>
45+
<DestinationFolder>Models</DestinationFolder>
46+
<LocalFileName>smollm-360m-instruct-add-basics-q8_0.gguf</LocalFileName>
47+
</DownloadFileItem>
3748

38-
<DownloadFile SourceUrl="https://huggingface.co/gpustack/jina-reranker-v1-tiny-en-GGUF/resolve/main/jina-reranker-v1-tiny-en-FP16.gguf" DestinationFolder="Models" DestinationFileName="jina-reranker-v1-tiny-en-FP16.gguf" SkipUnchangedFiles="true">
39-
</DownloadFile>
49+
<DownloadFileItem Include="jina-reranker-v1-tiny-en-FP16.gguf">
50+
<SourceUrl>https://huggingface.co/gpustack/jina-reranker-v1-tiny-en-GGUF/resolve/main/jina-reranker-v1-tiny-en-FP16.gguf</SourceUrl>
51+
<DestinationFolder>Models</DestinationFolder>
52+
<LocalFileName>jina-reranker-v1-tiny-en-FP16.gguf</LocalFileName>
53+
</DownloadFileItem>
4054

41-
<DownloadFile SourceUrl="https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf/resolve/main/llava-v1.6-mistral-7b.Q3_K_XS.gguf" DestinationFolder="Models" DestinationFileName="llava-v1.6-mistral-7b.Q3_K_XS.gguf" SkipUnchangedFiles="true">
42-
</DownloadFile>
43-
44-
<DownloadFile SourceUrl="https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf/resolve/main/mmproj-model-f16.gguf" DestinationFolder="Models" DestinationFileName="mmproj-model-f16.gguf" SkipUnchangedFiles="true">
45-
</DownloadFile>
46-
47-
<DownloadFile SourceUrl="https://huggingface.co/leliuga/all-MiniLM-L12-v2-GGUF/resolve/main/all-MiniLM-L12-v2.Q8_0.gguf" DestinationFolder="Models" DestinationFileName="all-MiniLM-L12-v2.Q8_0.gguf" SkipUnchangedFiles="true">
48-
</DownloadFile>
55+
<DownloadFileItem Include="llava-v1.6-mistral-7b">
56+
<SourceUrl>https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf/resolve/main/llava-v1.6-mistral-7b.Q3_K_XS.gguf</SourceUrl>
57+
<DestinationFolder>Models</DestinationFolder>
58+
<LocalFileName>llava-v1.6-mistral-7b.Q3_K_XS.gguf</LocalFileName>
59+
</DownloadFileItem>
4960

50-
</Target>
51-
52-
<Target Name="DownloadContentFiles" BeforeTargets="DispatchToInnerBuilds;BeforeBuild">
53-
<MSBuild Projects="$(MSBuildProjectFile)" Targets="DownloadContentFilesInner" Properties="TargetFramework=once" />
54-
</Target>
61+
<DownloadFileItem Include="mmproj-model-f16">
62+
<SourceUrl>https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf/resolve/main/mmproj-model-f16.gguf</SourceUrl>
63+
<DestinationFolder>Models</DestinationFolder>
64+
<LocalFileName>mmproj-model-f16.gguf</LocalFileName>
65+
</DownloadFileItem>
5566

56-
<ItemGroup>
67+
<DownloadFileItem Include="all-MiniLM-L12-v2">
68+
<SourceUrl>https://huggingface.co/leliuga/all-MiniLM-L12-v2-GGUF/resolve/main/all-MiniLM-L12-v2.Q8_0.gguf</SourceUrl>
69+
<DestinationFolder>Models</DestinationFolder>
70+
<LocalFileName>all-MiniLM-L12-v2.Q8_0.gguf</LocalFileName>
71+
</DownloadFileItem>
72+
</ItemGroup>
73+
74+
<!-- Ensure the destination folder exists -->
75+
<Target Name="EnsureFolders">
76+
<MakeDir Directories="Models" Condition="!Exists('Models')" />
77+
</Target>
78+
79+
<!-- Download a single file:
80+
- Computes the full target file name (DesiredFile).
81+
- If DesiredFile already exists, the download is skipped.
82+
- Otherwise, creates a temporary folder (TempDownload),
83+
downloads the file there using DownloadFile, and then moves it
84+
to DesiredFile. Finally, cleans up the temporary folder. -->
85+
<Target Name="DownloadSingleFile" DependsOnTargets="EnsureFolders">
86+
<!-- (These properties come in via the MSBuild call.) -->
87+
<PropertyGroup>
88+
<DesiredFile>$([System.IO.Path]::Combine($(DestinationFolder), $(LocalFileName)))</DesiredFile>
89+
</PropertyGroup>
90+
91+
<Message Text="Processing file: $(DesiredFile)" Importance="high" />
92+
93+
<!-- Define a flag based on whether the file already exists -->
94+
<PropertyGroup>
95+
<DownloadNeeded Condition="!Exists('$(DesiredFile)')">true</DownloadNeeded>
96+
<DownloadNeeded Condition="Exists('$(DesiredFile)')">false</DownloadNeeded>
97+
</PropertyGroup>
98+
<Message Text="Download needed: $(DownloadNeeded)" Importance="high" />
99+
100+
<!-- If the file is already present, skip the download (by simply exiting this target) -->
101+
<Message Text="File $(DesiredFile) already exists; skipping download." Importance="high" Condition=" '$(DownloadNeeded)'=='false' " />
102+
103+
<!-- Only download if required -->
104+
<DownloadFile SourceUrl="$(SourceUrl)" DestinationFolder="TempDownload" SkipUnchangedFiles="true" Condition=" '$(DownloadNeeded)'=='true' " />
105+
106+
<!-- If a file was downloaded, move it to the desired name.
107+
We assume TempDownload now contains the downloaded file.
108+
(You might want to refine this if TempDownload could ever contain multiple files.) -->
109+
<ItemGroup Condition=" '$(DownloadNeeded)'=='true' ">
110+
<TempFile Include="TempDownload/*.*" />
111+
</ItemGroup>
112+
<Message Text="Downloaded file (temp): @(TempFile)" Importance="high" Condition=" '$(DownloadNeeded)'=='true' " />
113+
<Move SourceFiles="@(TempFile)" DestinationFiles="$(DesiredFile)" Condition=" '$(DownloadNeeded)'=='true' and @(TempFile) != '' " />
114+
<Message Text="Renamed downloaded file to $(DesiredFile)" Importance="high" Condition=" '$(DownloadNeeded)'=='true' and @(TempFile) != '' " />
115+
116+
<!-- Remove the temporary download folder -->
117+
<RemoveDir Directories="TempDownload" Condition="Exists('TempDownload')" />
118+
</Target>
119+
120+
<!-- Main target to process each file by calling the DownloadSingleFile target for each item.
121+
The MSBuild task will batch over the DownloadFileItem items, passing in each file’s metadata. -->
122+
<Target Name="DownloadAllFiles" BeforeTargets="DispatchToInnerBuilds;BeforeBuild">
123+
<MSBuild Projects="$(MSBuildProjectFile)" Targets="DownloadSingleFile" Properties="SourceUrl=%(DownloadFileItem.SourceUrl);DestinationFolder=%(DownloadFileItem.DestinationFolder);LocalFileName=%(DownloadFileItem.LocalFileName);TargetFramework=once" />
124+
</Target>
125+
126+
<ItemGroup>
57127
<ProjectReference Include="..\LLama.KernelMemory\LLamaSharp.KernelMemory.csproj" />
58128
<ProjectReference Include="..\LLama.SemanticKernel\LLamaSharp.SemanticKernel.csproj" />
59129
<ProjectReference Include="..\LLama\LLamaSharp.csproj" />

0 commit comments

Comments (0)