Skip to content

Commit a53f503

Browse files
committed
Merge branch 'master' of https://github.com/nipeone/LLamaSharp
2 parents a69f814 + 474cfd1 commit a53f503

22 files changed

+228
-95
lines changed

.github/workflows/compile.yml

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ concurrency:
1717

1818
env:
1919
# Compiler defines common to all platforms
20-
COMMON_DEFINE: -DGGML_NATIVE=OFF -DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_EXAMPLES=ON -DLLAMA_BUILD_SERVER=OFF -DBUILD_SHARED_LIBS=ON
20+
COMMON_DEFINE: -DGGML_NATIVE=OFF -DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_EXAMPLES=ON -DLLAMA_BUILD_SERVER=OFF -DBUILD_SHARED_LIBS=ON -DLLAMA_CURL=OFF
2121

2222
jobs:
2323
compile-linux:
@@ -28,23 +28,23 @@ jobs:
2828
include:
2929
- build: 'noavx'
3030
defines: '-DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF'
31-
os: ubuntu-20.04
31+
os: ubuntu-24.04
3232
arch: x64
3333
- build: 'avx2'
3434
defines: ''
35-
os: ubuntu-20.04
35+
os: ubuntu-24.04
3636
arch: x64
3737
- build: 'avx'
3838
defines: '-DGGML_AVX2=OFF'
39-
os: ubuntu-20.04
39+
os: ubuntu-24.04
4040
arch: x64
4141
- build: 'avx512'
4242
defines: '-DGGML_AVX512=ON'
43-
os: ubuntu-20.04
43+
os: ubuntu-24.04
4444
arch: x64
4545
- build: 'aarch64'
4646
defines: '-DGGML_NATIVE=OFF -DGGML_CPU_AARCH64=ON -DGGML_CPU_ARM_ARCH=armv8-a'
47-
os: ubuntu-22.04-arm
47+
os: ubuntu-24.04-arm
4848
arch: arm64
4949
runs-on: ${{ matrix.os }}
5050
steps:
@@ -102,7 +102,7 @@ jobs:
102102
defines: '-DGGML_AVX2=OFF'
103103
- build: 'avx512'
104104
defines: '-DGGML_AVX512=ON'
105-
runs-on: ubuntu-20.04
105+
runs-on: ubuntu-24.04
106106
container:
107107
image: alpine:latest
108108
steps:
@@ -346,7 +346,7 @@ jobs:
346346
strategy:
347347
fail-fast: false
348348
matrix:
349-
os: [ubuntu-20.04, windows-2019]
349+
os: [ubuntu-22.04, windows-2019]
350350
cuda: ['12.2.0', '11.7.1']
351351
runs-on: ${{ matrix.os }}
352352
steps:
@@ -421,35 +421,35 @@ jobs:
421421
if-no-files-found: error
422422

423423
- name: Upload artifacts (Linux)
424-
if: ${{ matrix.os == 'ubuntu-20.04' }}
424+
if: ${{ matrix.os == 'ubuntu-22.04' }}
425425
uses: actions/upload-artifact@v4
426426
with:
427427
path: ./build/bin/libllama.so
428428
name: llama-bin-linux-cublas-cu${{ matrix.cuda }}-x64.so
429429
if-no-files-found: error
430430
- name: Upload artifacts ggml (Linux)
431-
if: ${{ matrix.os == 'ubuntu-20.04' }}
431+
if: ${{ matrix.os == 'ubuntu-22.04' }}
432432
uses: actions/upload-artifact@v4
433433
with:
434434
path: ./build/bin/libggml.so
435435
name: ggml-bin-linux-cublas-cu${{ matrix.cuda }}-x64.so
436436
if-no-files-found: error
437437
- name: Upload artifacts ggml-base (Linux)
438-
if: ${{ matrix.os == 'ubuntu-20.04' }}
438+
if: ${{ matrix.os == 'ubuntu-22.04' }}
439439
uses: actions/upload-artifact@v4
440440
with:
441441
path: ./build/bin/libggml-base.so
442442
name: ggml-base-bin-linux-cublas-cu${{ matrix.cuda }}-x64.so
443443
if-no-files-found: error
444444
- name: Upload artifacts ggml-cuda (Linux)
445-
if: ${{ matrix.os == 'ubuntu-20.04' }}
445+
if: ${{ matrix.os == 'ubuntu-22.04' }}
446446
uses: actions/upload-artifact@v4
447447
with:
448448
path: ./build/bin/libggml-cuda.so
449449
name: ggml-cuda-bin-linux-cublas-cu${{ matrix.cuda }}-x64.so
450450
if-no-files-found: error
451451
- name: Upload llava artifacts (Linux)
452-
if: ${{ matrix.os == 'ubuntu-20.04' }}
452+
if: ${{ matrix.os == 'ubuntu-22.04' }}
453453
uses: actions/upload-artifact@v4
454454
with:
455455
path: ./build/bin/libllava_shared.so
@@ -552,7 +552,7 @@ jobs:
552552
defines: '-DANDROID_ABI=x86_64'
553553
- build: 'arm64-v8a'
554554
defines: '-DANDROID_ABI=arm64-v8a'
555-
runs-on: ubuntu-20.04
555+
runs-on: ubuntu-24.04
556556
steps:
557557
- uses: actions/checkout@v4
558558
with:

.github/workflows/stale_issues.yml

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
name: Close stale issues
2+
3+
on:
4+
schedule:
5+
- cron: '0 0 * * *'
6+
7+
jobs:
8+
stale:
9+
runs-on: ubuntu-latest
10+
steps:
11+
- uses: actions/stale@v9
12+
with:
13+
repo-token: ${{ secrets.GITHUB_TOKEN }}
14+
stale-issue-message: 'This issue has been automatically marked as stale due to inactivity. If no further activity occurs, it will be closed in 7 days.'
15+
stale-pr-message: 'This pull request has been automatically marked as stale due to inactivity. If no further activity occurs, it will be closed in 7 days.'
16+
days-before-stale: 60
17+
days-before-close: 7
18+
stale-issue-label: 'stale'
19+
exempt-issue-labels: 'do not close'
20+
operations-per-run: 30

LLama.Examples/Examples/KernelMemory.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ and answer questions about them in an interactive chat prompt.
4646

4747
// Ask a predefined question
4848
Console.ForegroundColor = ConsoleColor.Green;
49-
string question1 = "What formats does KM support";
49+
string question1 = "What is Kernel Memory";
5050
Console.WriteLine($"Question: {question1}");
5151
await AnswerQuestion(memory, question1);
5252

LLama.Examples/Examples/KernelMemorySaveAndLoad.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ Press ENTER to proceed...
5454
await IngestDocuments(memory);
5555
}
5656

57-
await AskSingleQuestion(memory, "What formats does KM support?");
57+
await AskSingleQuestion(memory, "What is Kernel Memory");
5858
await StartUserChatSession(memory);
5959
}
6060

LLama.Examples/LLama.Examples.csproj

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,15 @@
1515

1616
<ItemGroup>
1717
<PackageReference Include="Microsoft.Extensions.Logging.Console" Version="9.0.3" />
18-
<PackageReference Include="Microsoft.KernelMemory.Core" Version="0.97.250211.1" />
18+
<PackageReference Include="Microsoft.KernelMemory.Core" Version="0.98.250323.1" />
1919
<PackageReference Include="Microsoft.SemanticKernel" Version="1.44.0" />
20-
<PackageReference Include="Microsoft.SemanticKernel.Plugins.Memory" Version="1.6.2-alpha" />
20+
<PackageReference Include="Microsoft.SemanticKernel.Plugins.Memory" Version="1.44.0-alpha" />
2121
<PackageReference Include="NAudio" Version="2.2.1" />
2222
<PackageReference Include="SixLabors.ImageSharp" Version="3.1.7" />
23-
<PackageReference Include="Spectre.Console" Version="0.49.1" />
24-
<PackageReference Include="Spectre.Console.ImageSharp" Version="0.49.1" />
25-
<PackageReference Include="Whisper.net" Version="1.7.4" />
26-
<PackageReference Include="Whisper.net.Runtime" Version="1.7.4" />
23+
<PackageReference Include="Spectre.Console" Version="0.50.0" />
24+
<PackageReference Include="Spectre.Console.ImageSharp" Version="0.50.0" />
25+
<PackageReference Include="Whisper.net" Version="1.8.1" />
26+
<PackageReference Include="Whisper.net.Runtime" Version="1.8.1" />
2727
<PackageReference Include="Whisper.net.Runtime.Clblast" Version="1.5.0" />
2828
<PackageReference Include="Whisper.net.Runtime.CoreML" Version="1.7.4" />
2929
<PackageReference Include="Whisper.net.Runtime.Cublas" Version="1.5.0" />

LLama.KernelMemory/LLamaSharpTextEmbeddingGenerator.cs

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,11 @@ public LLamaSharpTextEmbeddingGenerator(LLamaSharpConfig config)
3131

3232
var @params = new ModelParams(config.ModelPath)
3333
{
34-
ContextSize = config.ContextSize,
35-
GpuLayerCount = config.GpuLayerCount ?? 20,
36-
34+
ContextSize = config?.ContextSize ?? 2048,
35+
GpuLayerCount = config?.GpuLayerCount ?? 20,
36+
//Embeddings = true,
37+
MainGpu = config?.MainGpu ?? 0,
38+
SplitMode = config?.SplitMode ?? LLama.Native.GPUSplitMode.None,
3739
PoolingType = LLamaPoolingType.Mean,
3840
};
3941

@@ -54,11 +56,11 @@ public LLamaSharpTextEmbeddingGenerator(LLamaSharpConfig config, LLamaWeights we
5456

5557
var @params = new ModelParams(config.ModelPath)
5658
{
57-
ContextSize = config.ContextSize ?? 2048,
58-
GpuLayerCount = config.GpuLayerCount ?? 20,
59-
Embeddings = true,
60-
MainGpu = config.MainGpu,
61-
SplitMode = config.SplitMode,
59+
ContextSize = config?.ContextSize ?? 2048,
60+
GpuLayerCount = config?.GpuLayerCount ?? 20,
61+
//Embeddings = true,
62+
MainGpu = config?.MainGpu ?? 0,
63+
SplitMode = config?.SplitMode ?? LLama.Native.GPUSplitMode.None,
6264
PoolingType = LLamaPoolingType.Mean,
6365
};
6466
_weights = weights;

LLama.KernelMemory/LlamaSharpTextGenerator.cs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,10 @@ public LlamaSharpTextGenerator(LLamaSharpConfig config)
3232
{
3333
var parameters = new ModelParams(config.ModelPath)
3434
{
35-
ContextSize = config.ContextSize ?? 2048,
36-
GpuLayerCount = config.GpuLayerCount ?? 20,
35+
ContextSize = config?.ContextSize ?? 2048,
36+
GpuLayerCount = config?.GpuLayerCount ?? 20,
37+
MainGpu = config?.MainGpu ?? 0,
38+
SplitMode = config?.SplitMode ?? LLama.Native.GPUSplitMode.None,
3739
};
3840
_weights = LLamaWeights.LoadFromFile(parameters);
3941
_context = _weights.CreateContext(parameters);

LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
</PropertyGroup>
3535

3636
<ItemGroup>
37-
<PackageReference Include="Microsoft.SemanticKernel.Abstractions" Version="1.44.0" />
37+
<PackageReference Include="Microsoft.SemanticKernel.Abstractions" Version="1.48.0" />
3838
</ItemGroup>
3939

4040
<ItemGroup Condition="'$(TargetFramework)' == 'netstandard2.0'">

LLama.Unittest/Constants.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,15 +21,15 @@ public static int CIGpuLayerCount
2121
{
2222
get
2323
{
24-
if (RuntimeInformation.IsOSPlatform(OSPlatform.OSX))
24+
//if (RuntimeInformation.IsOSPlatform(OSPlatform.OSX))
2525
{
2626
#if DEBUG
2727
return 20;
2828
#else
2929
return 0;
3030
#endif
3131
}
32-
else return 20;
32+
//else return 20;
3333
}
3434
}
3535
}

LLama.Unittest/KernelMemory/ITextTokenizerTests.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ public ITextTokenizerTests(ITestOutputHelper testOutputHelper)
2222
_testOutputHelper = testOutputHelper;
2323

2424
_infParams = new() { AntiPrompts = ["\n\n"] };
25-
_lsConfig = new(Constants.GenerativeModelPath) { DefaultInferenceParams = _infParams, ContextSize = 512 };
25+
_lsConfig = new(Constants.GenerativeModelPath) { DefaultInferenceParams = _infParams, ContextSize = 512, SplitMode = LLama.Native.GPUSplitMode.Layer };
2626

2727
testOutputHelper.WriteLine($"Using model {Path.GetFileName(_lsConfig.ModelPath)}");
2828
}

0 commit comments

Comments
 (0)