Skip to content

Commit 992aec8

Browse files
committed
Update to 11dd5a44eb180e1d69fac24d3852b5222d66fb7f
1 parent 81b9481 commit 992aec8

File tree

7 files changed

+47
-48
lines changed

7 files changed

+47
-48
lines changed

LLama/Extensions/IContextParamsExtensions.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,8 @@ public static void ToLlamaContextParams(this IContextParams @params, out LLamaCo
6060
result.swa_full = @params.SwaFull.Value;
6161
if (@params.OpOffload.HasValue)
6262
result.op_offload = @params.OpOffload.Value;
63+
if (@params.KVUnified.HasValue)
64+
result.kv_unified = @params.KVUnified.Value;
6365
}
6466

6567
private static int Threads(int? value)

LLama/Native/LLamaContextParams.cs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,19 @@ public bool swa_full
201201
}
202202
private sbyte _swa_full;
203203

204+
/// <summary>
/// use a unified buffer across the input sequences when computing the attention.
/// try to disable when n_seq_max > 1 for improved performance when the sequences do not share a large prefix
/// <br />
/// ref: https://github.com/ggml-org/llama.cpp/pull/14363
/// </summary>
public bool kv_unified
{
    // Interop boundary: llama.cpp declares this flag as a single byte,
    // so the bool is marshalled manually through an sbyte backing field.
    readonly get => _kv_unified != 0;
    set => _kv_unified = value ? (sbyte)1 : (sbyte)0;
}
private sbyte _kv_unified;
216+
204217
/// <summary>
205218
/// Get the default LLamaContextParams
206219
/// </summary>

LLama/Native/LLamaTimings.cs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,11 @@ public struct LLamaPerfContextTimings
3838
/// number of eval calls
3939
/// </summary>
4040
private int n_eval;
41+
42+
/// <summary>
/// number of times a ggml compute graph had been reused
/// <br />
/// NOTE(review): mirrors the corresponding field of llama.cpp's
/// llama_perf_context_data; field order must match the native struct layout.
/// </summary>
private int n_reused;
4146

4247
/// <summary>
4348
/// Timestamp when reset was last called

LLama/Native/LLamaVocabNative.cs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,14 @@ internal struct LLamaVocabNative
9494
[DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
9595
public static extern unsafe LLamaToken llama_vocab_pad(LLamaVocabNative* vocab);
9696

97+
/// <summary>
/// Get the mask token of the given vocabulary (e.g. the token used for masked-LM style infill).
/// </summary>
/// <param name="vocab">Pointer to the native vocabulary to query</param>
/// <returns>
/// The mask token. Presumably a negative sentinel when the vocabulary has no
/// mask token, matching the other special-token getters here — TODO confirm
/// against llama.cpp's llama_vocab_mask.
/// </returns>
[DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
public static extern unsafe LLamaToken llama_vocab_mask(LLamaVocabNative* vocab);
104+
97105
[DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
98106
public static extern unsafe LLamaToken llama_vocab_fim_pre(LLamaVocabNative* vocab);
99107

LLama/Native/LLamaVocabPreType.cs

Lines changed: 0 additions & 48 deletions
This file was deleted.

LLama/Native/LLamaVocabType.cs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,4 +35,9 @@ public enum LLamaVocabType
3535
/// RWKV tokenizer based on greedy tokenization
3636
/// </summary>
3737
RWKV = 5,
38+
39+
/// <summary>
40+
/// PLaMo-2 tokenizer based on Aho-Corasick with dynamic programming
41+
/// </summary>
42+
PLAMO2 = 6
3843
}

LLama/Native/SafeLlamaModelHandle.cs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -818,6 +818,20 @@ public LLamaToken? Pad
818818
}
819819
}
820820

821+
/// <summary>
/// Get the masking token for this model.
/// Presumably null when the model's vocabulary defines no mask token
/// (Normalize appears to map the native sentinel to null) — TODO confirm.
/// </summary>
public LLamaToken? Mask
{
    get
    {
        // unsafe: the native call takes a raw pointer to the vocab struct.
        unsafe
        {
            return Normalize(LLamaVocabNative.llama_vocab_mask(VocabNative));
        }
    }
}
834+
821835
/// <summary>
822836
/// Get the sentence separator token for this model
823837
/// </summary>

0 commit comments

Comments
 (0)