Skip to content

Commit f7fdaac

Browse files
committed
Memory efficient context handling
1 parent 17cb2a0 commit f7fdaac

File tree

3 files changed

+30
-38
lines changed

3 files changed

+30
-38
lines changed

LLama.KernelMemory/LLamaSharpTextEmbeddingGenerator.cs

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
using LLama.Native;
44
using Microsoft.KernelMemory;
55
using Microsoft.KernelMemory.AI;
6+
using System.Text;
67

78
namespace LLamaSharp.KernelMemory
89
{
@@ -106,20 +107,30 @@ public async Task<Embedding> GenerateEmbeddingAsync(string text, CancellationTok
106107
}
107108

108109
/// <summary>
109-
/// Count tokens in the input text
110+
/// Count the tokens in the input text
110111
/// </summary>
111112
/// <param name="text">input text</param>
113+
/// <param name="parameters">context parameters</param>
112114
/// <returns></returns>
113-
public int CountTokens(string text) => _weights?.CountTokens(text, @params!) ?? 0;
115+
public int CountTokens(string text)
116+
{
117+
return _weights!.Tokenize(text, true, special: true, Encoding.UTF8).Length;
118+
}
114119

115120
/// <summary>
116121
/// Get the list of tokens for the input text
117122
/// </summary>
118123
/// <param name="text">Input string to be tokenized</param>
124+
/// <param name="parameters">Context parameters</param>
119125
/// <returns>Read-only list of tokens for the input text</returns>
120126
/// <remarks>
121127
/// Throws if text is null. Includes an empty stop token because addBos is left true, to be consistent with the CountTokens implementation.</remarks>
122-
/// <see cref="CountTokens(string)"/>
123-
public IReadOnlyList<string> GetTokens(string text) => _weights?.GetTokens(text, @params!) ?? new List<string>();
128+
/// <see cref="CountTokens(string)"/>
129+
public IReadOnlyList<string> GetTokens(string text)
130+
{
131+
var numericTokens = _weights!.Tokenize(text, true, special: true, Encoding.UTF8);
132+
var decoder = new StreamingTokenDecoder(Encoding.UTF8, _weights);
133+
return numericTokens.Select(x => { decoder.Add(x); return decoder.Read(); }).ToList();
134+
}
124135
}
125136
}

LLama.KernelMemory/LlamaSharpTextGenerator.cs

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
using LLama.Sampling;
44
using Microsoft.KernelMemory;
55
using Microsoft.KernelMemory.AI;
6+
using System.Text;
67

78
namespace LLamaSharp.KernelMemory
89
{
@@ -125,20 +126,30 @@ private static InferenceParams OptionsToParams(TextGenerationOptions options, In
125126
}
126127

127128
/// <summary>
128-
/// Count tokens in the input text
129+
/// Count the tokens in the input text
129130
/// </summary>
130131
/// <param name="text">input text</param>
132+
/// <param name="parameters">context parameters</param>
131133
/// <returns></returns>
132-
public int CountTokens(string text) => _weights?.CountTokens(text, @params!) ?? 0;
134+
public int CountTokens(string text)
135+
{
136+
return _weights!.Tokenize(text, true, special: true, Encoding.UTF8).Length;
137+
}
133138

134139
/// <summary>
135140
/// Get the list of tokens for the input text
136141
/// </summary>
137142
/// <param name="text">Input string to be tokenized</param>
143+
/// <param name="parameters">Context parameters</param>
138144
/// <returns>Read-only list of tokens for the input text</returns>
139145
/// <remarks>
140146
/// Throws if text is null. Includes an empty stop token because addBos is left true, to be consistent with the CountTokens implementation.</remarks>
141-
/// <see cref="CountTokens(string)"/>
142-
public IReadOnlyList<string> GetTokens(string text) => _weights?.GetTokens(text, @params!) ?? new List<string>();
147+
/// <see cref="CountTokens(string)"/>
148+
public IReadOnlyList<string> GetTokens(string text)
149+
{
150+
var numericTokens = _weights!.Tokenize(text, true, special: true, Encoding.UTF8);
151+
var decoder = new StreamingTokenDecoder(Encoding.UTF8, _weights);
152+
return numericTokens.Select(x => { decoder.Add(x); return decoder.Read(); }).ToList();
153+
}
143154
}
144155
}

LLama/LLamaWeights.cs

Lines changed: 0 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -166,35 +166,5 @@ public LLamaToken[] Tokenize(string text, bool add_bos, bool special, Encoding e
166166
{
167167
return NativeHandle.Tokenize(text, add_bos, special, encoding);
168168
}
169-
170-
/// <summary>
171-
/// Count the tokens in the input text
172-
/// </summary>
173-
/// <param name="text">input text</param>
174-
/// <param name="parameters">context parameters</param>
175-
/// <returns></returns>
176-
public int CountTokens(string text, IContextParams parameters)
177-
{
178-
using var context = CreateContext(parameters);
179-
var count = context.Tokenize(text, special: true).Length;
180-
return count;
181-
}
182-
183-
/// <summary>
184-
/// Get the list of tokens for the input text
185-
/// </summary>
186-
/// <param name="text">Input string to be tokenized</param>
187-
/// <param name="parameters">Context parameters</param>
188-
/// <returns>Read-only list of tokens for the input test</returns>
189-
/// <remarks>
190-
/// It throws if text is null and Includes empty stop token because addBos is left true to be consistent with the CountTokens implementation.</remarks>
191-
/// <see cref="CountTokens(string, IContextParams)"/>
192-
public IReadOnlyList<string> GetTokens(string text, IContextParams parameters)
193-
{
194-
using var context = CreateContext(parameters);
195-
var numericTokens = context.Tokenize(text, special: true);
196-
var decoder = new StreamingTokenDecoder(context);
197-
return numericTokens.Select(x => { decoder.Add(x); return decoder.Read(); }).ToList();
198-
}
199169
}
200170
}

0 commit comments

Comments
 (0)