
Commit d83f1a0

Adds an option to apply the chat template to prompts when using StatelessExecutor (LLamaStatelessExecutor.cs).
Also updates the StatelessModeExecute.cs example to demonstrate the new functionality.
1 parent: 7782869

File tree

2 files changed: +26 −1 lines

LLama.Examples/Examples/StatelessModeExecute.cs

Lines changed: 5 additions & 1 deletion
@@ -15,7 +15,11 @@ public static async Task Run()
             GpuLayerCount = 5
         };
         using var model = await LLamaWeights.LoadFromFileAsync(parameters);
-        var ex = new StatelessExecutor(model, parameters);
+        var ex = new StatelessExecutor(model, parameters)
+        {
+            ApplyTemplate = true,
+            SystemMessage = "You are a helpful bot."
+        };

         Console.ForegroundColor = ConsoleColor.Yellow;
         Console.WriteLine("The executor has been enabled. In this example, the inference is an one-time job. That says, the previous input and response has " +
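
For reference, a minimal, self-contained sketch of the new options in use. The model path and the InferenceParams values below are placeholders for illustration, not part of this commit:

using LLama;
using LLama.Common;

var parameters = new ModelParams("path/to/model.gguf") // placeholder path
{
    GpuLayerCount = 5
};
using var model = await LLamaWeights.LoadFromFileAsync(parameters);

var ex = new StatelessExecutor(model, parameters)
{
    ApplyTemplate = true,                    // wrap each prompt in the model's chat template
    SystemMessage = "You are a helpful bot." // added as the "system" message
};

// The prompt is passed as the "user" message before templating.
await foreach (var text in ex.InferAsync("What is 1 + 1?", new InferenceParams { MaxTokens = 64 }))
    Console.Write(text);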

LLama/LLamaStatelessExecutor.cs

Lines changed: 21 additions & 0 deletions
@@ -7,6 +7,7 @@
 using System.Threading;
 using LLama.Exceptions;
 using LLama.Native;
+using LLama.Transformers;
 using Microsoft.Extensions.Logging;

 namespace LLama
@@ -37,6 +38,17 @@ public class StatelessExecutor
     /// </summary>
     public LLamaContext Context { get; private set; }

+    /// <summary>
+    /// If true, applies the default template to the prompt, following the rules for <a href="https://github.com/ggerganov/llama.cpp/wiki/Templates-supported-by-llama_chat_apply_template">llama_chat_apply_template</a>.
+    /// </summary>
+    public bool ApplyTemplate { get; init; }
+
+    /// <summary>
+    /// The system message to use with the prompt. Only used when <see cref="ApplyTemplate" /> is true.
+    /// </summary>
+    public string? SystemMessage { get; init; }
+
+
     /// <summary>
     /// Create a new stateless executor which will use the given model
     /// </summary>
@@ -79,6 +91,15 @@ public async IAsyncEnumerable<string> InferAsync(string prompt, IInferenceParams
         var decoder = new StreamingTokenDecoder(Context);
         var antiprocessor = new AntipromptProcessor(inferenceParams.AntiPrompts);

+        if (ApplyTemplate)
+        {
+            var template = new LLamaTemplate(_weights.NativeHandle) { AddAssistant = true };
+            if (SystemMessage != null) template.Add("system", SystemMessage);
+
+            template.Add("user", prompt);
+            prompt = PromptTemplateTransformer.ToModelPrompt(template);
+        }
+
         // Tokenize the prompt
         var tokens = Context.Tokenize(prompt, special: true).ToList();
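
Read in isolation, the templating step added to InferAsync amounts to the standalone helper below. This is a sketch using only the types visible in the diff (LLamaTemplate, PromptTemplateTransformer); `weights` stands in for the executor's private `_weights` field, and the model path is a placeholder:

using LLama;
using LLama.Common;
using LLama.Native;
using LLama.Transformers;

var weights = await LLamaWeights.LoadFromFileAsync(new ModelParams("path/to/model.gguf")); // placeholder path
Console.WriteLine(BuildTemplatedPrompt(weights, "You are a helpful bot.", "What is 1 + 1?"));

static string BuildTemplatedPrompt(LLamaWeights weights, string? systemMessage, string userPrompt)
{
    // AddAssistant = true appends the assistant turn marker, so generation
    // continues as the assistant's reply.
    var template = new LLamaTemplate(weights.NativeHandle) { AddAssistant = true };

    if (systemMessage != null)
        template.Add("system", systemMessage);

    template.Add("user", userPrompt);

    // Renders the accumulated messages using the model's built-in chat template.
    return PromptTemplateTransformer.ToModelPrompt(template);
}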
