Replies: 2 comments 2 replies
-
So I figured out how to create a custom Phi3 provider:

```csharp
using Microsoft.SemanticKernel;
using System.Text;
using Microsoft.SemanticKernel.ChatCompletion;
using Microsoft.SemanticKernel.Services;
using Microsoft.ML.OnnxRuntimeGenAI;
using System.Runtime.CompilerServices;
using System.Diagnostics;

namespace Phi3SemanticKernel
{
    public class Phi3ChatCompletionService : IChatCompletionService
    {
        private static Model? model;
        private static MultiModalProcessor? processor;

        public Phi3ChatCompletionService(string modelPath)
        {
            model = new Model(modelPath);
            processor = new MultiModalProcessor(model);
        }

        private async IAsyncEnumerable<StreamingChatMessageContent> Answer(ChatHistory history, [EnumeratorCancellation] CancellationToken cancellationToken = default)
        {
            if (processor is not null)
            {
                await Task.Delay(1).ConfigureAwait(false);
                using var tokenizerStream = processor.CreateStream();

                // Build a Phi-3 style chat prompt from the chat history.
                StringBuilder prompt = new StringBuilder();
                foreach (var item in history)
                    prompt.Append($"<|{item.Role}|>{item.Content}<|end|>");
                prompt.Append("<|assistant|>");
                var fullPrompt = prompt.ToString();

                var inputTensors = processor.ProcessImages(fullPrompt, null);
                using GeneratorParams generatorParams = new GeneratorParams(model);
                generatorParams.SetSearchOption("max_length", 3072);
                generatorParams.SetInputs(inputTensors);

                // Generate the response token by token and stream each decoded piece back.
                using var generator = new Generator(model, generatorParams);
                while (!generator.IsDone())
                {
                    generator.ComputeLogits();
                    generator.GenerateNextToken();
                    var seq = generator.GetSequence(0)[^1];
                    var str = tokenizerStream.Decode(seq);

                    if (cancellationToken.IsCancellationRequested)
                        break;

                    yield return new StreamingChatMessageContent(AuthorRole.Assistant, str);
                    await Task.Yield();

                    if (cancellationToken.IsCancellationRequested)
                        break;
                }
            }
        }

        IReadOnlyDictionary<string, object?> IAIService.Attributes => throw new NotImplementedException();

        // Non-streaming completion is not implemented yet; only the streaming path below is used.
        Task<IReadOnlyList<ChatMessageContent>> IChatCompletionService.GetChatMessageContentsAsync(ChatHistory chatHistory, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, CancellationToken cancellationToken = default)
        {
            throw new NotImplementedException();
        }

        IAsyncEnumerable<StreamingChatMessageContent> IChatCompletionService.GetStreamingChatMessageContentsAsync(ChatHistory chatHistory, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, CancellationToken cancellationToken = default)
        {
            return Answer(chatHistory, cancellationToken);
        }
    }
}
```

However, I'm not quite sure where to continue to implement function calling, which is what I'm really interested in with Semantic Kernel.
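For completeness, this is roughly how the service above can be wired into a kernel and streamed from (a minimal sketch — the model path and prompt are just placeholders):

```csharp
using Microsoft.Extensions.DependencyInjection;
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.ChatCompletion;
using Phi3SemanticKernel;

// Path to a local Phi-3 ONNX model folder (placeholder).
var modelPath = @"C:\models\Phi-3-mini-4k-instruct-onnx";

var builder = Kernel.CreateBuilder();
// Register the custom provider as the chat completion service.
builder.Services.AddSingleton<IChatCompletionService>(new Phi3ChatCompletionService(modelPath));
var kernel = builder.Build();

var chat = kernel.GetRequiredService<IChatCompletionService>();
var history = new ChatHistory();
history.AddUserMessage("Explain what ONNX Runtime GenAI is in one sentence.");

// Only the streaming path is implemented above, so stream the reply.
await foreach (var chunk in chat.GetStreamingChatMessageContentsAsync(history))
{
    Console.Write(chunk.Content);
}
```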
-
We have created the ONNX connector, but at this time we don't have samples created. We'll have these published shortly. https://github.com/microsoft/semantic-kernel/tree/main/dotnet/src/Connectors/Connectors.Onnx
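Until the samples land, a rough sketch of the intended usage would look something like this. Note that `AddOnnxRuntimeGenAIChatCompletion` and its parameters are an assumption based on the connector's naming pattern, so double-check against the package:

```csharp
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.ChatCompletion;

// Assumed builder extension from Microsoft.SemanticKernel.Connectors.Onnx;
// verify the exact method name and parameters in the package.
var builder = Kernel.CreateBuilder();
builder.AddOnnxRuntimeGenAIChatCompletion(
    modelId: "phi-3",                                      // assumed parameter
    modelPath: @"C:\models\Phi-3-mini-4k-instruct-onnx");  // local ONNX model folder (placeholder)
var kernel = builder.Build();

var chat = kernel.GetRequiredService<IChatCompletionService>();
var reply = await chat.GetChatMessageContentAsync("Hello from a local Phi-3 model!");
Console.WriteLine(reply);
```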
-
Every sample I see requires you to fire up llama and run a localhost website to access your local models. This makes for an awkward developer and user experience, and it also risks hitting port conflicts.
When I use onnxruntime-genai I can load and use my Phi3 model directly, without needing a web service running. Is this possible with Semantic Kernel?
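For example, this is all it takes with onnxruntime-genai alone (a minimal sketch against the Microsoft.ML.OnnxRuntimeGenAI C# API; the model path and prompt are placeholders):

```csharp
using Microsoft.ML.OnnxRuntimeGenAI;

// Load a local Phi-3 ONNX model directly -- no server, no localhost port.
using var model = new Model(@"C:\models\Phi-3-mini-4k-instruct-onnx");
using var tokenizer = new Tokenizer(model);

var prompt = "<|user|>Why is the sky blue?<|end|><|assistant|>";
using var tokens = tokenizer.Encode(prompt);

using var generatorParams = new GeneratorParams(model);
generatorParams.SetSearchOption("max_length", 512);
generatorParams.SetInputSequences(tokens);

// Run generation in-process and print the decoded output.
using var generator = new Generator(model, generatorParams);
while (!generator.IsDone())
{
    generator.ComputeLogits();
    generator.GenerateNextToken();
}
Console.WriteLine(tokenizer.Decode(generator.GetSequence(0)));
```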
This issue might be intended to address that, but it has no description, so it's hard to tell: #6619 @matthewbolanos