2 changes: 1 addition & 1 deletion python/pyproject.toml
@@ -117,7 +117,7 @@ ollama = [
"ollama ~= 0.4"
]
onnx = [
"onnxruntime-genai ~= 0.7"
"onnxruntime-genai ~= 0.9"
]
pandas = [
"pandas ~= 2.2"
8 changes: 2 additions & 6 deletions python/samples/concepts/setup/chat_completion_services.py
@@ -332,13 +332,9 @@ def get_onnx_chat_completion_service_and_request_settings() -> tuple[
Please refer to the Semantic Kernel Python documentation for more information:
https://learn.microsoft.com/en-us/python/api/semantic-kernel/semantic_kernel?view=semantic-kernel
"""
from semantic_kernel.connectors.ai.onnx import (
OnnxGenAIChatCompletion,
OnnxGenAIPromptExecutionSettings,
ONNXTemplate,
)
from semantic_kernel.connectors.ai.onnx import OnnxGenAIChatCompletion, OnnxGenAIPromptExecutionSettings

chat_service = OnnxGenAIChatCompletion(ONNXTemplate.PHI3, service_id=service_id)
chat_service = OnnxGenAIChatCompletion(template="phi4mm", service_id=service_id)
request_settings = OnnxGenAIPromptExecutionSettings(service_id=service_id)

return chat_service, request_settings
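
For reviewers who want to try the updated sample end to end, here is a minimal usage sketch (not part of the diff) of the service configured above with the new `phi4mm` template. The model folder and image path are placeholders, and `get_chat_message_content` is assumed as the usual `ChatCompletionClientBase` entry point:

```python
import asyncio

from semantic_kernel.connectors.ai.onnx import OnnxGenAIChatCompletion, OnnxGenAIPromptExecutionSettings
from semantic_kernel.contents import AuthorRole, ChatHistory, ChatMessageContent, ImageContent, TextContent


async def main() -> None:
    # Placeholder path: must point at a local multi-modal ONNX model folder,
    # since the connector only reads models and media from disk.
    service = OnnxGenAIChatCompletion(template="phi4mm", ai_model_path="./models/phi-4-mm-onnx")
    settings = OnnxGenAIPromptExecutionSettings()

    history = ChatHistory()
    history.add_message(
        ChatMessageContent(
            role=AuthorRole.USER,
            items=[
                TextContent(text="What is shown in this picture?"),
                # The URI must be a file path; it is handed to onnxruntime-genai's Images.open.
                ImageContent(uri="./data/photo.png"),
            ],
        )
    )

    reply = await service.get_chat_message_content(chat_history=history, settings=settings)
    print(reply)


asyncio.run(main())
```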
@@ -1,5 +1,6 @@
# Copyright (c) Microsoft. All rights reserved.

import json
import logging
import sys
from collections.abc import AsyncGenerator
@@ -10,7 +11,6 @@
else:
from typing_extensions import override # pragma: no cover


from pydantic import ValidationError

from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase
@@ -20,6 +20,7 @@
from semantic_kernel.connectors.ai.onnx.utils import ONNXTemplate, apply_template
from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
from semantic_kernel.contents import (
AudioContent,
ChatHistory,
ChatMessageContent,
ImageContent,
@@ -37,12 +38,12 @@
class OnnxGenAIChatCompletion(ChatCompletionClientBase, OnnxGenAICompletionBase):
"""OnnxGenAI text completion service."""

template: ONNXTemplate
template: ONNXTemplate | None
SUPPORTS_FUNCTION_CALLING: ClassVar[bool] = False

def __init__(
self,
template: ONNXTemplate,
template: ONNXTemplate | None = None,
ai_model_path: str | None = None,
ai_model_id: str | None = None,
env_file_path: str | None = None,
@@ -80,6 +81,12 @@ def __init__(

super().__init__(ai_model_id=ai_model_id, ai_model_path=settings.chat_model_folder, template=template, **kwargs)

if self.enable_multi_modality and template is None:
raise ServiceInitializationError(
"When using a multi-modal model, a template must be specified."
" Please provide a ONNXTemplate in the constructor."
)

@override
async def _inner_get_chat_message_contents(
self,
@@ -101,7 +108,8 @@ async def _inner_get_chat_message_contents(
assert isinstance(settings, OnnxGenAIPromptExecutionSettings) # nosec
prompt = self._prepare_chat_history_for_request(chat_history)
images = self._get_images_from_history(chat_history)
choices = await self._generate_next_token(prompt, settings, images)
audios = self._get_audios_from_history(chat_history)
choices = await self._generate_next_token(prompt, settings, images=images, audios=audios)
return [self._create_chat_message_content(choice) for choice in choices]

@override
@@ -127,7 +135,8 @@ async def _inner_get_streaming_chat_message_contents(
assert isinstance(settings, OnnxGenAIPromptExecutionSettings) # nosec
prompt = self._prepare_chat_history_for_request(chat_history)
images = self._get_images_from_history(chat_history)
async for chunk in self._generate_next_token_async(prompt, settings, images):
audios = self._get_audios_from_history(chat_history)
async for chunk in self._generate_next_token_async(prompt, settings, images=images, audios=audios):
yield [
self._create_streaming_chat_message_content(choice_index, new_token, function_invoke_attempt)
for choice_index, new_token in enumerate(chunk)
@@ -159,9 +168,21 @@ def _create_streaming_chat_message_content(
def _prepare_chat_history_for_request(
self, chat_history: ChatHistory, role_key: str = "role", content_key: str = "content"
) -> Any:
return apply_template(chat_history, self.template)
if self.template:
return apply_template(chat_history, self.template)
return self.tokenizer.apply_chat_template(
json.dumps(self._chat_messages_to_dicts(chat_history)),
add_generation_prompt=True,
)

def _chat_messages_to_dicts(self, chat_history: "ChatHistory") -> list[dict[str, Any]]:
return [
message.to_dict(role_key="role", content_key="content")
for message in chat_history.messages
if isinstance(message, ChatMessageContent)
]

def _get_images_from_history(self, chat_history: "ChatHistory") -> ImageContent | None:
def _get_images_from_history(self, chat_history: "ChatHistory") -> list[ImageContent] | None:
images = []
for message in chat_history.messages:
for image in message.items:
@@ -174,11 +195,22 @@ def _get_images_from_history(self, chat_history: "ChatHistory") -> ImageContent
raise ServiceInvalidExecutionSettingsError(
"Image Content URI needs to be set, because onnx can only work with file paths"
)
# Currently Onnx Runtime only supports one image
# Later we will add support for multiple images
if len(images) > 1:
raise ServiceInvalidExecutionSettingsError("The model does not support more than one image")
return images[-1] if images else None
return images if len(images) else None

def _get_audios_from_history(self, chat_history: "ChatHistory") -> list[AudioContent] | None:
audios = []
for message in chat_history.messages:
for audio in message.items:
if isinstance(audio, AudioContent):
if not self.enable_multi_modality:
raise ServiceInvalidExecutionSettingsError("The model does not support multi-modality")
if audio.uri:
audios.append(audio)
else:
raise ServiceInvalidExecutionSettingsError(
"Audio Content URI needs to be set, because onnx can only work with file paths"
)
return audios if len(audios) else None

@override
def get_prompt_execution_settings_class(self) -> type["PromptExecutionSettings"]:
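
To summarize the new behavior in this connector: with an explicit `ONNXTemplate` the chat history is rendered by the hand-written templates in `utils.py`; with `template=None` the messages are serialized and handed to the model's own chat template, and multi-modal models still require a template (enforced in `__init__`). A small sketch of the two paths, using a hypothetical history:

```python
from semantic_kernel.connectors.ai.onnx.utils import ONNXTemplate, apply_template
from semantic_kernel.contents import ChatHistory

history = ChatHistory()
history.add_system_message("You are a concise assistant.")
history.add_user_message("Summarize what ONNX Runtime GenAI does.")

# Path 1 - explicit template: rendered by the template functions in utils.py
# (phi3, phi3v, phi4, phi4mm, gemma, llama).
prompt = apply_template(history, ONNXTemplate.PHI4)
print(prompt)

# Path 2 - template=None: _prepare_chat_history_for_request serializes the messages
# to dicts and defers to the model's built-in chat template, roughly
#   tokenizer.apply_chat_template(json.dumps(messages), add_generation_prompt=True)
# which only applies to text-only models; multi-modal models raise at construction
# time if no template is given.
```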
@@ -6,7 +6,7 @@
from typing import Any

from semantic_kernel.connectors.ai.onnx.onnx_gen_ai_prompt_execution_settings import OnnxGenAIPromptExecutionSettings
from semantic_kernel.contents import ImageContent
from semantic_kernel.contents import AudioContent, ImageContent
from semantic_kernel.exceptions import ServiceInitializationError, ServiceInvalidResponseError
from semantic_kernel.kernel_pydantic import KernelBaseModel

@@ -50,7 +50,7 @@ def __init__(self, ai_model_path: str, **kwargs) -> None:
tokenizer = OnnxRuntimeGenAi.Tokenizer(model)
tokenizer_stream = tokenizer.create_stream()
except Exception as ex:
raise ServiceInitializationError("Failed to initialize OnnxTextCompletion service", ex) from ex
raise ServiceInitializationError("Failed to initialize OnnxCompletion service", ex) from ex

super().__init__(
model=model,
@@ -64,25 +64,27 @@ async def _generate_next_token_async(
self,
prompt: str,
settings: OnnxGenAIPromptExecutionSettings,
image: ImageContent | None = None,
images: list[ImageContent] | None = None,
audios: list[AudioContent] | None = None,
) -> AsyncGenerator[list[str], Any]:
try:
params = OnnxRuntimeGenAi.GeneratorParams(self.model)
params.set_search_options(**settings.prepare_settings_dict())
generator = OnnxRuntimeGenAi.Generator(self.model, params)
if not self.enable_multi_modality:
input_tokens = self.tokenizer.encode(prompt)
params.input_ids = input_tokens
generator.append_tokens(input_tokens)
else:
if image is not None:
# With the use of Pybind there is currently no way to load images from bytes
# We can only open images from a file path currently
image = OnnxRuntimeGenAi.Images.open(str(image.uri))
input_tokens = self.tokenizer(prompt, images=image)
params.set_inputs(input_tokens)
generator = OnnxRuntimeGenAi.Generator(self.model, params)
# With the use of Pybind in ONNX there is currently no way to load images from bytes
# We can only open images & audios from a file path currently
if images is not None:
images = OnnxRuntimeGenAi.Images.open(*[str(image.uri) for image in images])
if audios is not None:
audios = OnnxRuntimeGenAi.Audios.open(*[str(audio.uri) for audio in audios])
input_tokens = self.tokenizer(prompt, images=images, audios=audios)
generator.set_inputs(input_tokens)

while not generator.is_done():
generator.compute_logits()
generator.generate_next_token()
new_token_choices = [self.tokenizer_stream.decode(token) for token in generator.get_next_tokens()]
yield new_token_choices
@@ -94,10 +96,11 @@ async def _generate_next_token(
self,
prompt: str,
settings: OnnxGenAIPromptExecutionSettings,
image: ImageContent | None = None,
images: list[ImageContent] | None = None,
audios: list[AudioContent] | None = None,
):
token_choices: list[str] = []
async for new_token_choice in self._generate_next_token_async(prompt, settings, image):
async for new_token_choice in self._generate_next_token_async(prompt, settings, images, audios=audios):
# zip only works if the lists are the same length
if len(token_choices) == 0:
token_choices = new_token_choice
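
The base class now targets the onnxruntime-genai 0.9 API: prompt tokens are appended to the `Generator` instead of being assigned to `params.input_ids`, the separate `compute_logits()` step is gone, and multi-modal inputs go through `tokenizer(prompt, images=..., audios=...)` plus `generator.set_inputs(...)`. A minimal raw-library sketch of the text-only path, using only the calls that appear in this diff (the model path and prompt are placeholders):

```python
import onnxruntime_genai as og

model = og.Model("./models/phi-4-mini-onnx")  # placeholder path to a local ONNX model folder
tokenizer = og.Tokenizer(model)
tokenizer_stream = tokenizer.create_stream()

params = og.GeneratorParams(model)
params.set_search_options(max_length=256)

generator = og.Generator(model, params)
generator.append_tokens(tokenizer.encode("<|user|>\nHello!<|end|>\n<|assistant|>\n"))

while not generator.is_done():
    generator.generate_next_token()
    for token in generator.get_next_tokens():
        print(tokenizer_stream.decode(token), end="", flush=True)
```

For image or audio input the same loop applies, but the inputs are built with `og.Images.open(...)` / `og.Audios.open(...)`, tokenized via `tokenizer(prompt, images=..., audios=...)`, and attached with `generator.set_inputs(...)`, as in `_generate_next_token_async` above.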
59 changes: 57 additions & 2 deletions python/semantic_kernel/connectors/ai/onnx/utils.py
@@ -2,6 +2,7 @@
from enum import Enum

from semantic_kernel.contents import AuthorRole, ChatHistory, ImageContent, TextContent
from semantic_kernel.contents.audio_content import AudioContent
from semantic_kernel.exceptions import ServiceException, ServiceInvalidRequestError


@@ -19,6 +20,8 @@ class ONNXTemplate(str, Enum):

PHI3 = "phi3"
PHI3V = "phi3v"
PHI4 = "phi4"
PHI4MM = "phi4mm"
GEMMA = "gemma"
LLAMA = "llama"
NONE = "none"
@@ -39,9 +42,11 @@ def apply_template(history: ChatHistory, template: ONNXTemplate) -> str:
"""
template_functions = {
ONNXTemplate.PHI3: phi3_template,
ONNXTemplate.PHI4: phi4_template,
ONNXTemplate.GEMMA: gemma_template,
ONNXTemplate.LLAMA: llama_template,
ONNXTemplate.PHI3V: phi3v_template,
ONNXTemplate.PHI4MM: phi4mm_template,
ONNXTemplate.NONE: lambda text: text,
}

@@ -67,6 +72,22 @@ def phi3_template(history: ChatHistory) -> str:
return phi3_input


def phi4_template(history: ChatHistory) -> str:
"""Generates a formatted string from the chat history for use with the phi4 model.

Args:
history (ChatHistory): An object containing the chat history with a list of messages.

Returns:
str: A formatted string where each message is prefixed with the role and suffixed with an end marker.
"""
phi4_input = ""
for message in history.messages:
phi4_input += f"<|{message.role.value}|>\n{message.content}<|end|>\n"
phi4_input += "<|assistant|>\n"
return phi4_input


def phi3v_template(history: ChatHistory) -> str:
"""Generates a formatted string from a given chat history for use with the phi3v model.

@@ -78,22 +99,56 @@ def phi3v_template(history: ChatHistory) -> str:
the role of each message (system, user, assistant) and the type of content (text, image).
"""
phi3v_input = ""
image_count = 0
for message in history.messages:
if message.role == AuthorRole.SYSTEM:
phi3v_input += f"<|system|>\n{message.content}<|end|>\n"
if message.role == AuthorRole.USER:
for item in message.items:
if isinstance(item, TextContent):
phi3v_input += f"<|user|>\n{item.text}<|end|>\n"
# Currently only one image is supported in Onnx
if isinstance(item, ImageContent):
phi3v_input += "<|image_1|>\n"
phi3v_input += f"<|image_{image_count + 1}|>\n"
image_count += 1
if message.role == AuthorRole.ASSISTANT:
phi3v_input += f"<|assistant|>\n{message.content}<|end|>\n"
phi3v_input += "<|assistant|>\n"
return phi3v_input


def phi4mm_template(history: ChatHistory) -> str:
"""Generates a formatted string from a given chat history for use with the phi4mm model.

Args:
history (ChatHistory): An object containing the chat history with messages.

Returns:
str: A formatted string representing the chat history, with special tokens indicating
the role of each message (system, user, assistant) and the type of content (text, image).
"""
phi4mm_input = ""
image_count = 0
audio_count = 0
for message in history.messages:
if message.role == AuthorRole.SYSTEM:
phi4mm_input += f"<|system|>\n{message.content}<|end|>\n"
if message.role == AuthorRole.USER:
for item in message.items:
if isinstance(item, TextContent):
phi4mm_input += f"<|user|>\n{item.text}<|end|>\n"
# Currently only one image is supported in Onnx
if isinstance(item, ImageContent):
phi4mm_input += f"<|image_{image_count + 1}|>\n"
image_count += 1
if isinstance(item, AudioContent):
phi4mm_input += f"<|audio_{audio_count + 1}|>\n"
audio_count += 1
if message.role == AuthorRole.ASSISTANT:
phi4mm_input += f"<|assistant|>\n{message.content}<|end|>\n"
phi4mm_input += "<|assistant|>\n"
return phi4mm_input


def gemma_template(history: ChatHistory) -> str:
"""Generates a formatted string for the Gemma model based on the provided chat history.

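
To make the new multi-modal template concrete, here is the prompt it renders for a hypothetical text + image + audio user turn (content classes and `uri` usage follow the connector code above; the file names are placeholders):

```python
from semantic_kernel.connectors.ai.onnx.utils import ONNXTemplate, apply_template
from semantic_kernel.contents import AuthorRole, ChatHistory, ChatMessageContent, ImageContent, TextContent
from semantic_kernel.contents.audio_content import AudioContent

history = ChatHistory()
history.add_system_message("You are a helpful assistant.")
history.add_message(
    ChatMessageContent(
        role=AuthorRole.USER,
        items=[
            TextContent(text="Describe the image and transcribe the audio clip."),
            ImageContent(uri="slide.png"),  # placeholder file paths
            AudioContent(uri="clip.wav"),
        ],
    )
)

print(apply_template(history, ONNXTemplate.PHI4MM))
# Rendered prompt, per phi4mm_template above:
# <|system|>
# You are a helpful assistant.<|end|>
# <|user|>
# Describe the image and transcribe the audio clip.<|end|>
# <|image_1|>
# <|audio_1|>
# <|assistant|>
```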
@@ -52,8 +52,9 @@ def test_onnx_chat_completion_with_invalid_model():
)


def test_onnx_chat_completion_without_prompt_template():
with pytest.raises(TypeError):
@patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config_vision))
def test_onnx_chat_completion_with_multimodality_without_prompt_template(gen_ai_config_vision):
with pytest.raises(ServiceInitializationError):
OnnxGenAIChatCompletion()


@@ -147,7 +148,7 @@ def patch_open(*args, **kwargs):
)

last_image = chat_completion._get_images_from_history(history)
assert last_image == image_content
assert last_image == [image_content]


@patch("onnxruntime_genai.Model")