2 changes: 1 addition & 1 deletion python/pyproject.toml
@@ -121,7 +121,7 @@ ollama = [
"ollama ~= 0.4"
]
onnx = [
"onnxruntime-genai ~= 0.7"
"onnxruntime-genai ~= 0.9"
]
pandas = [
"pandas ~= 2.2"
8 changes: 2 additions & 6 deletions python/samples/concepts/setup/chat_completion_services.py
@@ -332,13 +332,9 @@ def get_onnx_chat_completion_service_and_request_settings() -> tuple[
Please refer to the Semantic Kernel Python documentation for more information:
https://learn.microsoft.com/en-us/python/api/semantic-kernel/semantic_kernel?view=semantic-kernel
"""
from semantic_kernel.connectors.ai.onnx import (
OnnxGenAIChatCompletion,
OnnxGenAIPromptExecutionSettings,
ONNXTemplate,
)
from semantic_kernel.connectors.ai.onnx import OnnxGenAIChatCompletion, OnnxGenAIPromptExecutionSettings

chat_service = OnnxGenAIChatCompletion(ONNXTemplate.PHI3, service_id=service_id)
chat_service = OnnxGenAIChatCompletion(template="phi4mm", service_id=service_id)
request_settings = OnnxGenAIPromptExecutionSettings(service_id=service_id)

return chat_service, request_settings
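For illustration, here is a minimal usage sketch of the updated sample, assuming a phi-4 multimodal ONNX model folder is configured through the usual ONNX settings; the file names below are placeholders and are not part of this change:

import asyncio

from semantic_kernel.contents import AudioContent, AuthorRole, ChatHistory, ChatMessageContent, ImageContent, TextContent


async def main() -> None:
    chat_service, request_settings = get_onnx_chat_completion_service_and_request_settings()
    history = ChatHistory()
    # ONNX GenAI can only read media from file paths, so both URIs point to local files (placeholders).
    history.add_message(
        ChatMessageContent(
            role=AuthorRole.USER,
            items=[
                TextContent(text="Describe the image and summarize the audio clip."),
                ImageContent(uri="sample_image.png"),
                AudioContent(uri="sample_audio.wav"),
            ],
        )
    )
    results = await chat_service.get_chat_message_contents(chat_history=history, settings=request_settings)
    print(results[0].content)


asyncio.run(main())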
@@ -1,5 +1,6 @@
# Copyright (c) Microsoft. All rights reserved.

import json
import logging
import sys
from collections.abc import AsyncGenerator
@@ -10,7 +11,6 @@
else:
from typing_extensions import override # pragma: no cover


from pydantic import ValidationError

from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase
@@ -20,6 +20,7 @@
from semantic_kernel.connectors.ai.onnx.utils import ONNXTemplate, apply_template
from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
from semantic_kernel.contents import (
AudioContent,
ChatHistory,
ChatMessageContent,
ImageContent,
@@ -37,12 +38,12 @@
class OnnxGenAIChatCompletion(ChatCompletionClientBase, OnnxGenAICompletionBase):
"""OnnxGenAI text completion service."""

template: ONNXTemplate
template: ONNXTemplate | None
SUPPORTS_FUNCTION_CALLING: ClassVar[bool] = False

def __init__(
self,
template: ONNXTemplate,
template: ONNXTemplate | None = None,
ai_model_path: str | None = None,
ai_model_id: str | None = None,
env_file_path: str | None = None,
@@ -80,6 +81,12 @@ def __init__(

super().__init__(ai_model_id=ai_model_id, ai_model_path=settings.chat_model_folder, template=template, **kwargs)

if self.enable_multi_modality and template is None:
raise ServiceInitializationError(
"When using a multi-modal model, a template must be specified."
" Please provide a ONNXTemplate in the constructor."
)

@override
async def _inner_get_chat_message_contents(
self,
@@ -101,7 +108,8 @@ async def _inner_get_chat_message_contents(
assert isinstance(settings, OnnxGenAIPromptExecutionSettings) # nosec
prompt = self._prepare_chat_history_for_request(chat_history)
images = self._get_images_from_history(chat_history)
choices = await self._generate_next_token(prompt, settings, images)
audios = self._get_audios_from_history(chat_history)
choices = await self._generate_next_token(prompt, settings, images=images, audios=audios)
return [self._create_chat_message_content(choice) for choice in choices]

@override
@@ -127,7 +135,8 @@ async def _inner_get_streaming_chat_message_contents(
assert isinstance(settings, OnnxGenAIPromptExecutionSettings) # nosec
prompt = self._prepare_chat_history_for_request(chat_history)
images = self._get_images_from_history(chat_history)
async for chunk in self._generate_next_token_async(prompt, settings, images):
audios = self._get_audios_from_history(chat_history)
async for chunk in self._generate_next_token_async(prompt, settings, images=images, audios=audios):
yield [
self._create_streaming_chat_message_content(choice_index, new_token, function_invoke_attempt)
for choice_index, new_token in enumerate(chunk)
@@ -159,9 +168,21 @@ def _create_streaming_chat_message_content(
def _prepare_chat_history_for_request(
self, chat_history: ChatHistory, role_key: str = "role", content_key: str = "content"
) -> Any:
return apply_template(chat_history, self.template)
if self.template:
return apply_template(chat_history, self.template)
return self.tokenizer.apply_chat_template(
json.dumps(self._chat_messages_to_dicts(chat_history)),
add_generation_prompt=True,
)

def _chat_messages_to_dicts(self, chat_history: "ChatHistory") -> list[dict[str, Any]]:
return [
message.to_dict(role_key="role", content_key="content")
for message in chat_history.messages
if isinstance(message, ChatMessageContent)
]
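For illustration, when no ONNXTemplate is supplied the history is reduced to the standard role/content message list before the tokenizer's built-in chat template is applied (a sketch with a hypothetical history, reusing the imports at the top of this file):

history = ChatHistory()
history.add_system_message("You are a helpful assistant.")
history.add_user_message("Hello!")
messages = [m.to_dict(role_key="role", content_key="content") for m in history.messages]
# messages == [
#     {"role": "system", "content": "You are a helpful assistant."},
#     {"role": "user", "content": "Hello!"},
# ]
# json.dumps(messages) is the string handed to tokenizer.apply_chat_template(..., add_generation_prompt=True).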

def _get_images_from_history(self, chat_history: "ChatHistory") -> ImageContent | None:
def _get_images_from_history(self, chat_history: "ChatHistory") -> list[ImageContent] | None:
images = []
for message in chat_history.messages:
for image in message.items:
@@ -174,11 +195,22 @@ def _get_images_from_history(self, chat_history: "ChatHistory") -> ImageContent
raise ServiceInvalidExecutionSettingsError(
"Image Content URI needs to be set, because onnx can only work with file paths"
)
# Currently Onnx Runtime only supports one image
# Later we will add support for multiple images
if len(images) > 1:
raise ServiceInvalidExecutionSettingsError("The model does not support more than one image")
return images[-1] if images else None
return images if len(images) else None

def _get_audios_from_history(self, chat_history: "ChatHistory") -> list[AudioContent] | None:
audios = []
for message in chat_history.messages:
for audio in message.items:
if isinstance(audio, AudioContent):
if not self.enable_multi_modality:
raise ServiceInvalidExecutionSettingsError("The model does not support multi-modality")
if audio.uri:
audios.append(audio)
else:
raise ServiceInvalidExecutionSettingsError(
"Audio Content URI needs to be set, because onnx can only work with file paths"
)
return audios if len(audios) else None

@override
def get_prompt_execution_settings_class(self) -> type["PromptExecutionSettings"]:
@@ -6,7 +6,7 @@
from typing import Any

from semantic_kernel.connectors.ai.onnx.onnx_gen_ai_prompt_execution_settings import OnnxGenAIPromptExecutionSettings
from semantic_kernel.contents import ImageContent
from semantic_kernel.contents import AudioContent, ImageContent
from semantic_kernel.exceptions import ServiceInitializationError, ServiceInvalidResponseError
from semantic_kernel.kernel_pydantic import KernelBaseModel

@@ -50,7 +50,7 @@ def __init__(self, ai_model_path: str, **kwargs) -> None:
tokenizer = OnnxRuntimeGenAi.Tokenizer(model)
tokenizer_stream = tokenizer.create_stream()
except Exception as ex:
raise ServiceInitializationError("Failed to initialize OnnxTextCompletion service", ex) from ex
raise ServiceInitializationError("Failed to initialize OnnxCompletion service", ex) from ex

super().__init__(
model=model,
@@ -64,25 +64,27 @@ async def _generate_next_token_async(
self,
prompt: str,
settings: OnnxGenAIPromptExecutionSettings,
image: ImageContent | None = None,
images: list[ImageContent] | None = None,
audios: list[AudioContent] | None = None,
) -> AsyncGenerator[list[str], Any]:
try:
params = OnnxRuntimeGenAi.GeneratorParams(self.model)
params.set_search_options(**settings.prepare_settings_dict())
generator = OnnxRuntimeGenAi.Generator(self.model, params)
if not self.enable_multi_modality:
input_tokens = self.tokenizer.encode(prompt)
params.input_ids = input_tokens
generator.append_tokens(input_tokens)
else:
if image is not None:
# With the use of Pybind there is currently no way to load images from bytes
# We can only open images from a file path currently
image = OnnxRuntimeGenAi.Images.open(str(image.uri))
input_tokens = self.tokenizer(prompt, images=image)
params.set_inputs(input_tokens)
generator = OnnxRuntimeGenAi.Generator(self.model, params)
# With the use of Pybind in ONNX there is currently no way to load images from bytes
# We can only open images & audios from a file path currently
if images is not None:
images = OnnxRuntimeGenAi.Images.open(*[str(image.uri) for image in images])
if audios is not None:
audios = OnnxRuntimeGenAi.Audios.open(*[str(audio.uri) for audio in audios])
input_tokens = self.tokenizer(prompt, images=images, audios=audios)
generator.set_inputs(input_tokens)

while not generator.is_done():
generator.compute_logits()
generator.generate_next_token()
new_token_choices = [self.tokenizer_stream.decode(token) for token in generator.get_next_tokens()]
yield new_token_choices
@@ -94,10 +96,11 @@ async def _generate_next_token(
self,
prompt: str,
settings: OnnxGenAIPromptExecutionSettings,
image: ImageContent | None = None,
images: list[ImageContent] | None = None,
audios: list[AudioContent] | None = None,
):
token_choices: list[str] = []
async for new_token_choice in self._generate_next_token_async(prompt, settings, image):
async for new_token_choice in self._generate_next_token_async(prompt, settings, images, audios=audios):
# zip only works if the lists are the same length
if len(token_choices) == 0:
token_choices = new_token_choice
59 changes: 57 additions & 2 deletions python/semantic_kernel/connectors/ai/onnx/utils.py
@@ -2,6 +2,7 @@
from enum import Enum

from semantic_kernel.contents import AuthorRole, ChatHistory, ImageContent, TextContent
from semantic_kernel.contents.audio_content import AudioContent
from semantic_kernel.exceptions import ServiceException, ServiceInvalidRequestError


@@ -19,6 +20,8 @@ class ONNXTemplate(str, Enum):

PHI3 = "phi3"
PHI3V = "phi3v"
PHI4 = "phi4"
PHI4MM = "phi4mm"
GEMMA = "gemma"
LLAMA = "llama"
NONE = "none"
@@ -39,9 +42,11 @@ def apply_template(history: ChatHistory, template: ONNXTemplate) -> str:
"""
template_functions = {
ONNXTemplate.PHI3: phi3_template,
ONNXTemplate.PHI4: phi4_template,
ONNXTemplate.GEMMA: gemma_template,
ONNXTemplate.LLAMA: llama_template,
ONNXTemplate.PHI3V: phi3v_template,
ONNXTemplate.PHI4MM: phi4mm_template,
ONNXTemplate.NONE: lambda text: text,
}

@@ -67,6 +72,22 @@ def phi3_template(history: ChatHistory) -> str:
return phi3_input


def phi4_template(history: ChatHistory) -> str:
"""Generates a formatted string from the chat history for use with the phi4 model.

Args:
history (ChatHistory): An object containing the chat history with a list of messages.

Returns:
str: A formatted string where each message is prefixed with the role and suffixed with an end marker.
"""
phi4_input = ""
for message in history.messages:
phi4_input += f"<|{message.role.value}|>\n{message.content}<|end|>\n"
phi4_input += "<|assistant|>\n"
return phi4_input
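As a quick, hypothetical illustration of the format this produces:

history = ChatHistory()
history.add_system_message("You are a helpful assistant.")
history.add_user_message("What is ONNX Runtime GenAI?")
print(phi4_template(history))
# <|system|>
# You are a helpful assistant.<|end|>
# <|user|>
# What is ONNX Runtime GenAI?<|end|>
# <|assistant|>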


def phi3v_template(history: ChatHistory) -> str:
"""Generates a formatted string from a given chat history for use with the phi3v model.

@@ -78,22 +99,56 @@ def phi3v_template(history: ChatHistory) -> str:
the role of each message (system, user, assistant) and the type of content (text, image).
"""
phi3v_input = ""
image_count = 0
for message in history.messages:
if message.role == AuthorRole.SYSTEM:
phi3v_input += f"<|system|>\n{message.content}<|end|>\n"
if message.role == AuthorRole.USER:
for item in message.items:
if isinstance(item, TextContent):
phi3v_input += f"<|user|>\n{item.text}<|end|>\n"
# Currently only one image is supported in Onnx
if isinstance(item, ImageContent):
phi3v_input += "<|image_1|>\n"
phi3v_input += f"<|image_{image_count + 1}|>\n"
image_count += 1
if message.role == AuthorRole.ASSISTANT:
phi3v_input += f"<|assistant|>\n{message.content}<|end|>\n"
phi3v_input += "<|assistant|>\n"
return phi3v_input


def phi4mm_template(history: ChatHistory) -> str:
"""Generates a formatted string from a given chat history for use with the phi4mm model.

Args:
history (ChatHistory): An object containing the chat history with messages.

Returns:
str: A formatted string representing the chat history, with special tokens indicating
the role of each message (system, user, assistant) and the type of content (text, image).
"""
phi4mm_input = ""
image_count = 0
audio_count = 0
for message in history.messages:
if message.role == AuthorRole.SYSTEM:
phi4mm_input += f"<|system|>\n{message.content}<|end|>\n"
if message.role == AuthorRole.USER:
for item in message.items:
if isinstance(item, TextContent):
phi4mm_input += f"<|user|>\n{item.text}<|end|>\n"
# Currently only one image is supported in Onnx
if isinstance(item, ImageContent):
phi4mm_input += f"<|image_{image_count + 1}|>\n"
image_count += 1
if isinstance(item, AudioContent):
phi4mm_input += f"<|audio_{audio_count + 1}|>\n"
audio_count += 1
if message.role == AuthorRole.ASSISTANT:
phi4mm_input += f"<|assistant|>\n{message.content}<|end|>\n"
phi4mm_input += "<|assistant|>\n"
return phi4mm_input
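A similar hypothetical illustration for the multimodal format: image and audio items become numbered placeholders, while the media files themselves are loaded separately and passed to the tokenizer by OnnxGenAICompletionBase:

from semantic_kernel.contents import AudioContent, AuthorRole, ChatMessageContent, ImageContent, TextContent

history = ChatHistory()
history.add_message(
    ChatMessageContent(
        role=AuthorRole.USER,
        items=[
            TextContent(text="What is shown and said here?"),
            ImageContent(uri="sample_image.png"),  # placeholder path
            AudioContent(uri="sample_audio.wav"),  # placeholder path
        ],
    )
)
print(phi4mm_template(history))
# <|user|>
# What is shown and said here?<|end|>
# <|image_1|>
# <|audio_1|>
# <|assistant|>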


def gemma_template(history: ChatHistory) -> str:
"""Generates a formatted string for the Gemma model based on the provided chat history.

@@ -52,8 +52,9 @@ def test_onnx_chat_completion_with_invalid_model():
)


def test_onnx_chat_completion_without_prompt_template():
with pytest.raises(TypeError):
@patch("builtins.open", new_callable=mock_open, read_data=json.dumps(gen_ai_config_vision))
def test_onnx_chat_completion_with_multimodality_without_prompt_template(gen_ai_config_vision):
with pytest.raises(ServiceInitializationError):
OnnxGenAIChatCompletion()


@@ -147,7 +148,7 @@ def patch_open(*args, **kwargs):
)

last_image = chat_completion._get_images_from_history(history)
assert last_image == image_content
assert last_image == [image_content]


@patch("onnxruntime_genai.Model")