Skip to content

Commit c19f701

Browse files
authored
Support sending image data as part of a user message, using a new ImageUrl.load() method. Add sample and test. (#36042)
1 parent 147746b commit c19f701

File tree

9 files changed

+167
-16
lines changed

9 files changed

+167
-16
lines changed

sdk/ai/azure-ai-inference/README.md

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -210,8 +210,13 @@ print(response.choices[0].message.content)
210210

211211
<!-- END SNIPPET -->
212212

213-
The following types or messages are supported: `SystemMessage`,`UserMessage`, `AssistantMessage`, `ToolMessage`. See sample [sample_chat_completions_with_tools.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tools.py) for usage of `ToolMessage`. See [sample_chat_completions_with_images.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_images.py) for usage of `UserMessage` that
214-
includes uploading an image.
213+
The following types of messages are supported: `SystemMessage`, `UserMessage`, `AssistantMessage`, `ToolMessage`. See also samples:
214+
215+
* [sample_chat_completions_with_tools.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tools.py) for usage of `ToolMessage`.
216+
* [sample_chat_completions_with_image_url.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_image_url.py) for usage of `UserMessage` that
217+
includes sending an image URL.
218+
* [sample_chat_completions_with_image_data.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_image_data.py) for usage of `UserMessage` that
219+
includes sending image data read from a local file.
215220

216221
Alternatively, you can provide the messages as dictionary instead of using the strongly typed classes like `SystemMessage` and `UserMessage`:
217222

sdk/ai/azure-ai-inference/azure/ai/inference/_patch.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
https://github.com/Azure/autorest.python/issues/2619 (all clients).
1717
Otherwise intellisense did not show the patched public methods on the client object,
1818
when the client is defined using context manager ("with" statement).
19+
6. Add support for load() method in ImageUrl class (see /models/_patch.py).
1920
2021
"""
2122
import json

sdk/ai/azure-ai-inference/azure/ai/inference/models/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
from ._models import ChatCompletionsToolDefinition
1919
from ._models import ContentItem
2020
from ._models import ImageContentItem
21-
from ._models import ImageUrl
21+
from ._patch import ImageUrl
2222
from ._models import TextContentItem
2323
from ._models import ChatRequestMessage
2424
from ._models import ChatResponseMessage

sdk/ai/azure-ai-inference/azure/ai/inference/models/_patch.py

Lines changed: 44 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,18 +7,58 @@
77
Follow our quickstart for examples: https://aka.ms/azsdk/python/dpcodegen/python/customize
88
"""
99
import asyncio
10+
import base64
1011
import json
1112
import logging
1213
import queue
1314
import re
15+
import sys
1416

15-
from typing import List, AsyncIterator, Iterator
17+
from typing import List, AsyncIterator, Iterator, Optional, Union
1618
from azure.core.rest import HttpResponse, AsyncHttpResponse
19+
from ._models import ImageUrl as ImageUrlGenerated
1720
from .. import models as _models
1821

22+
if sys.version_info >= (3, 11):
23+
from typing import Self
24+
else:
25+
from typing_extensions import Self
26+
1927
logger = logging.getLogger(__name__)
2028

2129

30+
class ImageUrl(ImageUrlGenerated):
    # Extends the generated ImageUrl model with a `load()` alternate constructor
    # that builds the `url` value from the content of a local image file.

    @classmethod
    def load(
        cls,
        *,
        image_file: str,
        image_format: str,
        detail: Optional[Union[str, "_models.ImageDetailLevel"]] = None
    ) -> Self:
        """
        Create an ImageUrl object from a local image file. The method reads the image
        file and encodes it as a base64 string, which together with the image format
        is then used to format the JSON `url` value passed in the request payload.

        :keyword image_file: The name of the local image file to load. Required.
        :paramtype image_file: str
        :keyword image_format: The MIME type format of the image. For example: "jpeg", "png". Required.
        :paramtype image_format: str
        :keyword detail: The evaluation quality setting to use, which controls relative prioritization of
         speed, token consumption, and accuracy. Known values are: "auto", "low", and "high".
        :paramtype detail: str or ~azure.ai.inference.models.ImageDetailLevel
        :return: An ImageUrl object with the image data encoded as a base64 string.
        :rtype: ~azure.ai.inference.models.ImageUrl
        :raises FileNotFoundError: when the image file could not be opened.
        """
        # The file is read in binary mode; the base64 output is pure ASCII, so
        # decoding it as UTF-8 yields a plain string that can be embedded in the URL.
        with open(image_file, "rb") as f:
            image_data = base64.b64encode(f.read()).decode("utf-8")
        # Data URL layout: data:image/<format>;base64,<payload>
        url = f"data:image/{image_format};base64,{image_data}"
        return cls(url=url, detail=detail)
60+
61+
2262
class BaseStreamingChatCompletions:
2363
"""A base class for the sync and async streaming chat completions responses, holding any common code
2464
to deserializes the Server Sent Events (SSE) response stream into chat completions updates, each one
@@ -106,7 +146,7 @@ def __init__(self, response: HttpResponse):
106146
def __iter__(self):
107147
return self
108148

109-
def __next__(self) -> _models.StreamingChatCompletionsUpdate:
149+
def __next__(self) -> "_models.StreamingChatCompletionsUpdate":
110150
while self._queue.empty() and not self._done:
111151
self._done = self._read_next_block()
112152
if self._queue.empty():
@@ -145,7 +185,7 @@ def __init__(self, response: AsyncHttpResponse):
145185
def __aiter__(self):
146186
return self
147187

148-
async def __anext__(self) -> _models.StreamingChatCompletionsUpdate:
188+
async def __anext__(self) -> "_models.StreamingChatCompletionsUpdate":
149189
while self._queue.empty() and not self._done:
150190
self._done = await self._read_next_block_async()
151191
if self._queue.empty():
@@ -170,6 +210,7 @@ async def aclose(self) -> None:
170210

171211

172212
__all__: List[str] = [
213+
"ImageUrl",
173214
"StreamingChatCompletions",
174215
"AsyncStreamingChatCompletions",
175216
] # Add all objects you want publicly available to users at this package level

sdk/ai/azure-ai-inference/samples/README.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,11 +92,12 @@ similarly for the other samples.
9292
|[sample_chat_completions_streaming.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_streaming.py) | One chat completion operation using a synchronous client and streaming response. |
9393
|[sample_chat_completions_streaming_with_entra_id_auth.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_streaming_with_entra_id_auth.py) | One chat completion operation using a synchronous client and streaming response, using Entra ID authentication. This sample also shows setting the `azureml-model-deployment` HTTP request header, which may be required for some Managed Compute endpoint. |
9494
|[sample_chat_completions.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions.py) | One chat completion operation using a synchronous client. |
95-
|[sample_chat_completions_with_images.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_images.py) | One chat completion operation using a synchronous client, which includes sending an input image. |
95+
|[sample_chat_completions_with_image_url.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_image_url.py) | One chat completion operation using a synchronous client, which includes sending an input image URL. |
96+
|[sample_chat_completions_with_image_data.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_image_data.py) | One chat completion operation using a synchronous client, which includes sending input image data read from a local file. |
9697
|[sample_chat_completions_with_history.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_history.py) | Two chat completion operations using a synchronous client, with the second completion using chat history from the first. |
9798
|[sample_chat_completions_from_input_bytes.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_bytes.py) | One chat completion operation using a synchronous client, with input messages provided as `IO[bytes]`. |
9899
|[sample_chat_completions_from_input_json.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_json.py) | One chat completion operation using a synchronous client, with input messages provided as a dictionary (type `MutableMapping[str, Any]`) |
99-
|[sample_chat_completions_from_input_json_with_images.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_json_with_images.py) | One chat completion operation using a synchronous client, with input messages provided as a dictionary (type `MutableMapping[str, Any]`). Includes sending an input image. |
100+
|[sample_chat_completions_from_input_json_with_image_url.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_json_with_image_url.py) | One chat completion operation using a synchronous client, with input messages provided as a dictionary (type `MutableMapping[str, Any]`). Includes sending an input image URL. |
100101
|[sample_chat_completions_with_tools.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_tools.py) | Shows how to use a tool (function) in chat completions, for an AI model that supports tools |
101102
|[sample_load_client.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_load_client.py) | Shows how to use the function `load_client` to create the appropriate synchronous client based on the provided endpoint URL. In this example, it creates a synchronous `ChatCompletionsClient`. |
102103
|[sample_get_model_info.py](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/ai/azure-ai-inference/samples/sample_get_model_info.py) | Get AI model information using the chat completions client. Similarly can be done with all other clients. |

sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_json_with_images.py renamed to sdk/ai/azure-ai-inference/samples/sample_chat_completions_from_input_json_with_image_url.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,11 @@
77
This sample demonstrates how to get a chat completions response from
88
the service using a synchronous client, and directly providing the
99
JSON request body (containing input chat messages). The sample
10-
shows how to include an image in the input chat messages.
10+
shows how to include an image URL in the input chat messages.
1111
This sample will only work on AI models that support image input.
1212
1313
USAGE:
14-
python sample_chat_completions_from_input_json_with_image.py
14+
python sample_chat_completions_from_input_json_with_image_url.py
1515
1616
Set these two or three environment variables before running the sample:
1717
1) CHAT_COMPLETIONS_ENDPOINT - Your endpoint URL, in the form
@@ -26,7 +26,7 @@
2626
# pyright: reportAttributeAccessIssue=false
2727

2828

29-
def sample_chat_completions_from_input_json_with_image():
29+
def sample_chat_completions_from_input_json_with_image_url():
3030
import os
3131
from azure.ai.inference import ChatCompletionsClient
3232
from azure.core.credentials import AzureKeyCredential
@@ -83,4 +83,4 @@ def sample_chat_completions_from_input_json_with_image():
8383

8484

8585
if __name__ == "__main__":
86-
sample_chat_completions_from_input_json_with_image()
86+
sample_chat_completions_from_input_json_with_image_url()
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
# ------------------------------------
2+
# Copyright (c) Microsoft Corporation.
3+
# Licensed under the MIT License.
4+
# ------------------------------------
5+
"""
6+
DESCRIPTION:
7+
This sample demonstrates how to get a chat completions response from
8+
the service using a synchronous client. The sample shows how to load
9+
an image from a file and include it in the input chat messages.
10+
This sample will only work on AI models that support image input.
11+
12+
USAGE:
13+
python sample_chat_completions_with_image_data.py
14+
15+
Set these two or three environment variables before running the sample:
16+
1) CHAT_COMPLETIONS_ENDPOINT - Your endpoint URL, in the form
17+
https://<your-deployment-name>.<your-azure-region>.inference.ai.azure.com
18+
where `your-deployment-name` is your unique AI Model deployment name, and
19+
`your-azure-region` is the Azure region where your model is deployed.
20+
2) CHAT_COMPLETIONS_KEY - Your model key (a 32-character string). Keep it secret.
21+
3) CHAT_COMPLETIONS_DEPLOYMENT_NAME - Optional. The value for the HTTP
22+
request header `azureml-model-deployment`.
23+
"""
24+
25+
26+
def sample_chat_completions_with_image_data():
    """Send one chat completions request whose user message carries image data read from a local file.

    Reads the endpoint, key and optional deployment name from environment
    variables, creates a synchronous client, and prints the content of the
    first returned choice.
    """
    import os
    from azure.ai.inference import ChatCompletionsClient
    from azure.ai.inference.models import (
        SystemMessage,
        UserMessage,
        TextContentItem,
        ImageContentItem,
        ImageUrl,
        ImageDetailLevel,
    )
    from azure.core.credentials import AzureKeyCredential

    # Both of these variables are mandatory; stop early if either one is missing.
    endpoint = os.environ.get("CHAT_COMPLETIONS_ENDPOINT")
    key = os.environ.get("CHAT_COMPLETIONS_KEY")
    if endpoint is None or key is None:
        print("Missing environment variable 'CHAT_COMPLETIONS_ENDPOINT' or 'CHAT_COMPLETIONS_KEY'")
        print("Set them before running this sample.")
        exit()

    # The deployment name is optional; it stays None when the variable is not set.
    model_deployment = os.environ.get("CHAT_COMPLETIONS_DEPLOYMENT_NAME")
    if model_deployment is None:
        print("Could not read optional environment variable `CHAT_COMPLETIONS_DEPLOYMENT_NAME`.")
        print("HTTP request header `azureml-model-deployment` will not be set.")

    credential = AzureKeyCredential(key)
    request_headers = {"azureml-model-deployment": model_deployment}
    client = ChatCompletionsClient(
        endpoint=endpoint,
        credential=credential,
        headers=request_headers,
    )

    # Build the messages step by step: a system prompt, then a user message
    # made of a text question followed by the image loaded from disk.
    system_message = SystemMessage(content="You are an AI assistant that describes images in details.")
    question_item = TextContentItem(text="What's in this image?")
    image_url = ImageUrl.load(
        image_file="sample1.png",
        image_format="png",
        detail=ImageDetailLevel.HIGH,
    )
    image_item = ImageContentItem(image_url=image_url)
    user_message = UserMessage(content=[question_item, image_item])

    response = client.complete(messages=[system_message, user_message])

    first_choice = response.choices[0]
    print(first_choice.message.content)
75+
76+
77+
def get_image_data_url(image_file: str, image_format: str) -> str:
    """Return a base64 `data:` URL built from the content of a local image file.

    :param image_file: Path of the image file to read.
    :param image_format: Image format placed after `image/` in the URL, for example "png".
    :return: A string of the form `data:image/<image_format>;base64,<encoded file content>`.

    If the file does not exist, a hint is printed and the process exits.
    """
    import base64

    try:
        with open(image_file, "rb") as stream:
            raw_bytes = stream.read()
    except FileNotFoundError:
        print(f"Could not read '{image_file}'.")
        print("Set the correct path to the image file before running this sample.")
        exit()

    # base64 output is ASCII-only, so decoding it as UTF-8 gives a plain string.
    encoded_payload = base64.b64encode(raw_bytes).decode("utf-8")
    return f"data:image/{image_format};base64,{encoded_payload}"
87+
88+
89+
if __name__ == "__main__":
90+
sample_chat_completions_with_image_data()

sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_images.py renamed to sdk/ai/azure-ai-inference/samples/sample_chat_completions_with_image_url.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,11 @@
66
DESCRIPTION:
77
This sample demonstrates how to get a chat completions response from
88
the service using a synchronous client. The sample
9-
shows how to include an image in the input chat messages.
9+
shows how to include an image URL in the input chat messages.
1010
This sample will only work on AI models that support image input.
1111
1212
USAGE:
13-
python sample_chat_completions_with_images.py
13+
python sample_chat_completions_with_image_url.py
1414
1515
Set these two or three environment variables before running the sample:
1616
1) CHAT_COMPLETIONS_ENDPOINT - Your endpoint URL, in the form
@@ -23,7 +23,7 @@
2323
"""
2424

2525

26-
def sample_chat_completions_with_images():
26+
def sample_chat_completions_with_image_url():
2727
import os
2828
from azure.ai.inference import ChatCompletionsClient
2929
from azure.ai.inference.models import (
@@ -74,4 +74,4 @@ def sample_chat_completions_with_images():
7474

7575

7676
if __name__ == "__main__":
77-
sample_chat_completions_with_images()
77+
sample_chat_completions_with_image_url()

sdk/ai/azure-ai-inference/tests/test_model_inference_client.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
# Licensed under the MIT License.
44
# ------------------------------------
55
import inspect
6+
import os
67
import azure.ai.inference as sdk
78

89
from model_inference_test_base import ModelClientTestBase, ServicePreparerChatCompletions, ServicePreparerEmbeddings
@@ -65,6 +66,18 @@ def test_embeddings(self, **kwargs):
6566
self._validate_embeddings_result(response)
6667
client.close()
6768

69+
def test_image_url_load(self, **kwargs):
    """Verify that ImageUrl.load() builds a base64 data URL from a local PNG file."""
    local_folder = os.path.dirname(os.path.abspath(__file__))
    image_file = os.path.join(local_folder, "../samples/sample1.png")
    # Use the public `sdk.models` namespace rather than the private `_patch`
    # module, so the test also confirms that the class exported to users is
    # the one that provides `load()`.
    image_url = sdk.models.ImageUrl.load(
        image_file=image_file,
        image_format="png",
        detail=sdk.models.ImageDetailLevel.AUTO,
    )
    assert image_url
    # "iVBORw" is how the base64 encoding of the PNG file signature begins.
    assert image_url.url.startswith("data:image/png;base64,iVBORw")
    assert image_url.detail == sdk.models.ImageDetailLevel.AUTO
80+
6881
# **********************************************************************************
6982
#
7083
# HAPPY PATH TESTS - CHAT COMPLETIONS

0 commit comments

Comments
 (0)