Skip to content

Commit 19fef34

Browse files
authored
.Net: Added support for OpenAI image detail level property (#9561)
### Motivation and Context <!-- Thank you for your contribution to the semantic-kernel repo! Please help reviewers and future users, providing the following information: 1. Why is this change required? 2. What problem does it solve? 3. What scenario does it contribute to? 4. If it fixes an open issue, please link to the issue here. --> Resolves: #4759 This PR adds support for the `detail` property in the OpenAI image API. The property can be configured using the `ImageContent.Metadata` property. `ImageContent.Metadata` usage: ```csharp chatHistory.AddUserMessage( [ new TextContent("What’s in this image?"), new ImageContent(imageBytes, "image/jpg") { Metadata = new Dictionary<string, object?> { ["ChatImageDetailLevel"] = "high" } } ]); ``` ### Contribution Checklist <!-- Before submitting this PR, please make sure: --> - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone 😄
1 parent c613ae4 commit 19fef34

File tree

4 files changed

+143
-2
lines changed

4 files changed

+143
-2
lines changed

dotnet/samples/Concepts/ChatCompletion/OpenAI_ChatCompletionWithVision.cs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,4 +56,28 @@ public async Task LocalImageAsync()
5656

5757
Console.WriteLine(reply.Content);
5858
}
59+
60+
/// <summary>
/// Asks the model about an embedded image while requesting a specific image
/// detail level through the <c>ChatImageDetailLevel</c> entry of
/// <see cref="ImageContent"/> metadata.
/// </summary>
[Fact]
public async Task LocalImageWithImageDetailInMetadataAsync()
{
    var imageBytes = await EmbeddedResource.ReadAllAsync("sample_image.jpg");

    // Per-image options are carried in the content's Metadata dictionary.
    var imageMetadata = new Dictionary<string, object?> { ["ChatImageDetailLevel"] = "high" };
    var question = new TextContent("What’s in this image?");
    var image = new ImageContent(imageBytes, "image/jpg") { Metadata = imageMetadata };

    var kernel = Kernel.CreateBuilder()
        .AddOpenAIChatCompletion("gpt-4-vision-preview", TestConfiguration.OpenAI.ApiKey)
        .Build();
    var chatCompletionService = kernel.GetRequiredService<IChatCompletionService>();

    var chatHistory = new ChatHistory("You are a friendly assistant.");
    chatHistory.AddUserMessage([question, image]);

    var reply = await chatCompletionService.GetChatMessageContentAsync(chatHistory);

    Console.WriteLine(reply.Content);
}
5983
}

dotnet/src/Connectors/Connectors.OpenAI.UnitTests/Services/OpenAIChatCompletionServiceTests.cs

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -642,6 +642,69 @@ public async Task GetChatMessageContentsWithChatMessageContentItemCollectionAndS
642642
Assert.Equal("image_url", contentItems[1].GetProperty("type").GetString());
643643
}
644644

645+
// Verifies that the "ChatImageDetailLevel" metadata entry of an ImageContent is
// written to the request as image_url.detail, and that the field is left out
// when the entry is null or a blank string.
[Theory]
[MemberData(nameof(ImageContentMetadataDetailLevelData))]
public async Task GetChatMessageContentsHandlesImageDetailLevelInMetadataCorrectlyAsync(object? detailLevel, string? expectedDetailLevel)
{
    // Arrange
    using var response = new HttpResponseMessage(System.Net.HttpStatusCode.OK)
    {
        Content = new StringContent(ChatCompletionResponse)
    };
    this._messageHandlerStub.ResponseToReturn = response;

    var chatCompletion = new OpenAIChatCompletionService(modelId: "gpt-4-vision-preview", apiKey: "NOKEY", httpClient: this._httpClient);

    var imageMetadata = new Dictionary<string, object?> { ["ChatImageDetailLevel"] = detailLevel };
    var image = new ImageContent(new Uri("https://image")) { Metadata = imageMetadata };

    var chatHistory = new ChatHistory();
    chatHistory.AddUserMessage([image]);

    // Act
    await chatCompletion.GetChatMessageContentsAsync(chatHistory);

    // Assert
    var actualRequestContent = Encoding.UTF8.GetString(this._messageHandlerStub.RequestContent!);
    Assert.NotNull(actualRequestContent);

    var optionsJson = JsonSerializer.Deserialize<JsonElement>(actualRequestContent);
    var messages = optionsJson.GetProperty("messages");
    Assert.Equal(1, messages.GetArrayLength());

    var contentItems = messages[0].GetProperty("content");
    Assert.Equal(1, contentItems.GetArrayLength());

    var imageItem = contentItems[0];
    Assert.Equal("image_url", imageItem.GetProperty("type").GetString());

    var imageProperty = imageItem.GetProperty("image_url");
    Assert.Equal("https://image/", imageProperty.GetProperty("url").GetString());

    // A missing or blank detail level must not produce a "detail" field at all.
    bool detailExpectedToBeOmitted = detailLevel switch
    {
        null => true,
        string text => string.IsNullOrWhiteSpace(text),
        _ => false
    };

    if (detailExpectedToBeOmitted)
    {
        Assert.False(imageProperty.TryGetProperty("detail", out _));
        return;
    }

    Assert.Equal(expectedDetailLevel, imageProperty.GetProperty("detail").GetString());
}
691+
692+
// Verifies that an unrecognised "ChatImageDetailLevel" string in ImageContent
// metadata makes the chat completion call fail with ArgumentException.
[Fact]
public async Task GetChatMessageContentsThrowsExceptionWithInvalidImageDetailLevelInMetadataAsync()
{
    // Arrange
    var imageMetadata = new Dictionary<string, object?> { ["ChatImageDetailLevel"] = "invalid_value" };
    var image = new ImageContent(new Uri("https://image")) { Metadata = imageMetadata };

    var chatHistory = new ChatHistory();
    chatHistory.AddUserMessage([image]);

    var chatCompletion = new OpenAIChatCompletionService(modelId: "gpt-4-vision-preview", apiKey: "NOKEY", httpClient: this._httpClient);

    Task SendRequestAsync() => chatCompletion.GetChatMessageContentsAsync(chatHistory);

    // Act & Assert
    await Assert.ThrowsAsync<ArgumentException>(SendRequestAsync);
}
707+
645708
[Fact]
646709
public async Task FunctionCallsShouldBePropagatedToCallersViaChatMessageItemsOfTypeFunctionCallContentAsync()
647710
{
@@ -1558,6 +1621,15 @@ public async Task OnAutoFunctionInvocationAsync(AutoFunctionInvocationContext co
15581621
}
15591622
""";
15601623

1624+
// Theory rows: (value stored under "ChatImageDetailLevel", expected "detail" value in the request).
// A null expectation means the "detail" field should be absent.
public static TheoryData<object?, string?> ImageContentMetadataDetailLevelData
{
    get
    {
        var cases = new TheoryData<object?, string?>();

        cases.Add("auto", "auto");
        cases.Add("high", "high");
        cases.Add("low", "low");
        cases.Add("", null);
        cases.Add(null, null);

        return cases;
    }
}
1632+
15611633
#pragma warning disable CS8618, CA1812
15621634
private sealed class MathReasoning
15631635
{

dotnet/src/Connectors/Connectors.OpenAI/Core/ClientCore.ChatCompletion.cs

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -802,19 +802,44 @@ private static List<ChatMessage> CreateRequestMessages(ChatMessageContent messag
802802

803803
/// <summary>
/// Converts an <see cref="ImageContent"/> into an OpenAI image content part,
/// forwarding the optional detail level read from the content's metadata.
/// </summary>
/// <param name="imageContent">Image content carrying either binary data or a URI.</param>
/// <returns>An image part built from the data (preferred when non-empty) or from the URI.</returns>
/// <exception cref="ArgumentException">
/// The content has neither data nor a URI, or its metadata holds an unknown detail level.
/// </exception>
private static ChatMessageContentPart GetImageContentItem(ImageContent imageContent)
{
    // Resolved once up front so both the data and the URI path pass the same value.
    ChatImageDetailLevel? detailLevel = GetChatImageDetailLevel(imageContent);

    if (imageContent.Data is { IsEmpty: false } data)
    {
        return ChatMessageContentPart.CreateImagePart(BinaryData.FromBytes(data), imageContent.MimeType, detailLevel);
    }

    if (imageContent.Uri is not null)
    {
        return ChatMessageContentPart.CreateImagePart(imageContent.Uri, detailLevel);
    }

    throw new ArgumentException($"{nameof(ImageContent)} must have either Data or a Uri.");
}
817819

820+
/// <summary>
/// Reads the optional image detail level from the <c>ChatImageDetailLevel</c>
/// entry of <see cref="ImageContent"/> metadata.
/// </summary>
/// <param name="imageContent">Image content whose metadata is inspected.</param>
/// <returns>
/// The requested detail level, or <see langword="null"/> when the metadata is missing,
/// has no such entry, or the entry is null, blank, or of an unsupported type.
/// </returns>
/// <exception cref="ArgumentException">
/// The entry is a non-blank string other than 'Auto', 'Low' or 'High' (case-insensitive).
/// </exception>
private static ChatImageDetailLevel? GetChatImageDetailLevel(ImageContent imageContent)
{
    const string DetailLevelProperty = "ChatImageDetailLevel";

    if (imageContent.Metadata is null ||
        !imageContent.Metadata.TryGetValue(DetailLevelProperty, out object? detailLevel) ||
        detailLevel is null)
    {
        return null;
    }

    // A caller may store the strongly typed value directly; use it as-is.
    if (detailLevel is ChatImageDetailLevel typedDetailLevel)
    {
        return typedDetailLevel;
    }

    // Metadata values deserialized by System.Text.Json arrive as JsonElement rather
    // than string, so unwrap JSON strings to keep the setting after a round trip.
    string? detailLevelString = detailLevel switch
    {
        string text => text,
        System.Text.Json.JsonElement { ValueKind: System.Text.Json.JsonValueKind.String } json => json.GetString(),
        _ => null
    };

    if (detailLevelString is null || string.IsNullOrWhiteSpace(detailLevelString))
    {
        return null;
    }

    return detailLevelString.ToUpperInvariant() switch
    {
        "AUTO" => ChatImageDetailLevel.Auto,
        "LOW" => ChatImageDetailLevel.Low,
        "HIGH" => ChatImageDetailLevel.High,
        _ => throw new ArgumentException($"Unknown image detail level '{detailLevelString}'. Supported values are 'Auto', 'Low' and 'High'.")
    };
}
842+
818843
private OpenAIChatMessageContent CreateChatMessageContent(OpenAIChatCompletion completion, string targetModel)
819844
{
820845
var message = new OpenAIChatMessageContent(completion, targetModel, this.GetChatCompletionMetadata(completion));

dotnet/src/SemanticKernel.UnitTests/Contents/ImageContentTests.cs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
// Copyright (c) Microsoft. All rights reserved.
22

33
using System;
4+
using System.Collections.Generic;
45
using System.Text;
56
using System.Text.Json;
67
using Microsoft.SemanticKernel;
@@ -241,6 +242,25 @@ public void EmptyConstructorSerializationAndDeserializationAsExpected()
241242
Assert.Null(deserialized.Metadata);
242243
}
243244

245+
// Verifies that an ImageContent metadata entry survives a JSON round trip.
// The value is compared through ToString() rather than as a string instance.
[Fact]
public void MetadataSerializationAndDeserializationWorksCorrectly()
{
    // Arrange
    var originalMetadata = new Dictionary<string, object?> { ["ChatImageDetailLevel"] = "high" };
    var content = new ImageContent { Metadata = originalMetadata };

    // Act
    string serialized = JsonSerializer.Serialize(content);
    ImageContent? deserialized = JsonSerializer.Deserialize<ImageContent>(serialized);

    // Assert
    var roundTrippedMetadata = deserialized?.Metadata;
    Assert.NotNull(roundTrippedMetadata);
    Assert.True(roundTrippedMetadata.TryGetValue("ChatImageDetailLevel", out object? roundTrippedValue));
    Assert.Equal("high", roundTrippedValue?.ToString());
}
263+
244264
[Theory]
245265
[InlineData("http://localhost:9090/")]
246266
[InlineData(null)]

0 commit comments

Comments
 (0)