Skip to content

Commit b1fb9d6

Browse files
author
Jicheng Lu
committed
add image edit utility
1 parent de5750d commit b1fb9d6

File tree

12 files changed

+229
-20
lines changed

12 files changed

+229
-20
lines changed

src/Infrastructure/BotSharp.Abstraction/Files/Models/LlmFileContext.cs

Lines changed: 0 additions & 16 deletions
This file was deleted.

src/Plugins/BotSharp.Plugin.EmailHandler/Functions/HandleEmailRequestFn.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ public async Task<bool> Execute(RoleDialogModel message)
7373

7474
private async Task<IEnumerable<MessageFileModel>> GetConversationFiles()
7575
{
76-
var convService = _services.GetService<IConversationService>();
76+
var convService = _services.GetRequiredService<IConversationService>();
7777
var fileService = _services.GetRequiredService<IBotSharpFileService>();
7878
var conversationId = convService.ConversationId;
7979
var dialogs = convService.GetDialogHistory(fromBreakpoint: false);

src/Plugins/BotSharp.Plugin.FileHandler/BotSharp.Plugin.FileHandler.csproj

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,15 @@
1111
</PropertyGroup>
1212

1313
<ItemGroup>
14+
<None Remove="data\agents\6745151e-6d46-4a02-8de4-1c4f21c7da95\functions\edit_image.json" />
1415
<None Remove="data\agents\6745151e-6d46-4a02-8de4-1c4f21c7da95\functions\generate_image.json" />
1516
<None Remove="data\agents\6745151e-6d46-4a02-8de4-1c4f21c7da95\functions\read_image.json" />
1617
<None Remove="data\agents\6745151e-6d46-4a02-8de4-1c4f21c7da95\functions\read_pdf.json" />
18+
<None Remove="data\agents\6745151e-6d46-4a02-8de4-1c4f21c7da95\templates\edit_image.fn.liquid" />
1719
<None Remove="data\agents\6745151e-6d46-4a02-8de4-1c4f21c7da95\templates\generate_image.fn.liquid" />
1820
<None Remove="data\agents\6745151e-6d46-4a02-8de4-1c4f21c7da95\templates\read_image.fn.liquid" />
1921
<None Remove="data\agents\6745151e-6d46-4a02-8de4-1c4f21c7da95\templates\read_pdf.fn.liquid" />
22+
<None Remove="data\agents\6745151e-6d46-4a02-8de4-1c4f21c7da95\templates\select_edit_image_prompt.liquid" />
2023
</ItemGroup>
2124

2225
<ItemGroup>
@@ -38,6 +41,15 @@
3841
<Content Include="data\agents\6745151e-6d46-4a02-8de4-1c4f21c7da95\templates\read_pdf.fn.liquid">
3942
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
4043
</Content>
44+
<Content Include="data\agents\6745151e-6d46-4a02-8de4-1c4f21c7da95\functions\edit_image.json">
45+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
46+
</Content>
47+
<Content Include="data\agents\6745151e-6d46-4a02-8de4-1c4f21c7da95\templates\edit_image.fn.liquid">
48+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
49+
</Content>
50+
<Content Include="data\agents\6745151e-6d46-4a02-8de4-1c4f21c7da95\templates\select_edit_image_prompt.liquid">
51+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
52+
</Content>
4153
</ItemGroup>
4254

4355
<ItemGroup>

src/Plugins/BotSharp.Plugin.FileHandler/Enums/UtilityName.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,5 +4,6 @@ public class UtilityName
44
{
55
public const string ImageGenerator = "image-generator";
66
public const string ImageReader = "image-reader";
7+
public const string ImageEditor = "image-editor";
78
public const string PdfReader = "pdf-reader";
89
}
Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
using BotSharp.Abstraction.Templating;
2+
using System.IO;
3+
4+
namespace BotSharp.Plugin.FileHandler.Functions;
5+
6+
/// <summary>
/// Function callback for the "edit_image" function. It selects a PNG image that the user
/// uploaded in the current conversation, sends it to the image completion provider together
/// with the user's request, and stores the first generated image as a bot file on the message.
/// </summary>
public class EditImageFn : IFunctionCallback
{
    public string Name => "edit_image";
    public string Indication => "Editing image";

    private readonly IServiceProvider _services;
    private readonly ILogger<EditImageFn> _logger;

    // Populated by Init() at the start of every Execute() call.
    private string _conversationId = string.Empty;
    private string _messageId = string.Empty;

    public EditImageFn(
        IServiceProvider services,
        ILogger<EditImageFn> logger)
    {
        _services = services;
        _logger = logger;
    }

    /// <summary>
    /// Runs the image edit and writes the textual outcome to <paramref name="message"/>.Content.
    /// </summary>
    /// <param name="message">The function-call message; its FunctionArgs carry the "user_request" field.</param>
    /// <returns>Always <c>true</c>; failures are reported through the message content.</returns>
    public async Task<bool> Execute(RoleDialogModel message)
    {
        // Missing or blank arguments are treated as "no explicit request" instead of throwing.
        var args = string.IsNullOrWhiteSpace(message.FunctionArgs)
            ? null
            : JsonSerializer.Deserialize<LlmContextIn>(message.FunctionArgs);
        var description = args?.UserRequest ?? string.Empty;

        Init(message);
        SetImageOptions();

        var image = await SelectConversationImage();
        var response = await GetImageEditGeneration(message, description, image);
        message.Content = response;
        return true;
    }

    /// <summary>
    /// Captures the current conversation id and the id of the message being processed.
    /// </summary>
    private void Init(RoleDialogModel message)
    {
        var convService = _services.GetRequiredService<IConversationService>();
        _conversationId = convService.ConversationId;
        _messageId = message.MessageId;
    }

    /// <summary>
    /// Sets the "image_format" and "image_count" conversation states used for the edit request.
    /// </summary>
    private void SetImageOptions()
    {
        var state = _services.GetRequiredService<IConversationStateService>();
        state.SetState("image_format", "bytes");
        state.SetState("image_count", "1");
    }

    /// <summary>
    /// Loads the user-supplied image files attached to the conversation's dialog history
    /// and lets <see cref="SelectImage"/> choose the one to edit.
    /// </summary>
    private async Task<MessageFileModel?> SelectConversationImage()
    {
        var convService = _services.GetRequiredService<IConversationService>();
        var fileService = _services.GetRequiredService<IBotSharpFileService>();
        var dialogs = convService.GetDialogHistory(fromBreakpoint: false);
        var messageIds = dialogs.Select(x => x.MessageId).Distinct().ToList();
        var images = fileService.GetMessageFiles(_conversationId, messageIds, FileSourceType.User, imageOnly: true);
        return await SelectImage(images, dialogs);
    }

    /// <summary>
    /// Asks a chat completion model which of the PNG images the user wants to edit.
    /// </summary>
    /// <param name="images">Candidate image files; only PNG entries are offered to the model.</param>
    /// <param name="dialogs">Dialog history passed to the chat completion.</param>
    /// <returns>The selected image, or <c>null</c> when nothing can be selected or an error occurs.</returns>
    private async Task<MessageFileModel?> SelectImage(IEnumerable<MessageFileModel> images, List<RoleDialogModel> dialogs)
    {
        if (images.IsNullOrEmpty()) return null;

        var llmProviderService = _services.GetRequiredService<ILlmProviderService>();
        var render = _services.GetRequiredService<ITemplateRender>();
        var db = _services.GetRequiredService<IBotSharpRepository>();

        try
        {
            // Materialize the PNG subset once: the ids shown to the model and the lookup of the
            // model's answer must refer to the same list, otherwise a non-PNG file earlier in
            // the sequence shifts the index and the wrong image is returned.
            var pngImages = images.Where(x => x.ContentType == MediaTypeNames.Image.Png).ToList();
            if (pngImages.Count == 0) return null;

            var promptImages = pngImages
                .Select((x, idx) => $"id: {idx + 1}, image_name: {x.FileName}.{x.FileType}")
                .ToList();

            var prompt = db.GetAgentTemplate(BuiltInAgentId.UtilityAssistant, "select_edit_image_prompt");
            prompt = render.Render(prompt, new Dictionary<string, object>
            {
                { "image_list", promptImages }
            });

            var agent = new Agent
            {
                Id = BuiltInAgentId.UtilityAssistant,
                Name = "Utility Assistant",
                Instruction = prompt
            };

            // NOTE(review): provider/model may be missing from configuration; any resulting
            // exception is handled by the catch below and yields "no image selected".
            var provider = llmProviderService.GetProviders().FirstOrDefault(x => x == "openai");
            var model = llmProviderService.GetProviderModel(provider: provider, id: "gpt-4");
            var completion = CompletionProvider.GetChatCompletion(_services, provider: provider, model: model.Name);
            var response = await completion.GetChatCompletions(agent, dialogs);
            var content = response?.Content ?? string.Empty;

            // The model is expected to answer with a bare JSON number (the 1-based id) or null.
            var fid = JsonSerializer.Deserialize<int?>(content);
            if (fid is not int selectedId) return null;

            // ElementAtOrDefault returns null for ids outside 1..Count (including 0 and negatives).
            return pngImages.ElementAtOrDefault(selectedId - 1);
        }
        catch (Exception ex)
        {
            // Pass the exception object so the logger records the full stack trace.
            _logger.LogWarning(ex, "Error when getting the image edit response.");
            return null;
        }
    }

    /// <summary>
    /// Sends the selected image and the edit request to the image completion provider and
    /// saves the first generated image.
    /// </summary>
    /// <param name="message">The original function-call message; its content is the fallback request text.</param>
    /// <param name="description">The user's edit request; when blank, <paramref name="message"/>.Content is used.</param>
    /// <param name="image">The image to edit, or <c>null</c> when none was found.</param>
    /// <returns>The provider's text response, a default completion notice, or an error description.</returns>
    private async Task<string> GetImageEditGeneration(RoleDialogModel message, string description, MessageFileModel? image)
    {
        if (image == null)
        {
            return "Failed to find an image. Please provide an image.";
        }

        try
        {
            var completion = CompletionProvider.GetImageCompletion(_services, provider: "openai", model: "dall-e-2");
            var text = !string.IsNullOrWhiteSpace(description) ? description : message.Content;
            var dialog = RoleDialogModel.From(message, AgentRole.User, text);
            var agent = new Agent
            {
                Id = BuiltInAgentId.UtilityAssistant,
                Name = "Utility Assistant"
            };

            // The using declaration disposes the stream when the try block exits,
            // so no explicit Close() call is needed.
            using var stream = File.OpenRead(image.FileStorageUrl);
            var result = await completion.GetImageEdits(agent, dialog, stream, image.FileName ?? string.Empty);
            SaveGeneratedImage(result?.GeneratedImages?.FirstOrDefault());

            return !string.IsNullOrWhiteSpace(result?.Content) ? result.Content : "Image edit is completed.";
        }
        catch (Exception ex)
        {
            var error = $"Error when getting image edit response. {ex.Message}";
            // Use a constant template: ex.Message may contain braces that would be
            // misread as logging placeholders.
            _logger.LogWarning(ex, "Error when getting image edit response.");
            return error;
        }
    }

    /// <summary>
    /// Stores the generated image as a bot-sourced PNG file attached to the current message.
    /// Does nothing when <paramref name="image"/> is <c>null</c>.
    /// </summary>
    private void SaveGeneratedImage(ImageGeneration? image)
    {
        if (image == null) return;

        var files = new List<BotSharpFile>()
        {
            new BotSharpFile
            {
                FileName = $"{Guid.NewGuid()}.png",
                // ImageData is embedded as the base64 payload of a PNG data URI.
                FileData = $"data:{MediaTypeNames.Image.Png};base64,{image.ImageData}"
            }
        };

        var fileService = _services.GetRequiredService<IBotSharpFileService>();
        fileService.SaveMessageFiles(_conversationId, _messageId, FileSourceType.Bot, files);
    }
}

src/Plugins/BotSharp.Plugin.FileHandler/Functions/GenerateImageFn.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ public GenerateImageFn(
2020

2121
public async Task<bool> Execute(RoleDialogModel message)
2222
{
23-
var args = JsonSerializer.Deserialize<LlmFileContext>(message.FunctionArgs);
23+
var args = JsonSerializer.Deserialize<LlmContextIn>(message.FunctionArgs);
2424
Init(message);
2525
SetImageOptions();
2626

src/Plugins/BotSharp.Plugin.FileHandler/Hooks/FileHandlerHook.cs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ public class FileHandlerHook : AgentHookBase, IAgentHook
55
private const string READ_IMAGE_FN = "read_image";
66
private const string READ_PDF_FN = "read_pdf";
77
private const string GENERATE_IMAGE_FN = "generate_image";
8+
private const string EDIT_IMAGE_FN = "edit_image";
89

910
public override string SelfId => string.Empty;
1011

@@ -19,9 +20,11 @@ public override void OnAgentLoaded(Agent agent)
1920

2021
if (isConvMode)
2122
{
23+
AddUtility(agent, UtilityName.ImageGenerator, GENERATE_IMAGE_FN);
2224
AddUtility(agent, UtilityName.ImageReader, READ_IMAGE_FN);
25+
AddUtility(agent, UtilityName.ImageEditor, EDIT_IMAGE_FN);
2326
AddUtility(agent, UtilityName.PdfReader, READ_PDF_FN);
24-
AddUtility(agent, UtilityName.ImageGenerator, GENERATE_IMAGE_FN);
27+
2528
}
2629

2730
base.OnAgentLoaded(agent);

src/Plugins/BotSharp.Plugin.FileHandler/Hooks/FileHandlerUtilityHook.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,9 @@ public class FileHandlerUtilityHook : IAgentUtilityHook
44
{
55
public void AddUtilities(List<string> utilities)
66
{
7+
utilities.Add(UtilityName.ImageGenerator);
78
utilities.Add(UtilityName.ImageReader);
9+
utilities.Add(UtilityName.ImageEditor);
810
utilities.Add(UtilityName.PdfReader);
9-
utilities.Add(UtilityName.ImageGenerator);
1011
}
1112
}

src/Plugins/BotSharp.Plugin.FileHandler/LlmContexts/LlmContextIn.cs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,8 @@ public class LlmContextIn
77
[JsonPropertyName("user_request")]
88
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
99
public string? UserRequest { get; set; }
10+
11+
[JsonPropertyName("image_description")]
12+
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
13+
public string? ImageDescription { get; set; }
1014
}
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
{
2+
"name": "edit_image",
3+
"description": "If the user requests editing or changing an image or a picture, you can call this function to edit an image.",
4+
"parameters": {
5+
"type": "object",
6+
"properties": {
7+
"user_request": {
8+
"type": "string",
9+
"description": "The request posted by user, which is related to editing the requested image."
10+
}
11+
},
12+
"required": [ "user_request" ]
13+
}
14+
}

0 commit comments

Comments
 (0)