Skip to content

Commit 2cfc167

Browse files
[Inference] Image content (#205371)
Adds support for image content parts in the Inference plugin. Only base64-encoded images are supported, as this capability is shared across all three LLM providers. --------- Co-authored-by: Elastic Machine <elasticmachine@users.noreply.github.com>
1 parent 7e82712 commit 2cfc167

File tree

11 files changed

+367
-19
lines changed

11 files changed

+367
-19
lines changed

x-pack/platform/packages/shared/ai-infra/inference-common/index.ts

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,9 @@ export {
1010
ChatCompletionEventType,
1111
ToolChoiceType,
1212
type Message,
13+
type MessageContentImage,
14+
type MessageContentText,
15+
type MessageContent,
1316
type AssistantMessage,
1417
type ToolMessage,
1518
type UserMessage,

x-pack/platform/packages/shared/ai-infra/inference-common/src/chat_complete/index.ts

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,9 @@ export {
2929
} from './events';
3030
export {
3131
MessageRole,
32+
type MessageContent,
33+
type MessageContentImage,
34+
type MessageContentText,
3235
type Message,
3336
type AssistantMessage,
3437
type UserMessage,

x-pack/platform/packages/shared/ai-infra/inference-common/src/chat_complete/messages.ts

Lines changed: 14 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -23,14 +23,26 @@ interface MessageBase<TRole extends MessageRole> {
2323
role: TRole;
2424
}
2525

26+
export interface MessageContentText {
27+
type: 'text';
28+
text: string;
29+
}
30+
31+
export interface MessageContentImage {
32+
type: 'image';
33+
source: { data: string; mimeType: string };
34+
}
35+
36+
export type MessageContent = string | Array<MessageContentText | MessageContentImage>;
37+
2638
/**
2739
* Represents a message from the user.
2840
*/
2941
export type UserMessage = MessageBase<MessageRole.User> & {
3042
/**
31-
* The text content of the user message
43+
* The text or image content of the user message
3244
*/
33-
content: string;
45+
content: MessageContent;
3446
};
3547

3648
/**

x-pack/platform/plugins/shared/inference/server/chat_complete/adapters/bedrock/bedrock_claude_adapter.test.ts

Lines changed: 86 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -256,6 +256,92 @@ describe('bedrockClaudeAdapter', () => {
256256
expect(system).toEqual('Some system message');
257257
});
258258

259+
it('correctly formats messages with content parts', () => {
260+
bedrockClaudeAdapter.chatComplete({
261+
executor: executorMock,
262+
logger,
263+
messages: [
264+
{
265+
role: MessageRole.User,
266+
content: [
267+
{
268+
type: 'text',
269+
text: 'question',
270+
},
271+
],
272+
},
273+
{
274+
role: MessageRole.Assistant,
275+
content: 'answer',
276+
},
277+
{
278+
role: MessageRole.User,
279+
content: [
280+
{
281+
type: 'image',
282+
source: {
283+
data: 'aaaaaa',
284+
mimeType: 'image/png',
285+
},
286+
},
287+
{
288+
type: 'image',
289+
source: {
290+
data: 'bbbbbb',
291+
mimeType: 'image/png',
292+
},
293+
},
294+
],
295+
},
296+
],
297+
});
298+
299+
expect(executorMock.invoke).toHaveBeenCalledTimes(1);
300+
301+
const { messages } = getCallParams();
302+
expect(messages).toEqual([
303+
{
304+
rawContent: [
305+
{
306+
text: 'question',
307+
type: 'text',
308+
},
309+
],
310+
role: 'user',
311+
},
312+
{
313+
rawContent: [
314+
{
315+
text: 'answer',
316+
type: 'text',
317+
},
318+
],
319+
role: 'assistant',
320+
},
321+
{
322+
rawContent: [
323+
{
324+
type: 'image',
325+
source: {
326+
data: 'aaaaaa',
327+
mediaType: 'image/png',
328+
type: 'base64',
329+
},
330+
},
331+
{
332+
type: 'image',
333+
source: {
334+
data: 'bbbbbb',
335+
mediaType: 'image/png',
336+
type: 'base64',
337+
},
338+
},
339+
],
340+
role: 'user',
341+
},
342+
]);
343+
});
344+
259345
it('correctly format tool choice', () => {
260346
bedrockClaudeAdapter.chatComplete({
261347
executor: executorMock,

x-pack/platform/plugins/shared/inference/server/chat_complete/adapters/bedrock/bedrock_claude_adapter.ts

Lines changed: 19 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@ import {
1717
} from '@kbn/inference-common';
1818
import { parseSerdeChunkMessage } from './serde_utils';
1919
import { InferenceConnectorAdapter } from '../../types';
20-
import type { BedRockMessage, BedrockToolChoice } from './types';
20+
import type { BedRockImagePart, BedRockMessage, BedRockTextPart, BedrockToolChoice } from './types';
2121
import {
2222
BedrockChunkMember,
2323
serdeEventstreamIntoObservable,
@@ -153,7 +153,24 @@ const messagesToBedrock = (messages: Message[]): BedRockMessage[] => {
153153
case MessageRole.User:
154154
return {
155155
role: 'user' as const,
156-
rawContent: [{ type: 'text' as const, text: message.content }],
156+
rawContent: (typeof message.content === 'string'
157+
? [message.content]
158+
: message.content
159+
).map((contentPart) => {
160+
if (typeof contentPart === 'string') {
161+
return { text: contentPart, type: 'text' } satisfies BedRockTextPart;
162+
} else if (contentPart.type === 'text') {
163+
return { text: contentPart.text, type: 'text' } satisfies BedRockTextPart;
164+
}
165+
return {
166+
type: 'image',
167+
source: {
168+
data: contentPart.source.data,
169+
mediaType: contentPart.source.mimeType,
170+
type: 'base64',
171+
},
172+
} satisfies BedRockImagePart;
173+
}),
157174
};
158175
case MessageRole.Assistant:
159176
return {

x-pack/platform/plugins/shared/inference/server/chat_complete/adapters/bedrock/types.ts

Lines changed: 31 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -17,15 +17,38 @@ export interface BedRockMessage {
1717
/**
1818
* Bedrock message parts
1919
*/
20+
export interface BedRockTextPart {
21+
type: 'text';
22+
text: string;
23+
}
24+
25+
export interface BedRockToolUsePart {
26+
type: 'tool_use';
27+
id: string;
28+
name: string;
29+
input: Record<string, unknown>;
30+
}
31+
32+
export interface BedRockToolResultPart {
33+
type: 'tool_result';
34+
tool_use_id: string;
35+
content: string;
36+
}
37+
38+
export interface BedRockImagePart {
39+
type: 'image';
40+
source: {
41+
type: 'base64';
42+
mediaType: string;
43+
data: string;
44+
};
45+
}
46+
2047
export type BedRockMessagePart =
21-
| { type: 'text'; text: string }
22-
| {
23-
type: 'tool_use';
24-
id: string;
25-
name: string;
26-
input: Record<string, unknown>;
27-
}
28-
| { type: 'tool_result'; tool_use_id: string; content: string };
48+
| BedRockTextPart
49+
| BedRockToolUsePart
50+
| BedRockToolResultPart
51+
| BedRockImagePart;
2952

3053
export type BedrockToolChoice = { type: 'auto' } | { type: 'any' } | { type: 'tool'; name: string };
3154

x-pack/platform/plugins/shared/inference/server/chat_complete/adapters/gemini/gemini_adapter.test.ts

Lines changed: 80 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -239,6 +239,86 @@ describe('geminiAdapter', () => {
239239
]);
240240
});
241241

242+
it('correctly formats content parts', () => {
243+
geminiAdapter.chatComplete({
244+
executor: executorMock,
245+
logger,
246+
messages: [
247+
{
248+
role: MessageRole.User,
249+
content: [
250+
{
251+
type: 'text',
252+
text: 'question',
253+
},
254+
],
255+
},
256+
{
257+
role: MessageRole.Assistant,
258+
content: 'answer',
259+
},
260+
{
261+
role: MessageRole.User,
262+
content: [
263+
{
264+
type: 'image',
265+
source: {
266+
data: 'aaaaaa',
267+
mimeType: 'image/png',
268+
},
269+
},
270+
{
271+
type: 'image',
272+
source: {
273+
data: 'bbbbbb',
274+
mimeType: 'image/png',
275+
},
276+
},
277+
],
278+
},
279+
],
280+
});
281+
282+
expect(executorMock.invoke).toHaveBeenCalledTimes(1);
283+
284+
const { messages } = getCallParams();
285+
expect(messages).toEqual([
286+
{
287+
parts: [
288+
{
289+
text: 'question',
290+
},
291+
],
292+
role: 'user',
293+
},
294+
{
295+
parts: [
296+
{
297+
text: 'answer',
298+
},
299+
],
300+
role: 'assistant',
301+
},
302+
{
303+
parts: [
304+
{
305+
inlineData: {
306+
data: 'aaaaaa',
307+
mimeType: 'image/png',
308+
},
309+
},
310+
{
311+
inlineData: {
312+
data: 'bbbbbb',
313+
mimeType: 'image/png',
314+
},
315+
},
316+
],
317+
role: 'user',
318+
},
319+
]);
320+
});
321+
242322
it('groups messages from the same user', () => {
243323
geminiAdapter.chatComplete({
244324
logger,

x-pack/platform/plugins/shared/inference/server/chat_complete/adapters/gemini/gemini_adapter.ts

Lines changed: 15 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -196,11 +196,21 @@ function messageToGeminiMapper() {
196196
case MessageRole.User:
197197
const userMessage: GeminiMessage = {
198198
role: 'user',
199-
parts: [
200-
{
201-
text: message.content,
202-
},
203-
],
199+
parts: (typeof message.content === 'string' ? [message.content] : message.content).map(
200+
(contentPart) => {
201+
if (typeof contentPart === 'string') {
202+
return { text: contentPart } satisfies Gemini.TextPart;
203+
} else if (contentPart.type === 'text') {
204+
return { text: contentPart.text } satisfies Gemini.TextPart;
205+
}
206+
return {
207+
inlineData: {
208+
data: contentPart.source.data,
209+
mimeType: contentPart.source.mimeType,
210+
},
211+
} satisfies Gemini.InlineDataPart;
212+
}
213+
),
204214
};
205215
return userMessage;
206216

0 commit comments

Comments
 (0)