Skip to content

Commit f75590b

Browse files
feat(llma): multimodal display (#42317)
1 parent 7317444 commit f75590b

File tree

4 files changed

+408
-56
lines changed

4 files changed

+408
-56
lines changed

frontend/src/scenes/data-management/events/eventDefinitionsTableLogic.test.ts

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -320,7 +320,13 @@ describe('eventDefinitionsTableLogic', () => {
320320
await expectLogic(logic, () => {
321321
logic.actions.loadPropertiesForEvent(eventDefinition)
322322
})
323-
.toDispatchActions(['loadPropertiesForEvent', 'loadPropertiesForEventSuccess'])
323+
.toDispatchActionsInAnyOrder([
324+
router.actionCreators.push(url),
325+
'loadEventDefinitions',
326+
'loadEventDefinitionsSuccess',
327+
'loadPropertiesForEvent',
328+
'loadPropertiesForEventSuccess',
329+
])
324330
.toMatchValues({
325331
eventPropertiesCacheMap: partial({
326332
[eventDefinition.id]: partial({
@@ -329,7 +335,7 @@ describe('eventDefinitionsTableLogic', () => {
329335
}),
330336
}),
331337
})
332-
expect(api.get).toHaveBeenCalledTimes(2)
338+
expect(api.get).toHaveBeenCalledTimes(3)
333339
// Forwards
334340
await expectLogic(logic, () => {
335341
logic.actions.loadPropertiesForEvent(
@@ -347,7 +353,7 @@ describe('eventDefinitionsTableLogic', () => {
347353
}),
348354
}),
349355
})
350-
expect(api.get).toHaveBeenCalledTimes(3)
356+
expect(api.get).toHaveBeenCalledTimes(4)
351357
// Backwards
352358
await expectLogic(logic, () => {
353359
logic.actions.loadPropertiesForEvent(eventDefinition, propertiesStartingUrl)
@@ -361,7 +367,7 @@ describe('eventDefinitionsTableLogic', () => {
361367
}),
362368
}),
363369
})
364-
expect(api.get).toHaveBeenCalledTimes(3)
370+
expect(api.get).toHaveBeenCalledTimes(4)
365371
})
366372
})
367373
})

products/llm_analytics/frontend/ConversationDisplay/ConversationMessagesDisplay.tsx

Lines changed: 136 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,19 @@ import { LLMInputOutput } from '../LLMInputOutput'
1515
import { SearchHighlight } from '../SearchHighlight'
1616
import { llmAnalyticsTraceLogic } from '../llmAnalyticsTraceLogic'
1717
import { containsSearchQuery } from '../searchUtils'
18-
import { CompatMessage, VercelSDKImageMessage } from '../types'
19-
import { looksLikeXml } from '../utils'
18+
import { CompatMessage, MultiModalContentItem, VercelSDKImageMessage } from '../types'
19+
import {
20+
getGeminiInlineData,
21+
isAnthropicDocumentMessage,
22+
isAnthropicImageMessage,
23+
isGeminiAudioMessage,
24+
isGeminiDocumentMessage,
25+
isGeminiImageMessage,
26+
isOpenAIAudioMessage,
27+
isOpenAIFileMessage,
28+
isOpenAIImageURLMessage,
29+
looksLikeXml,
30+
} from '../utils'
2031
import { HighlightedLemonMarkdown } from './HighlightedLemonMarkdown'
2132
import { HighlightedXMLViewer } from './HighlightedXMLViewer'
2233
import { XMLViewer } from './XMLViewer'
@@ -231,6 +242,127 @@ export const ImageMessageDisplay = ({
231242
return <span>{content}</span>
232243
}
233244

/** Tailwind classes shared by every inline image preview rendered for a content item. */
const CONTENT_ITEM_IMAGE_CLASS_NAME = 'max-w-full max-h-[400px] rounded'

/** Tailwind classes shared by every file/document download link rendered for a content item. */
const CONTENT_ITEM_LINK_CLASS_NAME = 'text-link hover:underline'

/** Builds a base64 `data:` URI from a MIME type and an already base64-encoded payload. */
function toBase64DataUri(mimeType: string, base64Data: string): string {
    return `data:${mimeType};base64,${base64Data}`
}

/** Derives a download name such as `document.pdf` from a MIME type; falls back to `document.bin`. */
function documentFileNameFor(mimeType: string): string {
    return `document.${mimeType.split('/')[1] || 'bin'}`
}

/** Renders plain text, highlighting `searchQuery` matches when a non-blank query is given. */
function renderContentItemText(text: string, searchQuery?: string): JSX.Element {
    return searchQuery?.trim() ? (
        <SearchHighlight string={text} substring={searchQuery} className="whitespace-pre-wrap" />
    ) : (
        <span className="whitespace-pre-wrap">{text}</span>
    )
}

/** Renders an inline image preview for the given `src` (remote URL or `data:` URI). */
function renderContentItemImage(src: string): JSX.Element {
    return <img src={src} alt="Message content" className={CONTENT_ITEM_IMAGE_CLASS_NAME} />
}

/** Renders a download link that saves `href` under `fileName`. */
function renderContentItemDownloadLink(href: string, fileName: string): JSX.Element {
    return (
        // eslint-disable-next-line react/forbid-elements
        <a href={href} download={fileName} className={CONTENT_ITEM_LINK_CLASS_NAME}>
            {fileName}
        </a>
    )
}

/**
 * Renders a single entry of a multimodal message `content` array.
 *
 * Checks run in order and the first match wins:
 * 1. plain strings and `{ type: 'text', text }` items are rendered as (optionally highlighted) text;
 * 2. `{ type: 'image', image: string }` items are delegated to `ImageMessageDisplay`;
 * 3. items accepted by the image guards from `../utils` are rendered as `<img>`;
 * 4. items accepted by the file/document guards are rendered as download links;
 * 5. items accepted by the audio guards are rendered as an `<audio>` player with an optional transcript;
 * 6. anything else is shown as raw JSON.
 *
 * @param item - The content entry to render. Its runtime shape is not guaranteed to match the static type.
 * @param searchQuery - Optional search text; when non-blank, matches are highlighted in text and JSON output.
 * @returns The rendered element, or `null` when a Gemini item carries no inline data.
 */
function renderContentItem(item: MultiModalContentItem, searchQuery?: string): JSX.Element | null {
    if (typeof item === 'string') {
        return renderContentItemText(item, searchQuery)
    }

    // Untagged or non-object entries cannot be classified, so show them verbatim.
    if (!item || typeof item !== 'object' || !('type' in item)) {
        return <HighlightedJSONViewer src={item} name={null} collapsed={5} searchQuery={searchQuery} />
    }

    if (item.type === 'text' && 'text' in item) {
        // Widen to `unknown`: the payload is captured data, so `text` may not actually be a string.
        const text: unknown = item.text
        if (typeof text !== 'string') {
            // Rendering a non-string (e.g. an object) as a React child would throw, so fall back to JSON.
            return <HighlightedJSONViewer src={item} name={null} collapsed={5} searchQuery={searchQuery} />
        }
        return renderContentItemText(text, searchQuery)
    }

    if (item.type === 'image' && 'image' in item && typeof item.image === 'string') {
        return <ImageMessageDisplay message={{ content: { type: 'image', image: item.image } }} />
    }

    if (isOpenAIImageURLMessage(item)) {
        return renderContentItemImage(item.image_url.url)
    }

    if (isAnthropicImageMessage(item)) {
        return renderContentItemImage(toBase64DataUri(item.source.media_type, item.source.data))
    }

    if (isGeminiImageMessage(item)) {
        const inlineData = getGeminiInlineData(item)
        if (!inlineData) {
            return null
        }
        return renderContentItemImage(toBase64DataUri(inlineData.mime_type, inlineData.data))
    }

    if (isOpenAIFileMessage(item)) {
        // Only `data:` URIs are turned into links; any other value is shown as a plain file name.
        if (!item.file.file_data.startsWith('data:')) {
            return <span className="text-muted">{item.file.filename}</span>
        }
        return renderContentItemDownloadLink(item.file.file_data, item.file.filename)
    }

    if (isAnthropicDocumentMessage(item)) {
        return renderContentItemDownloadLink(
            toBase64DataUri(item.source.media_type, item.source.data),
            documentFileNameFor(item.source.media_type)
        )
    }

    if (isGeminiDocumentMessage(item)) {
        const inlineData = getGeminiInlineData(item)
        if (!inlineData) {
            return null
        }
        return renderContentItemDownloadLink(
            toBase64DataUri(inlineData.mime_type, inlineData.data),
            documentFileNameFor(inlineData.mime_type)
        )
    }

    if (isOpenAIAudioMessage(item) || isGeminiAudioMessage(item)) {
        const mimeType = 'mime_type' in item ? item.mime_type : undefined
        const transcript = 'transcript' in item ? item.transcript : undefined

        return (
            <div className="space-y-2">
                {/* Items without a MIME type are played back as WAV. */}
                <audio controls className="w-[500px]" src={toBase64DataUri(mimeType || 'audio/wav', item.data)} />
                {transcript && typeof transcript === 'string' && (
                    <div className="text-xs text-muted p-2 bg-bg-light rounded border">
                        <div className="font-semibold mb-1">Transcript:</div>
                        <div className="whitespace-pre-wrap">{transcript}</div>
                    </div>
                )}
            </div>
        )
    }

    return <HighlightedJSONViewer src={item} name={null} collapsed={5} searchQuery={searchQuery} />
}
365+
234366
export const LLMMessageDisplay = React.memo(
235367
({
236368
message,
@@ -283,7 +415,7 @@ export const LLMMessageDisplay = React.memo(
283415
: Object.fromEntries(Object.entries(additionalKwargs).filter(([, value]) => value !== undefined))
284416

285417
const renderMessageContent = (
286-
content: string | { type: string; content: string } | VercelSDKImageMessage | object[],
418+
content: string | { type: string; content: string } | VercelSDKImageMessage | MultiModalContentItem[],
287419
searchQuery?: string
288420
): JSX.Element | null => {
289421
if (!content) {
@@ -296,52 +428,7 @@ export const LLMMessageDisplay = React.memo(
296428
<>
297429
{content.map((item, index) => (
298430
<React.Fragment key={index}>
299-
{typeof item === 'string' ? (
300-
searchQuery?.trim() ? (
301-
<SearchHighlight
302-
string={item}
303-
substring={searchQuery}
304-
className="whitespace-pre-wrap"
305-
/>
306-
) : (
307-
<span className="whitespace-pre-wrap">{item}</span>
308-
)
309-
) : item &&
310-
typeof item === 'object' &&
311-
'type' in item &&
312-
item.type === 'text' &&
313-
'text' in item ? (
314-
searchQuery?.trim() && typeof item.text === 'string' ? (
315-
<SearchHighlight
316-
string={item.text}
317-
substring={searchQuery}
318-
className="whitespace-pre-wrap"
319-
/>
320-
) : (
321-
<span className="whitespace-pre-wrap">{item.text}</span>
322-
)
323-
) : item &&
324-
typeof item === 'object' &&
325-
'type' in item &&
326-
item.type === 'image' &&
327-
'image' in item &&
328-
typeof item.image === 'string' ? (
329-
<ImageMessageDisplay
330-
message={{
331-
content: {
332-
type: 'image',
333-
image: item.image,
334-
},
335-
}}
336-
/>
337-
) : (
338-
<HighlightedJSONViewer
339-
src={item}
340-
name={null}
341-
collapsed={5}
342-
searchQuery={searchQuery}
343-
/>
344-
)}
431+
{renderContentItem(item, searchQuery)}
345432
{index < content.length - 1 && <div className="border-t my-2" />}
346433
</React.Fragment>
347434
))}

products/llm_analytics/frontend/types.ts

Lines changed: 99 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
export interface RoleBasedMessage {
22
role: string
3-
content: string | { type: string; content: string } | object[]
3+
content: string | { type: string; content: string } | MultiModalContentItem[]
44
}
55

66
export interface OpenAIToolCall {
@@ -37,6 +37,81 @@ export interface VercelSDKInputImageMessage {
3737
image_url: string
3838
}
3939

/** Content part with `type: 'image_url'` whose image is referenced by `image_url.url`. */
export interface OpenAIImageURLMessage {
    type: 'image_url'
    image_url: { url: string }
}

/** Content part with `type: 'file'` carrying the file payload string and its file name. */
export interface OpenAIFileMessage {
    type: 'file'
    file: { file_data: string; filename: string }
}

/** Content part with `type: 'audio'` carrying audio data together with its transcript and metadata. */
export interface OpenAIAudioMessage {
    type: 'audio'
    data: string
    transcript: string
    id: string
    expires_at: number
}
62+
/** Base64 `source` payload shared by the Anthropic image and document content blocks. */
export interface AnthropicBase64Source {
    type: 'base64'
    media_type: string
    data: string
}

/** Content block with `type: 'image'` whose payload lives in a base64 `source`. */
export interface AnthropicImageMessage {
    type: 'image'
    source: AnthropicBase64Source
}

/** Content block with `type: 'document'` whose payload lives in a base64 `source`. */
export interface AnthropicDocumentMessage {
    type: 'document'
    source: AnthropicBase64Source
}
80+
/** Content part with `type: 'audio'` carrying audio data and its MIME type. */
export interface GeminiAudioMessage {
    type: 'audio'
    data: string
    mime_type: string
}

/** Inline payload using snake_case keys (Python SDK). */
export interface GeminiInlineDataSnakeCase {
    data: string
    mime_type: string
}

/** Inline payload using camelCase keys (Node SDK). */
export interface GeminiInlineDataCamelCase {
    data: string
    mimeType: string
}

/** Content part with `type: 'image'`; the payload may arrive under either key casing. */
export interface GeminiImageMessage {
    type: 'image'
    // snake_case (Python SDK)
    inline_data?: GeminiInlineDataSnakeCase
    // camelCase (Node SDK)
    inlineData?: GeminiInlineDataCamelCase
}

/** Document content part; the payload may arrive under either key casing. */
export interface GeminiDocumentMessage {
    type: 'document' | 'image' // 'image' when SDK misdetects PDF by MIME type
    // snake_case (Python SDK)
    inline_data?: GeminiInlineDataSnakeCase
    // camelCase (Node SDK)
    inlineData?: GeminiInlineDataCamelCase
}
114+
40115
export interface VercelSDKInputTextMessage {
41116
type: 'input_text'
42117
text: string
@@ -103,3 +178,26 @@ export interface LiteLLMResponse {
103178
choices?: LiteLLMChoice[]
104179
[additionalKey: string]: any
105180
}
181+
/** Content entry with `type: 'text'` holding the text to display. */
export interface TextContentItem {
    type: 'text'
    text: string
}

/** Content entry with `type: 'image'` holding the image as a single string. */
export interface ImageContentItem {
    type: 'image'
    image: string
}

/**
 * One entry of a message's `content` array: a bare string, a text or image entry,
 * or one of the OpenAI / Anthropic / Gemini specific content shapes.
 */
export type MultiModalContentItem =
    | string
    | TextContentItem
    | ImageContentItem
    | OpenAIImageURLMessage
    | OpenAIFileMessage
    | OpenAIAudioMessage
    | AnthropicImageMessage
    | AnthropicDocumentMessage
    | GeminiImageMessage
    | GeminiDocumentMessage
    | GeminiAudioMessage

0 commit comments

Comments
 (0)