Skip to content

Commit d69dfcc

Browse files
fix(core): preserve multimodal content in getBufferString as placeholders (#10424)
1 parent d7d0bc7 commit d69dfcc

File tree

3 files changed

+122
-5
lines changed

3 files changed

+122
-5
lines changed

.changeset/warm-planes-glow.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"@langchain/core": patch
3+
---
4+
5+
fix(core): preserve multimodal content in getBufferString as placeholders

libs/langchain-core/src/messages/tests/message_utils.test.ts

Lines changed: 71 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -624,8 +624,8 @@ test("getBufferString can handle complex messages", () => {
624624
expect(bufferString2).toBe("AI: Hello there!");
625625

626626
const bufferString3 = getBufferString(messageArr3);
627-
// Image content should return empty string for text property
628-
expect(bufferString3).toBe("Human: ");
627+
// Image-only content should produce placeholders, not empty string
628+
expect(bufferString3).toBe("Human: [image][image]");
629629
});
630630

631631
test("getBufferString includes tool_calls for AI messages", () => {
@@ -700,6 +700,75 @@ test("getBufferString uses text property to avoid metadata inflation", () => {
700700
expect(bufferString).not.toContain("additional_kwargs");
701701
});
702702

703+
test("getBufferString preserves non-text content block placeholders", () => {
  // Each case pairs a single message with the exact buffer string that
  // getBufferString is expected to produce for it.
  const cases = [
    {
      // image and image_url -> [image]
      message: new HumanMessage({
        content: [
          { type: "image", source: { type: "base64", data: "abc" } },
          {
            type: "image_url",
            image_url: { url: "https://example.com/img.png" },
          },
        ],
      }),
      expected: "Human: [image][image]",
    },
    {
      // audio and input_audio -> [audio]
      message: new HumanMessage({
        content: [
          { type: "audio", source: { type: "base64", data: "abc" } },
          { type: "input_audio", data: "abc", format: "wav" },
        ],
      }),
      expected: "Human: [audio][audio]",
    },
    {
      // video -> [video]
      message: new HumanMessage({
        content: [{ type: "video", source: { type: "base64", data: "abc" } }],
      }),
      expected: "Human: [video]",
    },
    {
      // file -> [file]
      message: new HumanMessage({
        content: [
          {
            type: "file",
            source: { type: "base64", data: "abc" },
            mimeType: "application/pdf",
          },
        ],
      }),
      expected: "Human: [file]",
    },
    {
      // text-plain -> the embedded text is extracted verbatim
      message: new HumanMessage({
        content: [{ type: "text-plain", text: "hello world" }],
      }),
      expected: "Human: hello world",
    },
    {
      // reasoning -> rendered as an empty string and therefore dropped
      message: new AIMessage({
        content: [
          { type: "reasoning", reasoning: "thinking..." },
          { type: "text", text: "answer" },
        ],
      }),
      expected: "AI: answer",
    },
    {
      // Mixed content: text interleaved with multimodal blocks keeps its order
      message: new HumanMessage({
        content: [
          { type: "text", text: "Look at this: " },
          { type: "image", source: { type: "base64", data: "abc" } },
          { type: "text", text: " and listen to this: " },
          { type: "audio", source: { type: "base64", data: "def" } },
        ],
      }),
      expected: "Human: Look at this: [image] and listen to this: [audio]",
    },
  ];

  for (const { message, expected } of cases) {
    expect(getBufferString([message])).toBe(expected);
  }
});
771+
703772
describe("chat message conversions", () => {
704773
it("can convert a chat message to a stored message and back", () => {
705774
const originalMessages = [

libs/langchain-core/src/messages/utils.ts

Lines changed: 46 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,45 @@ export function coerceMessageLikeToMessage(
305305
}
306306
}
307307

308+
/**
 * Renders a single content block to a compact string representation.
 *
 * - Plain strings are returned unchanged.
 * - `text` blocks yield their `text` (empty string when it is missing or not
 *   a string).
 * - `text-plain` blocks yield their `text`, or the `[text-plain file]`
 *   placeholder when no string text is present.
 * - Multimodal blocks (image, audio, video, file) become short placeholders
 *   such as `[image]`, so their existence is preserved without inflating the
 *   output with base64 data or metadata.
 * - Reasoning, tool-call, server-tool-call and non-standard blocks render as
 *   an empty string.
 * - Any other block renders as `[<type>]`, or an empty string when it has no
 *   truthy `type`.
 *
 * @param block - A raw string or a content block object carrying a `type` tag.
 * @returns The compact rendering of the block; may be an empty string.
 */
function _contentBlockToString(
  block: string | { type?: string; [key: string]: unknown }
): string {
  if (typeof block === "string") return block;
  // The static type excludes them, but deserialized content can still contain
  // null/undefined/primitive entries; render those as nothing instead of
  // throwing on the `.type` access below.
  if (block === null || typeof block !== "object") return "";

  // Narrow `text` at runtime rather than asserting its type, so the declared
  // `string` return type is actually honored.
  const rawText: unknown = block.text;
  const text = typeof rawText === "string" ? rawText : undefined;

  switch (block.type) {
    case "text":
      return text ?? "";
    case "text-plain":
      return text ?? "[text-plain file]";
    case "image":
    case "image_url":
      return "[image]";
    case "audio":
    case "input_audio":
      return "[audio]";
    case "video":
      return "[video]";
    case "file":
      return "[file]";
    // Deliberately omitted from the buffer string.
    case "reasoning":
    case "tool_call":
    case "tool_call_chunk":
    case "invalid_tool_call":
    case "server_tool_call":
    case "server_tool_call_chunk":
    case "server_tool_call_result":
    case "non_standard":
      return "";
    default:
      // Unknown block kinds still leave a marker naming their type.
      return block.type ? `[${block.type}]` : "";
  }
}
346+
308347
/**
309348
* This function is used by memory classes to get a string representation
310349
* of the chat message history, based on the message content and role.
@@ -341,9 +380,13 @@ export function getBufferString(
341380
}
342381
const nameStr = m.name ? `${m.name}, ` : "";
343382

344-
// Use m.text property which extracts only text content, avoiding metadata
345-
// For non-string content (e.g., content blocks), m.text extracts only text blocks
346-
const readableContent = m.text;
383+
// Render content compactly: text as-is, multimodal blocks as placeholders
384+
const readableContent =
385+
typeof m.content === "string"
386+
? m.content
387+
: Array.isArray(m.content)
388+
? m.content.map(_contentBlockToString).filter(Boolean).join("")
389+
: "";
347390

348391
let message = `${role}: ${nameStr}${readableContent}`;
349392

0 commit comments

Comments
 (0)