@@ -49,6 +49,61 @@ def __init__(self, config: MultiModalStructMemReaderConfig):
4949 direct_markdown_hostnames = direct_markdown_hostnames ,
5050 )
5151
52+ def _split_large_memory_item (
53+ self , item : TextualMemoryItem , max_tokens : int
54+ ) -> list [TextualMemoryItem ]:
55+ """
56+ Split a single memory item that exceeds max_tokens into multiple chunks.
57+
58+ Args:
59+ item: TextualMemoryItem to split
60+ max_tokens: Maximum tokens per chunk
61+
62+ Returns:
63+ List of TextualMemoryItem chunks
64+ """
65+ item_text = item .memory or ""
66+ if not item_text :
67+ return [item ]
68+
69+ item_tokens = self ._count_tokens (item_text )
70+ if item_tokens <= max_tokens :
71+ return [item ]
72+
73+ # Use chunker to split the text
74+ try :
75+ chunks = self .chunker .chunk (item_text )
76+ split_items = []
77+
78+ for chunk in chunks :
79+ # Chunk objects have a 'text' attribute
80+ chunk_text = chunk .text
81+ if not chunk_text or not chunk_text .strip ():
82+ continue
83+
84+ # Create a new memory item for each chunk, preserving original metadata
85+ split_item = self ._make_memory_item (
86+ value = chunk_text ,
87+ info = {
88+ "user_id" : item .metadata .user_id ,
89+ "session_id" : item .metadata .session_id ,
90+ ** (item .metadata .info or {}),
91+ },
92+ memory_type = item .metadata .memory_type ,
93+ tags = item .metadata .tags or [],
94+ key = item .metadata .key ,
95+ sources = item .metadata .sources or [],
96+ background = item .metadata .background or "" ,
97+ )
98+ split_items .append (split_item )
99+
100+ return split_items if split_items else [item ]
101+ except Exception as e :
102+ logger .warning (
103+ f"[MultiModalStruct] Failed to split large memory item: { e } . Returning original item."
104+ )
105+ return [item ]
106+
52107 def _concat_multi_modal_memories (
53108 self , all_memory_items : list [TextualMemoryItem ], max_tokens = None , overlap = 200
54109 ) -> list [TextualMemoryItem ]:
@@ -59,35 +114,49 @@ def _concat_multi_modal_memories(
59114 2. Each window has overlap tokens for context continuity
60115 3. Aggregates items within each window into a single memory item
61116 4. Determines memory_type based on roles in each window
117+ 5. Splits single large memory items that exceed max_tokens
62118 """
63119 if not all_memory_items :
64120 return []
65121
66- # If only one item, return as-is (no need to aggregate)
67- if len (all_memory_items ) == 1 :
68- return all_memory_items
69-
70122 max_tokens = max_tokens or self .chat_window_max_tokens
123+
124+ # Split large memory items before processing
125+ processed_items = []
126+ for item in all_memory_items :
127+ item_text = item .memory or ""
128+ item_tokens = self ._count_tokens (item_text )
129+ if item_tokens > max_tokens :
130+ # Split the large item into multiple chunks
131+ split_items = self ._split_large_memory_item (item , max_tokens )
132+ processed_items .extend (split_items )
133+ else :
134+ processed_items .append (item )
135+
136+ # If only one item after processing, return as-is
137+ if len (processed_items ) == 1 :
138+ return processed_items
139+
71140 windows = []
72141 buf_items = []
73142 cur_text = ""
74143
75144 # Extract info from first item (all items should have same user_id, session_id)
76- first_item = all_memory_items [0 ]
145+ first_item = processed_items [0 ]
77146 info = {
78147 "user_id" : first_item .metadata .user_id ,
79148 "session_id" : first_item .metadata .session_id ,
80149 ** (first_item .metadata .info or {}),
81150 }
82151
83- for _idx , item in enumerate (all_memory_items ):
152+ for _idx , item in enumerate (processed_items ):
84153 item_text = item .memory or ""
85154 # Ensure line ends with newline (same format as simple_struct)
86155 line = item_text if item_text .endswith ("\n " ) else f"{ item_text } \n "
87156
88157 # Check if adding this item would exceed max_tokens (same logic as _iter_chat_windows)
89- # Note: The `and cur_text` condition ensures that single large messages are not truncated.
90- # If cur_text is empty (new window), even if line exceeds max_tokens, it won't trigger output.
158+ # Note: After splitting large items, each item should be <= max_tokens,
159+ # but we still check to handle edge cases
91160 if self ._count_tokens (cur_text + line ) > max_tokens and cur_text :
92161 # Yield current window
93162 window = self ._build_window_from_items (buf_items , info )
@@ -104,8 +173,7 @@ def _concat_multi_modal_memories(
104173 # Recalculate cur_text from remaining items
105174 cur_text = "" .join ([it .memory or "" for it in buf_items ])
106175
107- # Add item to current window (always, even if it exceeds max_tokens)
108- # This ensures single large messages are not truncated, same as simple_struct
176+ # Add item to current window
109177 buf_items .append (item )
110178 # Recalculate cur_text from all items in buffer (same as _iter_chat_windows)
111179 cur_text = "" .join ([it .memory or "" for it in buf_items ])
@@ -312,14 +380,12 @@ def _process_multi_modal_data(
312380 for msg in scene_data_info :
313381 items = self .multi_modal_parser .parse (msg , info , mode = "fast" , ** kwargs )
314382 all_memory_items .extend (items )
315- fast_memory_items = self ._concat_multi_modal_memories (all_memory_items )
316-
317383 else :
318384 # Parse as single message
319- fast_memory_items = self .multi_modal_parser .parse (
385+ all_memory_items = self .multi_modal_parser .parse (
320386 scene_data_info , info , mode = "fast" , ** kwargs
321387 )
322-
388+ fast_memory_items = self . _concat_multi_modal_memories ( all_memory_items )
323389 if mode == "fast" :
324390 return fast_memory_items
325391 else :
0 commit comments