@@ -47,6 +47,61 @@ def __init__(self, config: MultiModalStructMemReaderConfig):
4747 direct_markdown_hostnames = direct_markdown_hostnames ,
4848 )
4949
50+ def _split_large_memory_item (
51+ self , item : TextualMemoryItem , max_tokens : int
52+ ) -> list [TextualMemoryItem ]:
53+ """
54+ Split a single memory item that exceeds max_tokens into multiple chunks.
55+
56+ Args:
57+ item: TextualMemoryItem to split
58+ max_tokens: Maximum tokens per chunk
59+
60+ Returns:
61+ List of TextualMemoryItem chunks
62+ """
63+ item_text = item .memory or ""
64+ if not item_text :
65+ return [item ]
66+
67+ item_tokens = self ._count_tokens (item_text )
68+ if item_tokens <= max_tokens :
69+ return [item ]
70+
71+ # Use chunker to split the text
72+ try :
73+ chunks = self .chunker .chunk (item_text )
74+ split_items = []
75+
76+ for chunk in chunks :
77+ # Chunk objects have a 'text' attribute
78+ chunk_text = chunk .text
79+ if not chunk_text or not chunk_text .strip ():
80+ continue
81+
82+ # Create a new memory item for each chunk, preserving original metadata
83+ split_item = self ._make_memory_item (
84+ value = chunk_text ,
85+ info = {
86+ "user_id" : item .metadata .user_id ,
87+ "session_id" : item .metadata .session_id ,
88+ ** (item .metadata .info or {}),
89+ },
90+ memory_type = item .metadata .memory_type ,
91+ tags = item .metadata .tags or [],
92+ key = item .metadata .key ,
93+ sources = item .metadata .sources or [],
94+ background = item .metadata .background or "" ,
95+ )
96+ split_items .append (split_item )
97+
98+ return split_items if split_items else [item ]
99+ except Exception as e :
100+ logger .warning (
101+ f"[MultiModalStruct] Failed to split large memory item: { e } . Returning original item."
102+ )
103+ return [item ]
104+
50105 def _concat_multi_modal_memories (
51106 self , all_memory_items : list [TextualMemoryItem ], max_tokens = None , overlap = 200
52107 ) -> list [TextualMemoryItem ]:
@@ -57,35 +112,49 @@ def _concat_multi_modal_memories(
57112 2. Each window has overlap tokens for context continuity
58113 3. Aggregates items within each window into a single memory item
59114 4. Determines memory_type based on roles in each window
115+ 5. Splits single large memory items that exceed max_tokens
60116 """
61117 if not all_memory_items :
62118 return []
63119
64- # If only one item, return as-is (no need to aggregate)
65- if len (all_memory_items ) == 1 :
66- return all_memory_items
67-
68120 max_tokens = max_tokens or self .chat_window_max_tokens
121+
122+ # Split large memory items before processing
123+ processed_items = []
124+ for item in all_memory_items :
125+ item_text = item .memory or ""
126+ item_tokens = self ._count_tokens (item_text )
127+ if item_tokens > max_tokens :
128+ # Split the large item into multiple chunks
129+ split_items = self ._split_large_memory_item (item , max_tokens )
130+ processed_items .extend (split_items )
131+ else :
132+ processed_items .append (item )
133+
134+ # If only one item after processing, return as-is
135+ if len (processed_items ) == 1 :
136+ return processed_items
137+
69138 windows = []
70139 buf_items = []
71140 cur_text = ""
72141
73142 # Extract info from first item (all items should have same user_id, session_id)
74- first_item = all_memory_items [0 ]
143+ first_item = processed_items [0 ]
75144 info = {
76145 "user_id" : first_item .metadata .user_id ,
77146 "session_id" : first_item .metadata .session_id ,
78147 ** (first_item .metadata .info or {}),
79148 }
80149
81- for _idx , item in enumerate (all_memory_items ):
150+ for _idx , item in enumerate (processed_items ):
82151 item_text = item .memory or ""
83152 # Ensure line ends with newline (same format as simple_struct)
84153 line = item_text if item_text .endswith ("\n " ) else f"{ item_text } \n "
85154
86155 # Check if adding this item would exceed max_tokens (same logic as _iter_chat_windows)
87- # Note: The `and cur_text` condition ensures that single large messages are not truncated.
88- # If cur_text is empty (new window), even if line exceeds max_tokens, it won't trigger output.
156+ # Note: After splitting large items, each item should be <= max_tokens,
157+ # but we still check to handle edge cases
89158 if self ._count_tokens (cur_text + line ) > max_tokens and cur_text :
90159 # Yield current window
91160 window = self ._build_window_from_items (buf_items , info )
@@ -102,8 +171,7 @@ def _concat_multi_modal_memories(
102171 # Recalculate cur_text from remaining items
103172 cur_text = "" .join ([it .memory or "" for it in buf_items ])
104173
105- # Add item to current window (always, even if it exceeds max_tokens)
106- # This ensures single large messages are not truncated, same as simple_struct
174+ # Add item to current window
107175 buf_items .append (item )
108176 # Recalculate cur_text from all items in buffer (same as _iter_chat_windows)
109177 cur_text = "" .join ([it .memory or "" for it in buf_items ])
@@ -255,14 +323,12 @@ def _process_multi_modal_data(
255323 for msg in scene_data_info :
256324 items = self .multi_modal_parser .parse (msg , info , mode = "fast" , ** kwargs )
257325 all_memory_items .extend (items )
258- fast_memory_items = self ._concat_multi_modal_memories (all_memory_items )
259-
260326 else :
261327 # Parse as single message
262- fast_memory_items = self .multi_modal_parser .parse (
328+ all_memory_items = self .multi_modal_parser .parse (
263329 scene_data_info , info , mode = "fast" , ** kwargs
264330 )
265-
331+ fast_memory_items = self . _concat_multi_modal_memories ( all_memory_items )
266332 if mode == "fast" :
267333 return fast_memory_items
268334 else :
0 commit comments