22
33import concurrent .futures
44import os
5+ import re
56import tempfile
67
78from typing import Any
1314from memos .llms .base import BaseLLM
1415from memos .log import get_logger
1516from memos .mem_reader .read_multi_modal .base import BaseMessageParser , _derive_key
17+ from memos .mem_reader .read_multi_modal .image_parser import ImageParser
1618from memos .mem_reader .read_multi_modal .utils import (
1719 detect_lang ,
1820 get_parser ,
@@ -129,6 +131,91 @@ def _handle_local(self, data: str) -> str:
129131 logger .info ("[FileContentParser] Local file paths are not supported in fine mode." )
130132 return ""
131133
def _extract_and_process_images(self, text: str, info: dict[str, Any], **kwargs) -> str:
    """
    Replace markdown image references in ``text`` with ImageParser output.

    Each ``![alt](target)`` occurrence is sent to
    ``self.image_parser.parse_fine`` as an ``image_url`` message. The
    substitution is done by match position, so repeated identical references
    are each replaced, and text inserted for one image is never rescanned
    while another image is being handled.

    Args:
        text: Markdown text containing image references
        info: Dictionary containing user_id and session_id (passed through
            to ``ImageParser.parse_fine``)
        **kwargs: Additional parameters for ImageParser

    Returns:
        ``text`` unchanged when it is empty, when no image parser is
        configured, or when it contains no image references. Otherwise the
        text with every reference replaced by the extracted content, by a
        "No content extracted" note when the parser yields nothing, or left
        as the original reference when processing that image raises.
    """
    if not text or not self.image_parser:
        return text

    # Pattern to match markdown images: ![alt](url)
    # NOTE: group 2 captures everything between the parentheses, so an
    # optional markdown title (![alt](url "title")) ends up inside the URL.
    image_pattern = r"!\[([^\]]*)\]\(([^)]+)\)"

    # Find all image matches first
    image_matches = list(re.finditer(image_pattern, text))
    if not image_matches:
        return text

    total = len(image_matches)
    logger.info(f"[FileContentParser] Found {total} images to process")

    # Rebuild the text from the untouched gaps between matches plus one
    # replacement per match; positions come from the original text only.
    pieces = []
    cursor = 0
    extracted_count = 0
    for idx, match in enumerate(image_matches, 1):
        original = match.group(0)
        image_url = match.group(2)

        try:
            # Construct image message format for ImageParser
            image_message = {
                "type": "image_url",
                "image_url": {
                    "url": image_url,
                    "detail": "auto",
                },
            }

            logger.info(
                f"[FileContentParser] Processing image {idx} /{total} : {image_url} "
            )
            memory_items = self.image_parser.parse_fine(image_message, info, **kwargs)

            # Only the non-empty ``memory`` attribute of each item is used.
            extracted_texts = [
                str(item.memory)
                for item in memory_items
                if hasattr(item, "memory") and item.memory
            ]

            if extracted_texts:
                extracted_content = "\n ".join(extracted_texts)
                replacement = (
                    f"\n [Image Content from {image_url} ]:\n {extracted_content} \n "
                )
                extracted_count += 1
            else:
                # Nothing extracted: swap the reference for an explicit note
                logger.warning(
                    f"[FileContentParser] No content extracted from image: {image_url} "
                )
                replacement = f"\n [Image: {image_url} - No content extracted]\n "

        except Exception as e:
            # Best effort: a failing image must not abort the whole document,
            # so the original reference is kept verbatim.
            logger.error(f"[FileContentParser] Error processing image {image_url} : {e} ")
            replacement = original

        pieces.append(text[cursor:match.start()])
        pieces.append(replacement)
        cursor = match.end()

    # Text after the last image reference
    pieces.append(text[cursor:])
    processed_text = "".join(pieces)

    logger.info(
        f"[FileContentParser] Processed {total} images, "
        f"extracted content for {extracted_count} images"
    )
    return processed_text
218+
132219 def __init__ (
133220 self ,
134221 embedder : BaseEmbedder ,
@@ -149,6 +236,8 @@ def __init__(
149236 """
150237 super ().__init__ (embedder , llm )
151238 self .parser = parser
239+ # Initialize ImageParser for processing images in markdown
240+ self .image_parser = ImageParser (embedder , llm ) if llm else None
152241
153242 # Get inner markdown hostnames from config or environment
154243 if direct_markdown_hostnames is not None :
@@ -519,6 +608,10 @@ def parse_fine(
519608 f"[FileContentParser] Failed to delete temp file { temp_file_path } : { e } "
520609 )
521610
611+ # Extract and process images from parsed_text
612+ if is_markdown and parsed_text and self .image_parser :
613+ parsed_text = self ._extract_and_process_images (parsed_text , info , ** kwargs )
614+
522615 # Extract info fields
523616 if not info :
524617 info = {}
0 commit comments