Skip to content

Commit ac38046

Browse files
committed
feat: add image parser in file
1 parent 7fa7b77 commit ac38046

File tree

1 file changed

+93
-0
lines changed

1 file changed

+93
-0
lines changed

src/memos/mem_reader/read_multi_modal/file_content_parser.py

Lines changed: 93 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22

33
import concurrent.futures
44
import os
5+
import re
56
import tempfile
67

78
from typing import Any
@@ -13,6 +14,7 @@
1314
from memos.llms.base import BaseLLM
1415
from memos.log import get_logger
1516
from memos.mem_reader.read_multi_modal.base import BaseMessageParser, _derive_key
17+
from memos.mem_reader.read_multi_modal.image_parser import ImageParser
1618
from memos.mem_reader.read_multi_modal.utils import (
1719
detect_lang,
1820
get_parser,
@@ -129,6 +131,91 @@ def _handle_local(self, data: str) -> str:
129131
logger.info("[FileContentParser] Local file paths are not supported in fine mode.")
130132
return ""
131133

134+
def _extract_and_process_images(self, text: str, info: dict[str, Any], **kwargs) -> str:
    """
    Replace markdown image references in ``text`` with content from ImageParser.

    Every ``![alt](url)`` reference is sent to ``self.image_parser.parse_fine``
    and substituted, at its own position in the text, by the extracted content.
    Replacement is done by match span rather than by string search, so repeated
    identical references are all replaced and a replacement can never be
    applied inside content that was inserted for an earlier image.

    Args:
        text: Markdown text that may contain image references.
        info: Passed through unchanged to ``ImageParser.parse_fine``
            (per the caller, carries user_id and session_id).
        **kwargs: Additional parameters forwarded to ``ImageParser.parse_fine``.

    Returns:
        The text with each image reference replaced by its extracted content,
        by a "No content extracted" note when the parser yields nothing, or
        left untouched when processing that image raised an error. ``text`` is
        returned unchanged when it is empty, when no image parser is
        configured, or when it contains no image references.
    """
    if not text or not self.image_parser:
        return text

    # Pattern to match markdown images: ![](url) or ![alt](url)
    image_pattern = r"!\[([^\]]*)\]\(([^)]+)\)"

    image_matches = list(re.finditer(image_pattern, text))
    if not image_matches:
        return text

    total = len(image_matches)
    logger.info(f"[FileContentParser] Found {total} images to process")

    # url -> (replacement text, whether content was extracted). Only completed
    # parser calls are cached, so a URL that raised is retried if it recurs.
    results_by_url: dict[str, tuple[str, bool]] = {}
    pieces: list[str] = []
    cursor = 0
    extracted_count = 0

    for idx, match in enumerate(image_matches, 1):
        image_url = match.group(2)

        # Copy the untouched text between the previous image and this one.
        pieces.append(text[cursor : match.start()])
        cursor = match.end()

        cached = results_by_url.get(image_url)
        if cached is not None:
            # Same image referenced again: reuse the result instead of
            # invoking the parser a second time.
            replacement, has_content = cached
            pieces.append(replacement)
            extracted_count += has_content
            continue

        try:
            # Construct image message format for ImageParser
            image_message = {
                "type": "image_url",
                "image_url": {
                    "url": image_url,
                    "detail": "auto",
                },
            }

            logger.info(f"[FileContentParser] Processing image {idx}/{total}: {image_url}")
            memory_items = self.image_parser.parse_fine(image_message, info, **kwargs)

            # Keep only non-empty ``memory`` payloads, rendered as strings.
            extracted_texts = [
                str(item.memory) for item in memory_items if getattr(item, "memory", None)
            ]

            if extracted_texts:
                extracted_content = "\n".join(extracted_texts)
                replacement = f"\n[Image Content from {image_url}]:\n{extracted_content}\n"
                has_content = True
            else:
                # Nothing extracted: leave a note in place of the image.
                logger.warning(
                    f"[FileContentParser] No content extracted from image: {image_url}"
                )
                replacement = f"\n[Image: {image_url} - No content extracted]\n"
                has_content = False

        except Exception as e:
            logger.error(f"[FileContentParser] Error processing image {image_url}: {e}")
            # On error, keep original image reference
            pieces.append(match.group(0))
            continue

        results_by_url[image_url] = (replacement, has_content)
        pieces.append(replacement)
        extracted_count += has_content

    # Append whatever follows the last image reference.
    pieces.append(text[cursor:])
    processed_text = "".join(pieces)

    logger.info(
        f"[FileContentParser] Processed {total} images, "
        f"extracted content for {extracted_count} images"
    )
    return processed_text
218+
132219
def __init__(
133220
self,
134221
embedder: BaseEmbedder,
@@ -149,6 +236,8 @@ def __init__(
149236
"""
150237
super().__init__(embedder, llm)
151238
self.parser = parser
239+
# Initialize ImageParser for processing images in markdown
240+
self.image_parser = ImageParser(embedder, llm) if llm else None
152241

153242
# Get inner markdown hostnames from config or environment
154243
if direct_markdown_hostnames is not None:
@@ -519,6 +608,10 @@ def parse_fine(
519608
f"[FileContentParser] Failed to delete temp file {temp_file_path}: {e}"
520609
)
521610

611+
# Extract and process images from parsed_text
612+
if is_markdown and parsed_text and self.image_parser:
613+
parsed_text = self._extract_and_process_images(parsed_text, info, **kwargs)
614+
522615
# Extract info fields
523616
if not info:
524617
info = {}

0 commit comments

Comments
 (0)