Skip to content

Commit 2763ff6

Browse files
authored
Merge branch 'dev' into feat/monitor_event_new_filed
2 parents 571994c + 6f32006 commit 2763ff6

File tree

3 files changed

+10 -6 lines changed

3 files changed

+10 -6 lines changed

src/memos/mem_reader/read_multi_modal/file_content_parser.py

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -170,6 +170,7 @@ def create_source(
170170
chunk_index: int | None = None,
171171
chunk_total: int | None = None,
172172
chunk_content: str | None = None,
173+
file_url_flag: bool = False,
173174
) -> SourceMessage:
174175
"""Create SourceMessage from file content part."""
175176
if isinstance(message, dict):
@@ -178,6 +179,7 @@ def create_source(
178179
"type": "file",
179180
"doc_path": file_info.get("filename") or file_info.get("file_id", ""),
180181
"content": chunk_content if chunk_content else file_info.get("file_data", ""),
182+
"file_info": file_info if file_url_flag else {},
181183
}
182184
# Add chunk ordering information if provided
183185
if chunk_index is not None:
@@ -202,10 +204,7 @@ def rebuild_from_source(
202204
# Rebuild from source fields
203205
return {
204206
"type": "file",
205-
"file": {
206-
"filename": source.doc_path or "",
207-
"file_data": source.content or "",
208-
},
207+
"file": source.file_info,
209208
}
210209

211210
def _parse_file(self, file_info: dict[str, Any]) -> str:
@@ -278,7 +277,7 @@ def parse_fast(
278277
file_data = file_info.get("file_data", "")
279278
file_id = file_info.get("file_id", "")
280279
filename = file_info.get("filename", "")
281-
280+
file_url_flag = False
282281
# Build content string based on available information
283282
content_parts = []
284283

@@ -297,6 +296,7 @@ def parse_fast(
297296
content_parts.append(f"[File Data (base64/encoded): {len(file_data)} chars]")
298297
# Check if it looks like a URL
299298
elif file_data.startswith(("http://", "https://", "file://")):
299+
file_url_flag = True
300300
content_parts.append(f"[File URL: {file_data}]")
301301
else:
302302
# TODO: split into multiple memory items
@@ -348,6 +348,7 @@ def parse_fast(
348348
chunk_index=chunk_idx,
349349
chunk_total=total_chunks,
350350
chunk_content=chunk_text,
351+
file_url_flag=file_url_flag,
351352
)
352353

353354
memory_item = TextualMemoryItem(
@@ -384,6 +385,7 @@ def parse_fast(
384385
chunk_index=None,
385386
chunk_total=0,
386387
chunk_content=content,
388+
file_url_flag=file_url_flag,
387389
)
388390
memory_item = TextualMemoryItem(
389391
memory=content,

src/memos/mem_reader/read_multi_modal/user_parser.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ def create_source(
8080
message_id=message_id,
8181
doc_path=file_info.get("filename") or file_info.get("file_id", ""),
8282
content=file_info.get("file_data", ""),
83+
file_info=file_info,
8384
)
8485
)
8586
elif part_type == "image_url":

src/memos/memories/textual/item.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ class SourceMessage(BaseModel):
2828
source is a chat turn.
2929
- content: Minimal reproducible snippet from the source. If omitted,
3030
upstream may fall back to `doc_path` / `url` / `message_id`.
31+
- file_info: File information for file source.
3132
- chat_time / message_id / doc_path: Locators for precisely pointing back
3233
to the original record (timestamp, message id, document path).
3334
- Extra fields: Allowed (`model_config.extra="allow"`) to carry arbitrary
@@ -40,7 +41,7 @@ class SourceMessage(BaseModel):
4041
message_id: str | None = None
4142
content: str | None = None
4243
doc_path: str | None = None
43-
44+
file_info: dict | None = None
4445
model_config = ConfigDict(extra="allow")
4546

4647

0 commit comments

Comments
 (0)