Skip to content

Commit f8551ea

Browse files
committed
feat: add inner host
1 parent 29f64df commit f8551ea

File tree

6 files changed

+165
-7
lines changed

6 files changed

+165
-7
lines changed

examples/mem_reader/multimodal_struct_reader.py

Lines changed: 105 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -327,6 +327,102 @@ def get_info(self) -> dict[str, Any]:
327327
]
328328
],
329329
),
330+
TestCase(
331+
name="oss_text_file",
332+
description="User message with text and file",
333+
scene_data=[
334+
[
335+
{
336+
"role": "user",
337+
"content": [
338+
{"type": "text", "text": "请阅读这个PDF,总结里面的要点。"},
339+
{
340+
"type": "file",
341+
"file": {
342+
"file_id": "file_123",
343+
"filename": "report.pdf",
344+
"file_data": "@http://139.196.232.20:9090/graph-test/algorithm/2025_11_13/1763043889_1763043782_PM1%E8%BD%A6%E9%97%B4PMT%E9%9D%B4%E5%8E%8B%E8%BE%B9%E5%8E%8B%E5%8E%8B%E5%8A%9B%E6%97%A0%E6%B3%95%E5%BB%BA%E7%AB%8B%E6%95%85%E9%9A%9C%E6%8A%A5%E5%91%8A20240720.md",
345+
},
346+
},
347+
],
348+
"chat_time": "2025-11-24T10:21:00Z",
349+
"message_id": "mm-file-1",
350+
}
351+
]
352+
],
353+
),
354+
TestCase(
355+
name="pure_data_file",
356+
description="User message with text and file",
357+
scene_data=[
358+
[
359+
{
360+
"role": "user",
361+
"content": [
362+
{"type": "text", "text": "请阅读这个PDF,总结里面的要点。"},
363+
{
364+
"type": "file",
365+
"file": {
366+
"file_id": "file_123",
367+
"filename": "report.pdf",
368+
"file_data": "明文记忆是系统与用户对话、操作等交互中动态习得,以及外部提供的、可显式管理的结构化知识形态,通常以文档、提示模板、图结构或用户规则等形式存在。它具备编辑性、可共享性与治理友好性,适合存储需要频繁修改、可审计或多方协同使用的信息。 在 MemOS 中,明文记忆可用于动态生成推理上下文、个性化偏好注入、多代理协作共享等场景,成为连接人类输入与模型认知的关键桥梁。激活记忆是指模型在推理过程中产生的瞬时性认知状态,包括 KV cache、隐藏层激活、注意力权重等中间张量结构。它通常用于维持上下文连续性、对话一致性与行为风格控制。 MemOS 将激活记忆抽象为可调度资源,支持按需唤醒、延迟卸载与结构变换。例如,某些上下文状态可以被压缩为“半结构化记忆片段”用于未来复用,也可以在任务级别转化为参数化模块,支持短期记忆的长期化演进。这一机制为模型行为一致性、风格保持与状态持续性提供了基础。",
369+
},
370+
},
371+
],
372+
"chat_time": "2025-11-24T10:21:00Z",
373+
"message_id": "mm-file-1",
374+
}
375+
]
376+
],
377+
),
378+
TestCase(
379+
name="local_data_file",
380+
description="User message with text and file",
381+
scene_data=[
382+
[
383+
{
384+
"role": "user",
385+
"content": [
386+
{"type": "text", "text": "请阅读这个PDF,总结里面的要点。"},
387+
{
388+
"type": "file",
389+
"file": {
390+
"file_id": "file_123",
391+
"filename": "report.pdf",
392+
"file_data": "./my_local_file/report.pdf",
393+
},
394+
},
395+
],
396+
"chat_time": "2025-11-24T10:21:00Z",
397+
"message_id": "mm-file-1",
398+
}
399+
]
400+
],
401+
),
402+
TestCase(
403+
name="internet_file",
404+
description="User message with text and file",
405+
scene_data=[
406+
[
407+
{
408+
"role": "user",
409+
"content": [
410+
{"type": "text", "text": "请阅读这个PDF,总结里面的要点。"},
411+
{
412+
"type": "file",
413+
"file": {
414+
"file_id": "file_123",
415+
"filename": "report.pdf",
416+
"file_data": "https://upload.wikimedia.org/wikipedia/commons/c/cb/NLC416-16jh004830-88775_%E7%B4%85%E6%A8%93%E5%A4%A2.pdf",
417+
},
418+
},
419+
],
420+
"chat_time": "2025-11-24T10:21:00Z",
421+
"message_id": "mm-file-1",
422+
}
423+
]
424+
],
425+
),
330426
TestCase(
331427
name="multimodal_mixed",
332428
description="Mixed multimodal message (text + file + image)",
@@ -661,6 +757,12 @@ def get_reader_config() -> dict[str, Any]:
661757
},
662758
}
663759

760+
# Get direct markdown hostnames from environment variable
761+
direct_markdown_hostnames = None
762+
env_hostnames = os.getenv("FILE_PARSER_DIRECT_MARKDOWN_HOSTNAMES", "")
763+
if env_hostnames:
764+
direct_markdown_hostnames = [h.strip() for h in env_hostnames.split(",") if h.strip()]
765+
664766
return {
665767
"llm": llm_config,
666768
"embedder": embedder_config,
@@ -673,6 +775,7 @@ def get_reader_config() -> dict[str, Any]:
673775
"min_sentences_per_chunk": 1,
674776
},
675777
},
778+
"direct_markdown_hostnames": direct_markdown_hostnames,
676779
}
677780

678781

@@ -863,13 +966,13 @@ def main():
863966
parser.add_argument(
864967
"--example",
865968
type=str,
866-
default="all",
969+
default="oss_text_file",
867970
help="Test case name, category name, or 'all' to run all cases (default: all)",
868971
)
869972
parser.add_argument(
870973
"--mode",
871974
choices=["fast", "fine"],
872-
default="fast",
975+
default="fine",
873976
help="Processing mode: fast (quick) or fine (with LLM) (default: fast)",
874977
)
875978
parser.add_argument(

src/memos/api/config.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -707,6 +707,11 @@ def get_product_default_config() -> dict[str, Any]:
707707
},
708708
},
709709
"chat_chunker": reader_config,
710+
"direct_markdown_hostnames": [
711+
h.strip()
712+
for h in os.getenv("FILE_PARSER_DIRECT_MARKDOWN_HOSTNAMES", "").split(",")
713+
if h.strip()
714+
],
710715
},
711716
},
712717
"enable_textual_memory": True,

src/memos/configs/mem_reader.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,12 @@ class SimpleStructMemReaderConfig(BaseMemReaderConfig):
4848
class MultiModalStructMemReaderConfig(BaseMemReaderConfig):
4949
"""MultiModalStruct MemReader configuration class."""
5050

51+
direct_markdown_hostnames: list[str] | None = Field(
52+
default=None,
53+
description="List of hostnames that should return markdown directly without parsing. "
54+
"If None, reads from FILE_PARSER_DIRECT_MARKDOWN_HOSTNAMES environment variable.",
55+
)
56+
5157

5258
class StrategyStructMemReaderConfig(BaseMemReaderConfig):
5359
"""StrategyStruct MemReader configuration class."""

src/memos/mem_reader/multi_modal_struct.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,13 @@ def __init__(self, config: MultiModalStructMemReaderConfig):
2929
"""
3030
from memos.configs.mem_reader import SimpleStructMemReaderConfig
3131

32+
# Extract direct_markdown_hostnames before converting to SimpleStructMemReaderConfig
33+
direct_markdown_hostnames = getattr(config, "direct_markdown_hostnames", None)
34+
35+
# Create config_dict excluding direct_markdown_hostnames for SimpleStructMemReaderConfig
3236
config_dict = config.model_dump(exclude_none=True)
37+
config_dict.pop("direct_markdown_hostnames", None)
38+
3339
simple_config = SimpleStructMemReaderConfig(**config_dict)
3440
super().__init__(simple_config)
3541

@@ -38,6 +44,7 @@ def __init__(self, config: MultiModalStructMemReaderConfig):
3844
embedder=self.embedder,
3945
llm=self.llm,
4046
parser=None,
47+
direct_markdown_hostnames=direct_markdown_hostnames,
4148
)
4249

4350
def _concat_multi_modal_memories(
@@ -271,7 +278,7 @@ def _process_multi_modal_data(
271278
sources = fast_item.metadata.sources
272279
for source in sources:
273280
items = self.multi_modal_parser.process_transfer(
274-
source, context_items=[fast_item], custom_tags=custom_tags
281+
source, context_items=[fast_item], custom_tags=custom_tags, info=info
275282
)
276283
fine_memory_items.extend(items)
277284
return fine_memory_items

src/memos/mem_reader/read_multi_modal/file_content_parser.py

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""Parser for file content parts (RawMessageList)."""
22

33
import os
4+
import tempfile
45

56
from typing import Any
67
from urllib.parse import urlparse
@@ -30,6 +31,7 @@ def __init__(
3031
embedder: BaseEmbedder,
3132
llm: BaseLLM | None = None,
3233
parser: Any | None = None,
34+
direct_markdown_hostnames: list[str] | None = None,
3335
):
3436
"""
3537
Initialize FileContentParser.
@@ -38,10 +40,26 @@ def __init__(
3840
embedder: Embedder for generating embeddings
3941
llm: Optional LLM for fine mode processing
4042
parser: Optional parser for parsing file contents
43+
direct_markdown_hostnames: List of hostnames that should return markdown directly
44+
without parsing. If None, reads from FILE_PARSER_DIRECT_MARKDOWN_HOSTNAMES
45+
environment variable (comma-separated).
4146
"""
4247
super().__init__(embedder, llm)
4348
self.parser = parser
4449

50+
# Get direct markdown hostnames from config or environment
51+
if direct_markdown_hostnames is not None:
52+
self.direct_markdown_hostnames = direct_markdown_hostnames
53+
else:
54+
env_hostnames = os.getenv("FILE_PARSER_DIRECT_MARKDOWN_HOSTNAMES", "")
55+
if env_hostnames:
56+
# Support comma-separated list
57+
self.direct_markdown_hostnames = [
58+
h.strip() for h in env_hostnames.split(",") if h.strip()
59+
]
60+
else:
61+
self.direct_markdown_hostnames = []
62+
4563
def create_source(
4664
self,
4765
message: File,
@@ -309,14 +327,25 @@ def parse_fine(
309327
filename = os.path.basename(parsed_url.path) or "downloaded_file"
310328

311329
# Route based on hostname
312-
if hostname == "139.196.232.20":
313-
# Special handling for 139.196.232.20: directly use response text as markdown
330+
if hostname in self.direct_markdown_hostnames:
331+
# Special handling for configured hostnames: directly use response text as markdown
314332
logger.info(
315333
f"[FileContentParser] Using direct markdown content for {hostname}"
316334
)
317335
parsed_text = response.text
318336
else:
319-
logger.warning("[FileContentParser] Outer url not implemented now.")
337+
file_ext = os.path.splitext(filename)[1] or ".tmp"
338+
339+
with tempfile.NamedTemporaryFile(
340+
mode="wb", delete=False, suffix=file_ext
341+
) as temp_file:
342+
temp_file.write(response.content)
343+
temp_file_path = temp_file.name
344+
logger.info(
345+
f"[FileContentParser] Downloaded file to: {temp_file_path}"
346+
)
347+
# Parse the downloaded file
348+
parsed_text = self.parser.parse(temp_file_path)
320349
except requests.RequestException as e:
321350
logger.error(
322351
f"[FileContentParser] Failed to download URL {url_str}: {e}"
@@ -373,6 +402,8 @@ def parse_fine(
373402
source = self.create_source(message, info)
374403

375404
# Extract info fields
405+
if not info:
406+
info = {}
376407
info_ = info.copy()
377408
user_id = info_.pop("user_id", "")
378409
session_id = info_.pop("session_id", "")

src/memos/mem_reader/read_multi_modal/multi_modal_parser.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ def __init__(
3535
embedder: BaseEmbedder,
3636
llm: BaseLLM | None = None,
3737
parser: Any | None = None,
38+
direct_markdown_hostnames: list[str] | None = None,
3839
):
3940
"""
4041
Initialize MultiModalParser.
@@ -43,6 +44,9 @@ def __init__(
4344
embedder: Embedder for generating embeddings
4445
llm: Optional LLM for fine mode processing
4546
parser: Optional parser for parsing file contents
47+
direct_markdown_hostnames: List of hostnames that should return markdown directly
48+
without parsing. If None, reads from FILE_PARSER_DIRECT_MARKDOWN_HOSTNAMES
49+
environment variable (comma-separated); if that is unset, defaults to an empty list.
4650
"""
4751
self.embedder = embedder
4852
self.llm = llm
@@ -55,7 +59,9 @@ def __init__(
5559
self.assistant_parser = AssistantParser(embedder, llm)
5660
self.tool_parser = ToolParser(embedder, llm)
5761
self.text_content_parser = TextContentParser(embedder, llm)
58-
self.file_content_parser = FileContentParser(embedder, llm, parser)
62+
self.file_content_parser = FileContentParser(
63+
embedder, llm, parser, direct_markdown_hostnames=direct_markdown_hostnames
64+
)
5965
self.image_parser = ImageParser(embedder, llm)
6066
self.audio_parser = None # future
6167

0 commit comments

Comments
 (0)