feat: make direct-markdown (inner) hostnames configurable and parse files from other URLs

CaralHsi · CaralHsi · commit f8551ea91048 · 2025-12-02T15:58:17.000+08:00
diff --git a/examples/mem_reader/multimodal_struct_reader.py b/examples/mem_reader/multimodal_struct_reader.py
@@ -327,6 +327,102 @@ def get_info(self) -> dict[str, Any]:
             ]
         ],
     ),
+    TestCase(
+        name="oss_text_file",
+        description="User message with text and file",
+        scene_data=[
+            [
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "text", "text": "请阅读这个PDF，总结里面的要点。"},
+                        {
+                            "type": "file",
+                            "file": {
+                                "file_id": "file_123",
+                                "filename": "report.pdf",
+                                "file_data": "@http://139.196.232.20:9090/graph-test/algorithm/2025_11_13/1763043889_1763043782_PM1%E8%BD%A6%E9%97%B4PMT%E9%9D%B4%E5%8E%8B%E8%BE%B9%E5%8E%8B%E5%8E%8B%E5%8A%9B%E6%97%A0%E6%B3%95%E5%BB%BA%E7%AB%8B%E6%95%85%E9%9A%9C%E6%8A%A5%E5%91%8A20240720.md",
+                            },
+                        },
+                    ],
+                    "chat_time": "2025-11-24T10:21:00Z",
+                    "message_id": "mm-file-1",
+                }
+            ]
+        ],
+    ),
+    TestCase(
+        name="pure_data_file",
+        description="User message with text and file",
+        scene_data=[
+            [
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "text", "text": "请阅读这个PDF，总结里面的要点。"},
+                        {
+                            "type": "file",
+                            "file": {
+                                "file_id": "file_123",
+                                "filename": "report.pdf",
+                                "file_data": "明文记忆是系统与用户对话、操作等交互中动态习得，以及外部提供的、可显式管理的结构化知识形态，通常以文档、提示模板、图结构或用户规则等形式存在。它具备编辑性、可共享性与治理友好性，适合存储需要频繁修改、可审计或多方协同使用的信息。 在 MemOS 中，明文记忆可用于动态生成推理上下文、个性化偏好注入、多代理协作共享等场景，成为连接人类输入与模型认知的关键桥梁。激活记忆是指模型在推理过程中产生的瞬时性认知状态，包括 KV cache、隐藏层激活、注意力权重等中间张量结构。它通常用于维持上下文连续性、对话一致性与行为风格控制。 MemOS 将激活记忆抽象为可调度资源，支持按需唤醒、延迟卸载与结构变换。例如，某些上下文状态可以被压缩为“半结构化记忆片段”用于未来复用，也可以在任务级别转化为参数化模块，支持短期记忆的长期化演进。这一机制为模型行为一致性、风格保持与状态持续性提供了基础。",
+                            },
+                        },
+                    ],
+                    "chat_time": "2025-11-24T10:21:00Z",
+                    "message_id": "mm-file-1",
+                }
+            ]
+        ],
+    ),
+    TestCase(
+        name="local_data_file",
+        description="User message with text and file",
+        scene_data=[
+            [
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "text", "text": "请阅读这个PDF，总结里面的要点。"},
+                        {
+                            "type": "file",
+                            "file": {
+                                "file_id": "file_123",
+                                "filename": "report.pdf",
+                                "file_data": "./my_local_file/report.pdf",
+                            },
+                        },
+                    ],
+                    "chat_time": "2025-11-24T10:21:00Z",
+                    "message_id": "mm-file-1",
+                }
+            ]
+        ],
+    ),
+    TestCase(
+        name="internet_file",
+        description="User message with text and file",
+        scene_data=[
+            [
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "text", "text": "请阅读这个PDF，总结里面的要点。"},
+                        {
+                            "type": "file",
+                            "file": {
+                                "file_id": "file_123",
+                                "filename": "report.pdf",
+                                "file_data": "https://upload.wikimedia.org/wikipedia/commons/c/cb/NLC416-16jh004830-88775_%E7%B4%85%E6%A8%93%E5%A4%A2.pdf",
+                            },
+                        },
+                    ],
+                    "chat_time": "2025-11-24T10:21:00Z",
+                    "message_id": "mm-file-1",
+                }
+            ]
+        ],
+    ),
     TestCase(
         name="multimodal_mixed",
         description="Mixed multimodal message (text + file + image)",
@@ -661,6 +757,12 @@ def get_reader_config() -> dict[str, Any]:
             },
         }
 
+    # Get direct markdown hostnames from environment variable
+    direct_markdown_hostnames = None
+    env_hostnames = os.getenv("FILE_PARSER_DIRECT_MARKDOWN_HOSTNAMES", "")
+    if env_hostnames:
+        direct_markdown_hostnames = [h.strip() for h in env_hostnames.split(",") if h.strip()]
+
     return {
         "llm": llm_config,
         "embedder": embedder_config,
@@ -673,6 +775,7 @@ def get_reader_config() -> dict[str, Any]:
                 "min_sentences_per_chunk": 1,
             },
         },
+        "direct_markdown_hostnames": direct_markdown_hostnames,
     }
 
 
@@ -863,13 +966,13 @@ def main():
     parser.add_argument(
         "--example",
         type=str,
-        default="all",
+        default="oss_text_file",
         help="Test case name, category name, or 'all' to run all cases (default: all)",
     )
     parser.add_argument(
         "--mode",
         choices=["fast", "fine"],
-        default="fast",
+        default="fine",
         help="Processing mode: fast (quick) or fine (with LLM) (default: fast)",
     )
     parser.add_argument(
diff --git a/src/memos/api/config.py b/src/memos/api/config.py
@@ -707,6 +707,11 @@ def get_product_default_config() -> dict[str, Any]:
                         },
                     },
                     "chat_chunker": reader_config,
+                    "direct_markdown_hostnames": [
+                        h.strip()
+                        for h in os.getenv("FILE_PARSER_DIRECT_MARKDOWN_HOSTNAMES", "").split(",")
+                        if h.strip()
+                    ],
                 },
             },
             "enable_textual_memory": True,
diff --git a/src/memos/configs/mem_reader.py b/src/memos/configs/mem_reader.py
@@ -48,6 +48,12 @@ class SimpleStructMemReaderConfig(BaseMemReaderConfig):
 class MultiModalStructMemReaderConfig(BaseMemReaderConfig):
     """MultiModalStruct MemReader configuration class."""
 
+    direct_markdown_hostnames: list[str] | None = Field(
+        default=None,
+        description="List of hostnames that should return markdown directly without parsing. "
+        "If None, reads from FILE_PARSER_DIRECT_MARKDOWN_HOSTNAMES environment variable.",
+    )
+
 
 class StrategyStructMemReaderConfig(BaseMemReaderConfig):
     """StrategyStruct MemReader configuration class."""
diff --git a/src/memos/mem_reader/multi_modal_struct.py b/src/memos/mem_reader/multi_modal_struct.py
@@ -29,7 +29,13 @@ def __init__(self, config: MultiModalStructMemReaderConfig):
         """
         from memos.configs.mem_reader import SimpleStructMemReaderConfig
 
+        # Extract direct_markdown_hostnames before converting to SimpleStructMemReaderConfig
+        direct_markdown_hostnames = getattr(config, "direct_markdown_hostnames", None)
+
+        # Create config_dict excluding direct_markdown_hostnames for SimpleStructMemReaderConfig
         config_dict = config.model_dump(exclude_none=True)
+        config_dict.pop("direct_markdown_hostnames", None)
+
         simple_config = SimpleStructMemReaderConfig(**config_dict)
         super().__init__(simple_config)
 
@@ -38,6 +44,7 @@ def __init__(self, config: MultiModalStructMemReaderConfig):
             embedder=self.embedder,
             llm=self.llm,
             parser=None,
+            direct_markdown_hostnames=direct_markdown_hostnames,
         )
 
     def _concat_multi_modal_memories(
@@ -271,7 +278,7 @@ def _process_multi_modal_data(
                 sources = fast_item.metadata.sources
                 for source in sources:
                     items = self.multi_modal_parser.process_transfer(
-                        source, context_items=[fast_item], custom_tags=custom_tags
+                        source, context_items=[fast_item], custom_tags=custom_tags, info=info
                     )
                     fine_memory_items.extend(items)
             return fine_memory_items
diff --git a/src/memos/mem_reader/read_multi_modal/file_content_parser.py b/src/memos/mem_reader/read_multi_modal/file_content_parser.py
@@ -1,6 +1,7 @@
 """Parser for file content parts (RawMessageList)."""
 
 import os
+import tempfile
 
 from typing import Any
 from urllib.parse import urlparse
@@ -30,6 +31,7 @@ def __init__(
         embedder: BaseEmbedder,
         llm: BaseLLM | None = None,
         parser: Any | None = None,
+        direct_markdown_hostnames: list[str] | None = None,
     ):
         """
         Initialize FileContentParser.
@@ -38,10 +40,26 @@ def __init__(
             embedder: Embedder for generating embeddings
             llm: Optional LLM for fine mode processing
             parser: Optional parser for parsing file contents
+            direct_markdown_hostnames: List of hostnames that should return markdown directly
+                without parsing. If None, reads from FILE_PARSER_DIRECT_MARKDOWN_HOSTNAMES
+                environment variable (comma-separated).
         """
         super().__init__(embedder, llm)
         self.parser = parser
 
+        # Resolve direct-markdown hostnames: explicit argument first, then environment variable
+        if direct_markdown_hostnames is not None:
+            self.direct_markdown_hostnames = direct_markdown_hostnames
+        else:
+            env_hostnames = os.getenv("FILE_PARSER_DIRECT_MARKDOWN_HOSTNAMES", "")
+            if env_hostnames:
+                # Support comma-separated list
+                self.direct_markdown_hostnames = [
+                    h.strip() for h in env_hostnames.split(",") if h.strip()
+                ]
+            else:
+                self.direct_markdown_hostnames = []
+
     def create_source(
         self,
         message: File,
@@ -309,14 +327,25 @@ def parse_fine(
                                 filename = os.path.basename(parsed_url.path) or "downloaded_file"
 
                             # Route based on hostname
-                            if hostname == "139.196.232.20":
-                                # Special handling for 139.196.232.20: directly use response text as markdown
+                            if hostname in self.direct_markdown_hostnames:
+                                # Special handling for configured hostnames: directly use response text as markdown
                                 logger.info(
                                     f"[FileContentParser] Using direct markdown content for {hostname}"
                                 )
                                 parsed_text = response.text
                             else:
-                                logger.warning("[FileContentParser] Outer url not implemented now.")
+                                file_ext = os.path.splitext(filename)[1] or ".tmp"
+
+                                with tempfile.NamedTemporaryFile(
+                                    mode="wb", delete=False, suffix=file_ext
+                                ) as temp_file:
+                                    temp_file.write(response.content)
+                                temp_file_path = temp_file.name
+                                logger.info(
+                                    f"[FileContentParser] Downloaded file to: {temp_file_path}"
+                                )
+                                # Parse the downloaded file
+                                parsed_text = self.parser.parse(temp_file_path)
                         except requests.RequestException as e:
                             logger.error(
                                 f"[FileContentParser] Failed to download URL {url_str}: {e}"
@@ -373,6 +402,8 @@ def parse_fine(
         source = self.create_source(message, info)
 
         # Extract info fields
+        if not info:
+            info = {}
         info_ = info.copy()
         user_id = info_.pop("user_id", "")
         session_id = info_.pop("session_id", "")
diff --git a/src/memos/mem_reader/read_multi_modal/multi_modal_parser.py b/src/memos/mem_reader/read_multi_modal/multi_modal_parser.py
@@ -35,6 +35,7 @@ def __init__(
         embedder: BaseEmbedder,
         llm: BaseLLM | None = None,
         parser: Any | None = None,
+        direct_markdown_hostnames: list[str] | None = None,
     ):
         """
         Initialize MultiModalParser.
@@ -43,6 +44,9 @@ def __init__(
             embedder: Embedder for generating embeddings
             llm: Optional LLM for fine mode processing
             parser: Optional parser for parsing file contents
+            direct_markdown_hostnames: List of hostnames that should return markdown directly
+                without parsing. If None, reads from FILE_PARSER_DIRECT_MARKDOWN_HOSTNAMES
+                environment variable (comma-separated), or an empty list if unset.
         """
         self.embedder = embedder
         self.llm = llm
@@ -55,7 +59,9 @@ def __init__(
         self.assistant_parser = AssistantParser(embedder, llm)
         self.tool_parser = ToolParser(embedder, llm)
         self.text_content_parser = TextContentParser(embedder, llm)
-        self.file_content_parser = FileContentParser(embedder, llm, parser)
+        self.file_content_parser = FileContentParser(
+            embedder, llm, parser, direct_markdown_hostnames=direct_markdown_hostnames
+        )
         self.image_parser = ImageParser(embedder, llm)
         self.audio_parser = None  # future