Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion examples/example_5_with_lazyllm_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ async def generate_skill_guide(skills, service, output_file):
prompt = f"Summarize these skills into a guide:\n\n{skills_text}"

# Use LazyLLM via service
summary = await service.llm_client.summarize(text=prompt)
summary = await service.llm_client.chat(text=prompt)

with open(output_file, "w", encoding="utf-8") as f:
f.write(summary)
Expand Down
2 changes: 1 addition & 1 deletion src/memu/app/crud.py
Original file line number Diff line number Diff line change
Expand Up @@ -654,7 +654,7 @@ async def _patch_category_summaries(
prompt = self._build_category_patch_prompt(
category=cat, content_before=content_before, content_after=content_after
)
tasks.append(client.summarize(prompt, system_prompt=None))
tasks.append(client.chat(prompt))
target_ids.append(cid)
if not tasks:
return
Expand Down
24 changes: 11 additions & 13 deletions src/memu/app/memorize.py
Original file line number Diff line number Diff line change
Expand Up @@ -528,7 +528,8 @@ async def _generate_entries_from_text(
for mtype in memory_types
]
valid_prompts = [prompt for prompt in prompts if prompt.strip()]
tasks = [client.summarize(prompt_text) for prompt_text in valid_prompts]
# These prompts are instructions that request structured output, not text summaries.
tasks = [client.chat(prompt_text) for prompt_text in valid_prompts]
responses = await asyncio.gather(*tasks)
return self._parse_structured_entries(memory_types, responses)

Expand Down Expand Up @@ -799,7 +800,7 @@ async def _preprocess_conversation(
preprocessed_text = format_conversation_for_preprocess(text)
prompt = template.format(conversation=self._escape_prompt_value(preprocessed_text))
client = llm_client or self._get_llm_client()
processed = await client.summarize(prompt, system_prompt=None)
processed = await client.chat(prompt)
_conv, segments = self._parse_conversation_preprocess_with_segments(processed, preprocessed_text)

# Important: always use the original JSON-derived, indexed conversation text for downstream
Expand Down Expand Up @@ -829,16 +830,13 @@ async def _preprocess_conversation(

async def _summarize_segment(self, segment_text: str, llm_client: Any | None = None) -> str | None:
"""Summarize a single conversation segment."""
prompt = f"""Summarize the following conversation segment in 1-2 concise sentences.
Focus on the main topic or theme discussed.

Conversation:
{segment_text}

Summary:"""
system_prompt = (
"Summarize the given conversation segment in 1-2 concise sentences. "
"Focus on the main topic or theme discussed."
)
try:
client = llm_client or self._get_llm_client()
response = await client.summarize(prompt, system_prompt=None)
response = await client.chat(segment_text, system_prompt=system_prompt)
return response.strip() if response else None
except Exception:
logger.exception("Failed to summarize segment")
Expand Down Expand Up @@ -915,7 +913,7 @@ async def _preprocess_document(
"""Preprocess document data - condense and extract caption"""
prompt = template.format(document_text=self._escape_prompt_value(text))
client = llm_client or self._get_llm_client()
processed = await client.summarize(prompt, system_prompt=None)
processed = await client.chat(prompt)
processed_content, caption = self._parse_multimodal_response(processed, "processed_content", "caption")
return [{"text": processed_content or text, "caption": caption}]

Expand All @@ -925,7 +923,7 @@ async def _preprocess_audio(
"""Preprocess audio data - format transcription and extract caption"""
prompt = template.format(transcription=self._escape_prompt_value(text))
client = llm_client or self._get_llm_client()
processed = await client.summarize(prompt, system_prompt=None)
processed = await client.chat(prompt)
processed_content, caption = self._parse_multimodal_response(processed, "processed_content", "caption")
return [{"text": processed_content or text, "caption": caption}]

Expand Down Expand Up @@ -1123,7 +1121,7 @@ async def _update_category_summaries(
if not cat or not memories:
continue
prompt = self._build_category_summary_prompt(category=cat, new_memories=memories)
tasks.append(client.summarize(prompt, system_prompt=None))
tasks.append(client.chat(prompt))
target_ids.append(cid)
if not tasks:
return updated_summaries
Expand Down
2 changes: 1 addition & 1 deletion src/memu/app/patch.py
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,7 @@ async def _patch_category_summaries(
prompt = self._build_category_patch_prompt(
category=cat, content_before=content_before, content_after=content_after
)
tasks.append(client.summarize(prompt, system_prompt=None))
tasks.append(client.chat(prompt))
target_ids.append(cid)
if not tasks:
return
Expand Down
8 changes: 4 additions & 4 deletions src/memu/app/retrieve.py
Original file line number Diff line number Diff line change
Expand Up @@ -777,7 +777,7 @@ async def _decide_if_retrieval_needed(

sys_prompt = system_prompt or PRE_RETRIEVAL_SYSTEM_PROMPT
client = llm_client or self._get_llm_client()
response = await client.summarize(user_prompt, system_prompt=sys_prompt)
response = await client.chat(user_prompt, system_prompt=sys_prompt)
decision = self._extract_decision(response)
rewritten = self._extract_rewritten_query(response) or query

Expand Down Expand Up @@ -1235,7 +1235,7 @@ async def _llm_rank_categories(
)

client = llm_client or self._get_llm_client()
llm_response = await client.summarize(prompt, system_prompt=None)
llm_response = await client.chat(prompt)
return self._parse_llm_category_response(llm_response, store, categories=category_pool)

async def _llm_rank_items(
Expand Down Expand Up @@ -1274,7 +1274,7 @@ async def _llm_rank_items(
)

client = llm_client or self._get_llm_client()
llm_response = await client.summarize(prompt, system_prompt=None)
llm_response = await client.chat(prompt)
return self._parse_llm_item_response(llm_response, store, items=item_pool)

async def _llm_rank_resources(
Expand Down Expand Up @@ -1319,7 +1319,7 @@ async def _llm_rank_resources(
)

client = llm_client or self._get_llm_client()
llm_response = await client.summarize(prompt, system_prompt=None)
llm_response = await client.chat(prompt)
return self._parse_llm_resource_response(llm_response, store, resources=resource_pool)

def _parse_llm_category_response(
Expand Down
29 changes: 29 additions & 0 deletions src/memu/llm/http_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,35 @@ def __init__(
self.timeout = timeout
self.embed_model = embed_model or chat_model

async def chat(
    self,
    prompt: str,
    *,
    max_tokens: int | None = None,
    system_prompt: str | None = None,
    temperature: float = 0.2,
) -> tuple[str, dict[str, Any]]:
    """Run a generic chat completion against the configured HTTP backend.

    Args:
        prompt: User message content.
        max_tokens: Optional generation cap; omitted from the payload when None.
        system_prompt: Optional system instruction sent as a leading system message.
        temperature: Sampling temperature for the completion.

    Returns:
        Tuple of (parsed response text, raw response JSON dict).
    """
    # Build the OpenAI-style message list; system message only when provided.
    chat_messages: list[dict[str, Any]] = (
        [{"role": "system", "content": system_prompt}] if system_prompt is not None else []
    )
    chat_messages.append({"role": "user", "content": prompt})

    body: dict[str, Any] = {
        "model": self.chat_model,
        "messages": chat_messages,
        "temperature": temperature,
    }
    if max_tokens is not None:
        body["max_tokens"] = max_tokens

    # One short-lived client per call; chat reuses the same endpoint and
    # response parser as summarize, since both are plain chat completions.
    async with httpx.AsyncClient(base_url=self.base_url, timeout=self.timeout) as http:
        response = await http.post(self.summary_endpoint, json=body, headers=self._headers())
        response.raise_for_status()
        payload = response.json()
        logger.debug("HTTP LLM chat response: %s", payload)
        return self.backend.parse_summary_response(payload), payload

async def summarize(
self, text: str, max_tokens: int | None = None, system_prompt: str | None = None
) -> tuple[str, dict[str, Any]]:
Expand Down
25 changes: 25 additions & 0 deletions src/memu/llm/lazyllm_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,31 @@ async def _call_async(self, client: Any, *args: Any, **kwargs: Any) -> Any:
else:
return await asyncio.to_thread(client, *args)

async def chat(
    self,
    text: str,
    *,
    max_tokens: int | None = None,
    system_prompt: str | None = None,
    temperature: float = 0.2,
) -> str:
    """
    Run a generic chat completion for the input text using the configured LLM backend.

    Args:
        text: The user input to send to the model.
        max_tokens: (Optional) Maximum number of tokens to generate.
            NOTE(review): currently ignored — it is never forwarded to the
            OnlineModule call; confirm whether the lazyllm backend supports it.
        system_prompt: (Optional) System instruction to guide the LLM behavior;
            prepended to the prompt as plain text rather than a separate message.
        temperature: Sampling temperature.
            NOTE(review): currently ignored by this client — confirm.
    Return:
        The generated response text as a string.
    """
    client = lazyllm.namespace("MEMU").OnlineModule(source=self.llm_source, model=self.chat_model, type="llm")
    # Fold the optional system instruction into a single flat prompt.
    prefix = f"{system_prompt}\n\n" if system_prompt else ""
    full_prompt = f"{prefix}text:\n{text}"
    # Fixed: log message previously said "Summarizing" — copied from summarize().
    LOG.debug(f"Chat completion with {self.llm_source}/{self.chat_model}")
    response = await self._call_async(client, full_prompt)
    return cast(str, response)

async def summarize(
self,
text: str,
Expand Down
27 changes: 27 additions & 0 deletions src/memu/llm/openai_sdk.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,33 @@ def __init__(
self.embed_batch_size = embed_batch_size
self.client = AsyncOpenAI(api_key=self.api_key, base_url=self.base_url)

async def chat(
    self,
    prompt: str,
    *,
    max_tokens: int | None = None,
    system_prompt: str | None = None,
    temperature: float = 0.2,
) -> tuple[str, ChatCompletion]:
    """Run a generic chat completion.

    Returns a tuple of (response text, raw ChatCompletion object); the text
    is the empty string when the model returns no content.
    """
    # Assemble the message list, leading with the optional system instruction.
    messages: list[ChatCompletionMessageParam] = []
    if system_prompt is not None:
        messages.append({"role": "system", "content": system_prompt})
    messages.append({"role": "user", "content": prompt})

    completion = await self.client.chat.completions.create(
        model=self.chat_model,
        messages=messages,
        temperature=temperature,
        max_tokens=max_tokens,
    )
    text = completion.choices[0].message.content
    logger.debug("OpenAI chat response: %s", completion)
    return text or "", completion

async def summarize(
self,
text: str,
Expand Down
34 changes: 34 additions & 0 deletions src/memu/llm/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,40 @@ async def _call() -> Any:
response_builder=_build_text_response_view,
)

async def chat(
    self,
    prompt: str,
    *,
    max_tokens: int | None = None,
    system_prompt: str | None = None,
    temperature: float = 0.2,
) -> Any:
    """Proxy a generic chat completion through the instrumented invoke path.

    Builds a request view for observability, then delegates to the wrapped
    client's ``chat`` via ``_invoke`` so the call is recorded uniformly.
    """
    call_metadata = {
        "system_prompt_chars": len(system_prompt or ""),
        "max_tokens": max_tokens,
        "temperature": temperature,
    }
    request_view = _build_text_request_view("chat", prompt, metadata=call_metadata)

    async def _delegate() -> Any:
        # Forward all generation options unchanged to the underlying client.
        return await self._client.chat(
            prompt,
            max_tokens=max_tokens,
            system_prompt=system_prompt,
            temperature=temperature,
        )

    return await self._invoke(
        kind="chat",
        call_fn=_delegate,
        request_view=request_view,
        model=self._chat_model,
        response_builder=_build_text_response_view,
    )

async def vision(
self,
prompt: str,
Expand Down
2 changes: 1 addition & 1 deletion tests/test_lazyllm.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ async def test_lazyllm_client():
print("\n[Test 1] Testing summarization...")
try:
test_text = "这是一段关于Python编程的文本。Python是一种高级编程语言,具有简单易学的语法。它被广泛用于数据分析、机器学习和Web开发。" # noqa: RUF001
result = await client.summarize(test_text)
result = await client.chat(test_text)
print("✓ Summarization successful")
print(f" Result: {result[:100]}...")
except Exception as e:
Expand Down