Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion examples/example_5_with_lazyllm_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ async def generate_skill_guide(skills, service, output_file):
prompt = f"Summarize these skills into a guide:\n\n{skills_text}"

# Use LazyLLM via service
summary = await service.llm_client.summarize(text=prompt)
summary = await service.llm_client.chat(text=prompt)

with open(output_file, "w", encoding="utf-8") as f:
f.write(summary)
Expand Down
2 changes: 1 addition & 1 deletion src/memu/app/crud.py
Original file line number Diff line number Diff line change
Expand Up @@ -654,7 +654,7 @@ async def _patch_category_summaries(
prompt = self._build_category_patch_prompt(
category=cat, content_before=content_before, content_after=content_after
)
tasks.append(client.summarize(prompt, system_prompt=None))
tasks.append(client.chat(prompt))
target_ids.append(cid)
if not tasks:
return
Expand Down
24 changes: 11 additions & 13 deletions src/memu/app/memorize.py
Original file line number Diff line number Diff line change
Expand Up @@ -528,7 +528,8 @@ async def _generate_entries_from_text(
for mtype in memory_types
]
valid_prompts = [prompt for prompt in prompts if prompt.strip()]
tasks = [client.summarize(prompt_text) for prompt_text in valid_prompts]
# These prompts are instructions that request structured output, not text summaries.
tasks = [client.chat(prompt_text) for prompt_text in valid_prompts]
responses = await asyncio.gather(*tasks)
return self._parse_structured_entries(memory_types, responses)

Expand Down Expand Up @@ -799,7 +800,7 @@ async def _preprocess_conversation(
preprocessed_text = format_conversation_for_preprocess(text)
prompt = template.format(conversation=self._escape_prompt_value(preprocessed_text))
client = llm_client or self._get_llm_client()
processed = await client.summarize(prompt, system_prompt=None)
processed = await client.chat(prompt)
_conv, segments = self._parse_conversation_preprocess_with_segments(processed, preprocessed_text)

# Important: always use the original JSON-derived, indexed conversation text for downstream
Expand Down Expand Up @@ -829,16 +830,13 @@ async def _preprocess_conversation(

async def _summarize_segment(self, segment_text: str, llm_client: Any | None = None) -> str | None:
"""Summarize a single conversation segment."""
prompt = f"""Summarize the following conversation segment in 1-2 concise sentences.
Focus on the main topic or theme discussed.

Conversation:
{segment_text}

Summary:"""
system_prompt = (
"Summarize the given conversation segment in 1-2 concise sentences. "
"Focus on the main topic or theme discussed."
)
try:
client = llm_client or self._get_llm_client()
response = await client.summarize(prompt, system_prompt=None)
response = await client.chat(segment_text, system_prompt=system_prompt)
return response.strip() if response else None
except Exception:
logger.exception("Failed to summarize segment")
Expand Down Expand Up @@ -915,7 +913,7 @@ async def _preprocess_document(
"""Preprocess document data - condense and extract caption"""
prompt = template.format(document_text=self._escape_prompt_value(text))
client = llm_client or self._get_llm_client()
processed = await client.summarize(prompt, system_prompt=None)
processed = await client.chat(prompt)
processed_content, caption = self._parse_multimodal_response(processed, "processed_content", "caption")
return [{"text": processed_content or text, "caption": caption}]

Expand All @@ -925,7 +923,7 @@ async def _preprocess_audio(
"""Preprocess audio data - format transcription and extract caption"""
prompt = template.format(transcription=self._escape_prompt_value(text))
client = llm_client or self._get_llm_client()
processed = await client.summarize(prompt, system_prompt=None)
processed = await client.chat(prompt)
processed_content, caption = self._parse_multimodal_response(processed, "processed_content", "caption")
return [{"text": processed_content or text, "caption": caption}]

Expand Down Expand Up @@ -1123,7 +1121,7 @@ async def _update_category_summaries(
if not cat or not memories:
continue
prompt = self._build_category_summary_prompt(category=cat, new_memories=memories)
tasks.append(client.summarize(prompt, system_prompt=None))
tasks.append(client.chat(prompt))
target_ids.append(cid)
if not tasks:
return updated_summaries
Expand Down
2 changes: 1 addition & 1 deletion src/memu/app/patch.py
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,7 @@ async def _patch_category_summaries(
prompt = self._build_category_patch_prompt(
category=cat, content_before=content_before, content_after=content_after
)
tasks.append(client.summarize(prompt, system_prompt=None))
tasks.append(client.chat(prompt))
target_ids.append(cid)
if not tasks:
return
Expand Down
8 changes: 4 additions & 4 deletions src/memu/app/retrieve.py
Original file line number Diff line number Diff line change
Expand Up @@ -777,7 +777,7 @@ async def _decide_if_retrieval_needed(

sys_prompt = system_prompt or PRE_RETRIEVAL_SYSTEM_PROMPT
client = llm_client or self._get_llm_client()
response = await client.summarize(user_prompt, system_prompt=sys_prompt)
response = await client.chat(user_prompt, system_prompt=sys_prompt)
decision = self._extract_decision(response)
rewritten = self._extract_rewritten_query(response) or query

Expand Down Expand Up @@ -1235,7 +1235,7 @@ async def _llm_rank_categories(
)

client = llm_client or self._get_llm_client()
llm_response = await client.summarize(prompt, system_prompt=None)
llm_response = await client.chat(prompt)
return self._parse_llm_category_response(llm_response, store, categories=category_pool)

async def _llm_rank_items(
Expand Down Expand Up @@ -1274,7 +1274,7 @@ async def _llm_rank_items(
)

client = llm_client or self._get_llm_client()
llm_response = await client.summarize(prompt, system_prompt=None)
llm_response = await client.chat(prompt)
return self._parse_llm_item_response(llm_response, store, items=item_pool)

async def _llm_rank_resources(
Expand Down Expand Up @@ -1319,7 +1319,7 @@ async def _llm_rank_resources(
)

client = llm_client or self._get_llm_client()
llm_response = await client.summarize(prompt, system_prompt=None)
llm_response = await client.chat(prompt)
return self._parse_llm_resource_response(llm_response, store, resources=resource_pool)

def _parse_llm_category_response(
Expand Down
29 changes: 29 additions & 0 deletions src/memu/llm/http_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,35 @@ def __init__(
self.timeout = timeout
self.embed_model = embed_model or chat_model

async def chat(
    self,
    prompt: str,
    *,
    max_tokens: int | None = None,
    system_prompt: str | None = None,
    temperature: float = 0.2,
) -> tuple[str, dict[str, Any]]:
    """Run a generic chat completion against the configured HTTP backend.

    Args:
        prompt: User message content.
        max_tokens: Optional generation cap; omitted from the payload when None.
        system_prompt: Optional system instruction sent as a leading system message.
        temperature: Sampling temperature for the completion.

    Returns:
        Tuple of (parsed response text, raw response JSON dict).
    """
    # Build the OpenAI-style message list; system message only when provided.
    chat_messages: list[dict[str, Any]] = (
        [{"role": "system", "content": system_prompt}] if system_prompt is not None else []
    )
    chat_messages.append({"role": "user", "content": prompt})

    body: dict[str, Any] = {
        "model": self.chat_model,
        "messages": chat_messages,
        "temperature": temperature,
    }
    if max_tokens is not None:
        body["max_tokens"] = max_tokens

    # One short-lived client per call; chat reuses the same endpoint and
    # response parser as summarize, since both are plain chat completions.
    async with httpx.AsyncClient(base_url=self.base_url, timeout=self.timeout) as http:
        response = await http.post(self.summary_endpoint, json=body, headers=self._headers())
        response.raise_for_status()
        payload = response.json()
        logger.debug("HTTP LLM chat response: %s", payload)
        return self.backend.parse_summary_response(payload), payload

async def summarize(
self, text: str, max_tokens: int | None = None, system_prompt: str | None = None
) -> tuple[str, dict[str, Any]]:
Expand Down
25 changes: 25 additions & 0 deletions src/memu/llm/lazyllm_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,31 @@ async def _call_async(self, client: Any, *args: Any, **kwargs: Any) -> Any:
else:
return await asyncio.to_thread(client, *args)

async def chat(
    self,
    text: str,
    *,
    max_tokens: int | None = None,
    system_prompt: str | None = None,
    temperature: float = 0.2,
) -> str:
    """
    Run a generic chat completion for the input text using the configured LLM backend.

    Args:
        text: The user input to send to the model.
        max_tokens: (Optional) Maximum number of tokens to generate.
            NOTE(review): currently ignored — it is never forwarded to the
            OnlineModule call; confirm whether the lazyllm backend supports it.
        system_prompt: (Optional) System instruction to guide the LLM behavior;
            prepended to the prompt as plain text rather than a separate message.
        temperature: Sampling temperature.
            NOTE(review): currently ignored by this client — confirm.
    Return:
        The generated response text as a string.
    """
    client = lazyllm.namespace("MEMU").OnlineModule(source=self.llm_source, model=self.chat_model, type="llm")
    # Fold the optional system instruction into a single flat prompt.
    prefix = f"{system_prompt}\n\n" if system_prompt else ""
    full_prompt = f"{prefix}text:\n{text}"
    # Fixed: log message previously said "Summarizing" — copied from summarize().
    LOG.debug(f"Chat completion with {self.llm_source}/{self.chat_model}")
    response = await self._call_async(client, full_prompt)
    return cast(str, response)

async def summarize(
self,
text: str,
Expand Down
27 changes: 27 additions & 0 deletions src/memu/llm/openai_sdk.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,33 @@ def __init__(
self.embed_batch_size = embed_batch_size
self.client = AsyncOpenAI(api_key=self.api_key, base_url=self.base_url)

async def chat(
    self,
    prompt: str,
    *,
    max_tokens: int | None = None,
    system_prompt: str | None = None,
    temperature: float = 0.2,
) -> tuple[str, ChatCompletion]:
    """Run a generic chat completion.

    Returns a tuple of (response text, raw ChatCompletion object); the text
    is the empty string when the model returns no content.
    """
    # Assemble the message list, leading with the optional system instruction.
    messages: list[ChatCompletionMessageParam] = []
    if system_prompt is not None:
        messages.append({"role": "system", "content": system_prompt})
    messages.append({"role": "user", "content": prompt})

    completion = await self.client.chat.completions.create(
        model=self.chat_model,
        messages=messages,
        temperature=temperature,
        max_tokens=max_tokens,
    )
    text = completion.choices[0].message.content
    logger.debug("OpenAI chat response: %s", completion)
    return text or "", completion

async def summarize(
self,
text: str,
Expand Down
34 changes: 34 additions & 0 deletions src/memu/llm/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,40 @@ async def _call() -> Any:
response_builder=_build_text_response_view,
)

async def chat(
    self,
    prompt: str,
    *,
    max_tokens: int | None = None,
    system_prompt: str | None = None,
    temperature: float = 0.2,
) -> Any:
    """Proxy a generic chat completion through the instrumented invoke path.

    Builds a request view for observability, then delegates to the wrapped
    client's ``chat`` via ``_invoke`` so the call is recorded uniformly.
    """
    call_metadata = {
        "system_prompt_chars": len(system_prompt or ""),
        "max_tokens": max_tokens,
        "temperature": temperature,
    }
    request_view = _build_text_request_view("chat", prompt, metadata=call_metadata)

    async def _delegate() -> Any:
        # Forward all generation options unchanged to the underlying client.
        return await self._client.chat(
            prompt,
            max_tokens=max_tokens,
            system_prompt=system_prompt,
            temperature=temperature,
        )

    return await self._invoke(
        kind="chat",
        call_fn=_delegate,
        request_view=request_view,
        model=self._chat_model,
        response_builder=_build_text_response_view,
    )

async def vision(
self,
prompt: str,
Expand Down
2 changes: 1 addition & 1 deletion tests/test_lazyllm.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ async def test_lazyllm_client():
print("\n[Test 1] Testing summarization...")
try:
test_text = "这是一段关于Python编程的文本。Python是一种高级编程语言,具有简单易学的语法。它被广泛用于数据分析、机器学习和Web开发。" # noqa: RUF001
result = await client.summarize(test_text)
result = await client.chat(test_text)
print("✓ Summarization successful")
print(f" Result: {result[:100]}...")
except Exception as e:
Expand Down