Skip to content

Commit 5e23d37

Browse files
committed
feat: update language detection in string-fine of multi-modal-struct
1 parent e823609 commit 5e23d37

File tree

1 file changed

+65
-2
lines changed

1 file changed

+65
-2
lines changed

src/memos/mem_reader/multi_modal_struct.py

Lines changed: 65 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from memos.configs.mem_reader import MultiModalStructMemReaderConfig
99
from memos.context.context import ContextThreadPoolExecutor
1010
from memos.mem_reader.read_multi_modal import MultiModalParser, detect_lang
11-
from memos.mem_reader.simple_struct import SimpleStructMemReader
11+
from memos.mem_reader.simple_struct import PROMPT_DICT, SimpleStructMemReader
1212
from memos.memories.textual.item import TextualMemoryItem
1313
from memos.templates.tool_mem_prompts import TOOL_TRAJECTORY_PROMPT_EN, TOOL_TRAJECTORY_PROMPT_ZH
1414
from memos.types import MessagesType
@@ -248,6 +248,69 @@ def _build_window_from_items(
248248

249249
return aggregated_item
250250

251+
def _get_llm_response(
    self, mem_str: str, custom_tags: list[str] | None = None, sources: list | None = None
) -> dict:
    """
    Override the parent method to improve language detection by using actual
    text content from *sources* instead of the JSON-structured memory string.

    Args:
        mem_str: Memory string (may contain JSON structures).
        custom_tags: Optional custom tags injected into the prompt.
        sources: Optional list of SourceMessage objects (or plain dicts)
            whose ``content`` fields are concatenated for language detection.

    Returns:
        Parsed LLM response dictionary; on any generation/parsing failure,
        a fallback dict wrapping ``mem_str`` as a single "UserMemory" item.
    """
    # Prefer raw text from sources: JSON punctuation/keys inside mem_str
    # can skew language detection toward the wrong language.
    text_for_lang_detection = mem_str
    if sources:
        source_texts = []
        for source in sources:
            if hasattr(source, "content") and source.content:
                source_texts.append(source.content)
            # walrus binding avoids looking up the "content" key twice
            elif isinstance(source, dict) and (content := source.get("content")):
                source_texts.append(content)

        # Only override when at least one source carried text content.
        if source_texts:
            text_for_lang_detection = " ".join(source_texts)

    # Detect language on the extracted text, then render the chat prompt.
    lang = detect_lang(text_for_lang_detection)
    template = PROMPT_DICT["chat"][lang]
    examples = PROMPT_DICT["chat"][f"{lang}_example"]
    prompt = template.replace("${conversation}", mem_str)

    # NOTE(review): the custom_tags template uses a "{custom_tags}"
    # placeholder (no "$"), unlike the "${...}" placeholders above —
    # presumably intentional; keep matching the prompt templates.
    custom_tags_prompt = (
        PROMPT_DICT["custom_tags"][lang].replace("{custom_tags}", str(custom_tags))
        if custom_tags
        else ""
    )
    prompt = prompt.replace("${custom_tags_prompt}", custom_tags_prompt)

    if self.config.remove_prompt_example:
        prompt = prompt.replace(examples, "")
    messages = [{"role": "user", "content": prompt}]
    try:
        response_text = self.llm.generate(messages)
        response_json = self.parse_json_result(response_text)
    except Exception as e:
        # Best-effort fallback: wrap the raw memory string so the pipeline
        # continues instead of dropping the item on LLM/parsing failure.
        logger.error(f"[LLM] Exception during chat generation: {e}")
        response_json = {
            "memory list": [
                {
                    "key": mem_str[:10],
                    "memory_type": "UserMemory",
                    "value": mem_str,
                    "tags": [],
                }
            ],
            "summary": mem_str,
        }
    return response_json
313+
251314
def _process_string_fine(
252315
self,
253316
fast_memory_items: list[TextualMemoryItem],
@@ -271,7 +334,7 @@ def _process_string_fine(
271334
if not isinstance(sources, list):
272335
sources = [sources]
273336
try:
274-
resp = self._get_llm_response(mem_str, custom_tags)
337+
resp = self._get_llm_response(mem_str, custom_tags, sources)
275338
except Exception as e:
276339
logger.error(f"[MultiModalFine] Error calling LLM: {e}")
277340
continue

0 commit comments

Comments
 (0)