Skip to content

Commit 928393b

Browse files
author
yuan.wang
committed
Fix bugs in language detection and chat-window chunking
1 parent 65a2daf commit 928393b

File tree

2 files changed

+14
-10
lines changed

2 files changed

+14
-10
lines changed

src/memos/mem_reader/simple_struct.py

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -67,9 +67,17 @@ def detect_lang(text):
6767
try:
6868
if not text or not isinstance(text, str):
6969
return "en"
70+
cleaned_text = text
71+
# remove role and timestamp
72+
cleaned_text = re.sub(r'\b(user|assistant|query|answer)\s*:', '', cleaned_text, flags=re.IGNORECASE)
73+
cleaned_text = re.sub(r'\[[\d\-:\s]+\]', '', cleaned_text)
74+
75+
# extract chinese characters
7076
chinese_pattern = r"[\u4e00-\u9fff\u3400-\u4dbf\U00020000-\U0002a6df\U0002a700-\U0002b73f\U0002b740-\U0002b81f\U0002b820-\U0002ceaf\uf900-\ufaff]"
71-
chinese_chars = re.findall(chinese_pattern, text)
72-
if len(chinese_chars) / len(re.sub(r"[\s\d\W]", "", text)) > 0.3:
77+
chinese_chars = re.findall(chinese_pattern, cleaned_text)
78+
text_without_special = re.sub(r"[\s\d\W]", "", cleaned_text)
79+
print(text_without_special)
80+
if text_without_special and len(chinese_chars) / len(text_without_special) > 0.3:
7381
return "zh"
7482
return "en"
7583
except Exception:
@@ -466,15 +474,11 @@ def get_scene_data_info(self, scene_data: list, type: str) -> list[str]:
466474
if type == "chat":
467475
for items in scene_data:
468476
result = []
469-
for item in items:
470-
# Convert dictionary to string
471-
if "chat_time" in item:
472-
result.append(item)
473-
else:
474-
result.append(item)
477+
for i, item in enumerate(items):
478+
result.append(item)
475479
if len(result) >= 10:
476480
results.append(result)
477-
context = copy.deepcopy(result[-2:])
481+
context = copy.deepcopy(result[-2:]) if i + 1 < len(items) else []
478482
result = context
479483
if result:
480484
results.append(result)

src/memos/templates/instruction_completion.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def instruct_completion(
4545
"zh": "隐式偏好 > ",
4646
"en": "implicit preference > ",
4747
}
48-
lang = detect_lang(explicit_pref_str + implicit_pref_str)
48+
lang = detect_lang(explicit_pref_str.replace("Explicit Preference:\n", "") + implicit_pref_str.replace("Implicit Preference:\n", ""))
4949

5050
if not explicit_pref_str and not implicit_pref_str:
5151
return "", ""

0 commit comments

Comments
 (0)