Skip to content

Commit b37939c

Browse files
Wang-Daoji and yuan.wang authored
Feat/fix bug 1031 (#459)
* modify bug
* modify bug
* remove print

---------

Co-authored-by: yuan.wang <[email protected]>
1 parent 65a2daf commit b37939c

File tree

2 files changed

+18
-10
lines changed

2 files changed

+18
-10
lines changed

src/memos/mem_reader/simple_struct.py

Lines changed: 14 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -67,9 +67,18 @@ def detect_lang(text):
6767
try:
6868
if not text or not isinstance(text, str):
6969
return "en"
70+
cleaned_text = text
71+
# remove role and timestamp
72+
cleaned_text = re.sub(
73+
r"\b(user|assistant|query|answer)\s*:", "", cleaned_text, flags=re.IGNORECASE
74+
)
75+
cleaned_text = re.sub(r"\[[\d\-:\s]+\]", "", cleaned_text)
76+
77+
# extract chinese characters
7078
chinese_pattern = r"[\u4e00-\u9fff\u3400-\u4dbf\U00020000-\U0002a6df\U0002a700-\U0002b73f\U0002b740-\U0002b81f\U0002b820-\U0002ceaf\uf900-\ufaff]"
71-
chinese_chars = re.findall(chinese_pattern, text)
72-
if len(chinese_chars) / len(re.sub(r"[\s\d\W]", "", text)) > 0.3:
79+
chinese_chars = re.findall(chinese_pattern, cleaned_text)
80+
text_without_special = re.sub(r"[\s\d\W]", "", cleaned_text)
81+
if text_without_special and len(chinese_chars) / len(text_without_special) > 0.3:
7382
return "zh"
7483
return "en"
7584
except Exception:
@@ -466,15 +475,11 @@ def get_scene_data_info(self, scene_data: list, type: str) -> list[str]:
466475
if type == "chat":
467476
for items in scene_data:
468477
result = []
469-
for item in items:
470-
# Convert dictionary to string
471-
if "chat_time" in item:
472-
result.append(item)
473-
else:
474-
result.append(item)
478+
for i, item in enumerate(items):
479+
result.append(item)
475480
if len(result) >= 10:
476481
results.append(result)
477-
context = copy.deepcopy(result[-2:])
482+
context = copy.deepcopy(result[-2:]) if i + 1 < len(items) else []
478483
result = context
479484
if result:
480485
results.append(result)

src/memos/templates/instruction_completion.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,10 @@ def instruct_completion(
4545
"zh": "隐式偏好 > ",
4646
"en": "implicit preference > ",
4747
}
48-
lang = detect_lang(explicit_pref_str + implicit_pref_str)
48+
lang = detect_lang(
49+
explicit_pref_str.replace("Explicit Preference:\n", "")
50+
+ implicit_pref_str.replace("Implicit Preference:\n", "")
51+
)
4952

5053
if not explicit_pref_str and not implicit_pref_str:
5154
return "", ""

0 commit comments

Comments
 (0)