Skip to content

Commit 9ae9737

Browse files
authored
fix: reference for some string (#211)
* feat: reorganize code * feat: reorganize code and fix bad-case reference handling for equations
1 parent 0a9aa70 commit 9ae9737

File tree

3 files changed

+138
-112
lines changed

3 files changed

+138
-112
lines changed

src/memos/mem_os/product.py

Lines changed: 5 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,9 @@
2222
filter_nodes_by_tree_ids,
2323
remove_embedding_recursive,
2424
sort_children_by_memory_type,
25-
split_continuous_references,
25+
)
26+
from memos.mem_os.utils.reference_utils import (
27+
process_streaming_references_complete,
2628
)
2729
from memos.mem_scheduler.schemas.general_schemas import (
2830
ANSWER_LABEL,
@@ -406,71 +408,6 @@ def _build_enhance_system_prompt(
406408
return MEMOS_PRODUCT_ENHANCE_PROMPT + personal_memory_context + outer_memory_context
407409
return MEMOS_PRODUCT_ENHANCE_PROMPT
408410

409-
def _process_streaming_references_complete(self, text_buffer: str) -> tuple[str, str]:
410-
"""
411-
Complete streaming reference processing to ensure reference tags are never split.
412-
413-
Args:
414-
text_buffer (str): The accumulated text buffer.
415-
416-
Returns:
417-
tuple[str, str]: (processed_text, remaining_buffer)
418-
"""
419-
import re
420-
421-
# Pattern to match complete reference tags: [refid:memoriesID]
422-
complete_pattern = r"\[\d+:[^\]]+\]"
423-
424-
# Find all complete reference tags
425-
complete_matches = list(re.finditer(complete_pattern, text_buffer))
426-
427-
if complete_matches:
428-
# Find the last complete tag
429-
last_match = complete_matches[-1]
430-
end_pos = last_match.end()
431-
432-
# Get text up to the end of the last complete tag
433-
processed_text = text_buffer[:end_pos]
434-
remaining_buffer = text_buffer[end_pos:]
435-
436-
# Apply reference splitting to the processed text
437-
processed_text = split_continuous_references(processed_text)
438-
439-
return processed_text, remaining_buffer
440-
441-
# Check for incomplete reference tags
442-
# Look for opening bracket with number and colon
443-
opening_pattern = r"\[\d+:"
444-
opening_matches = list(re.finditer(opening_pattern, text_buffer))
445-
446-
if opening_matches:
447-
# Find the last opening tag
448-
last_opening = opening_matches[-1]
449-
opening_start = last_opening.start()
450-
451-
# Check if we have a complete opening pattern
452-
if last_opening.end() <= len(text_buffer):
453-
# We have a complete opening pattern, keep everything in buffer
454-
return "", text_buffer
455-
else:
456-
# Incomplete opening pattern, return text before it
457-
processed_text = text_buffer[:opening_start]
458-
# Apply reference splitting to the processed text
459-
processed_text = split_continuous_references(processed_text)
460-
return processed_text, text_buffer[opening_start:]
461-
462-
# Check for partial opening pattern (starts with [ but not complete)
463-
if "[" in text_buffer:
464-
ref_start = text_buffer.find("[")
465-
processed_text = text_buffer[:ref_start]
466-
# Apply reference splitting to the processed text
467-
processed_text = split_continuous_references(processed_text)
468-
return processed_text, text_buffer[ref_start:]
469-
470-
# No reference tags found, apply reference splitting and return all text
471-
processed_text = split_continuous_references(text_buffer)
472-
return processed_text, ""
473-
474411
def _extract_references_from_response(self, response: str) -> tuple[str, list[dict]]:
475412
"""
476413
Extract reference information from the response and return clean text.
@@ -868,7 +805,7 @@ def chat_with_references(
868805
full_response += chunk
869806

870807
# Process buffer to ensure complete reference tags
871-
processed_chunk, remaining_buffer = self._process_streaming_references_complete(buffer)
808+
processed_chunk, remaining_buffer = process_streaming_references_complete(buffer)
872809

873810
if processed_chunk:
874811
chunk_data = f"data: {json.dumps({'type': 'text', 'data': processed_chunk}, ensure_ascii=False)}\n\n"
@@ -877,7 +814,7 @@ def chat_with_references(
877814

878815
# Process any remaining buffer
879816
if buffer:
880-
processed_chunk, remaining_buffer = self._process_streaming_references_complete(buffer)
817+
processed_chunk, remaining_buffer = process_streaming_references_complete(buffer)
881818
if processed_chunk:
882819
chunk_data = f"data: {json.dumps({'type': 'text', 'data': processed_chunk}, ensure_ascii=False)}\n\n"
883820
yield chunk_data

src/memos/mem_os/utils/format_utils.py

Lines changed: 0 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1363,47 +1363,3 @@ def clean_json_response(response: str) -> str:
13631363
str: Clean JSON string without markdown formatting
13641364
"""
13651365
return response.replace("```json", "").replace("```", "").strip()
1366-
1367-
1368-
def split_continuous_references(text: str) -> str:
1369-
"""
1370-
Split continuous reference tags into individual reference tags.
1371-
1372-
Converts patterns like [1:92ff35fb, 4:bfe6f044] to [1:92ff35fb] [4:bfe6f044]
1373-
1374-
Only processes text if:
1375-
1. '[' appears exactly once
1376-
2. ']' appears exactly once
1377-
3. Contains commas between '[' and ']'
1378-
1379-
Args:
1380-
text (str): Text containing reference tags
1381-
1382-
Returns:
1383-
str: Text with split reference tags, or original text if conditions not met
1384-
"""
1385-
# Early return if text is empty
1386-
if not text:
1387-
return text
1388-
# Check if '[' appears exactly once
1389-
if text.count("[") != 1:
1390-
return text
1391-
# Check if ']' appears exactly once
1392-
if text.count("]") != 1:
1393-
return text
1394-
# Find positions of brackets
1395-
open_bracket_pos = text.find("[")
1396-
close_bracket_pos = text.find("]")
1397-
1398-
# Check if brackets are in correct order
1399-
if open_bracket_pos >= close_bracket_pos:
1400-
return text
1401-
# Extract content between brackets
1402-
content_between_brackets = text[open_bracket_pos + 1 : close_bracket_pos]
1403-
# Check if there's a comma between brackets
1404-
if "," not in content_between_brackets:
1405-
return text
1406-
text = text.replace(content_between_brackets, content_between_brackets.replace(", ", "]["))
1407-
text = text.replace(content_between_brackets, content_between_brackets.replace(",", "]["))
1408-
1409-
return text
src/memos/mem_os/utils/reference_utils.py

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
def split_continuous_references(text: str) -> str:
    """
    Split one combined reference tag into individual reference tags.

    Converts a tag such as ``[1:92ff35fb, 4:bfe6f044]`` into
    ``[1:92ff35fb][4:bfe6f044]``. No separator is inserted between the
    resulting tags, and whitespace around each item is dropped.

    The text is only rewritten when all of the following hold:
        1. '[' appears exactly once
        2. ']' appears exactly once, and after the '['
        3. The content between the brackets contains a comma
        4. Every comma-separated item has the form ``<digits>:<id>``

    Condition 4 keeps ordinary bracketed text (for example ``[1, 2]`` or a
    bracketed equation containing commas) from being rewritten as if it were
    a list of references.

    Args:
        text (str): Text that may contain one combined reference tag.

    Returns:
        str: Text with the combined tag split, or the original text unchanged
            if the conditions above are not met.
    """
    # Empty input, or anything other than exactly one bracket pair, is left alone.
    if not text or text.count("[") != 1 or text.count("]") != 1:
        return text

    open_pos = text.find("[")
    close_pos = text.find("]")
    # A ']' that precedes the '[' does not enclose anything.
    if open_pos >= close_pos:
        return text

    inner = text[open_pos + 1 : close_pos]
    if "," not in inner:
        return text

    # Splitting on ',' and stripping handles ", " and "," (even mixed) uniformly.
    items = [item.strip() for item in inner.split(",")]
    for item in items:
        ref_id, _, memory_id = item.partition(":")
        # Require "<digits>:<non-empty id>"; otherwise this is not a reference list.
        if not (ref_id.isdecimal() and memory_id):
            return text

    # Rebuild by position so that identical text outside the brackets is untouched.
    return text[: open_pos + 1] + "][".join(items) + text[close_pos:]
43+
44+
45+
def process_streaming_references_complete(text_buffer: str) -> tuple[str, str]:
    """
    Split a streaming text buffer so that a reference tag is never cut in half.

    A reference tag has the form ``[<digits>:<memory id>]``. If the buffer ends
    with text that could still grow into such a tag, that tail is held back and
    everything before it is released. The tails that are held back are:

        * a bare ``[`` or ``[`` followed only by digits (``[``, ``[12``)
        * ``[<digits>:`` followed by any text that has no closing ``]`` yet
          (``[12:``, ``[12:92ff``, ``[1:92ff35fb, 4:bf``)

    Any other unclosed ``[`` (for example the start of a bracketed equation
    such as ``[ \\Delta U = Q - W ]``) cannot become a reference tag, so it is
    released immediately instead of stalling the stream.

    The released text is passed through ``split_continuous_references``.

    Args:
        text_buffer (str): The accumulated text buffer.

    Returns:
        tuple[str, str]: (processed_text, remaining_buffer). ``processed_text``
            is safe to emit now; ``remaining_buffer`` is the held-back tail
            (empty when nothing is pending).
    """
    import re

    # One end-anchored pattern covers every reference-like prefix:
    #   "\d+:[^\]]*"  -> "[12:" plus any not-yet-closed content
    #   "\d*:?"       -> "[", "[12" (and the degenerate "[:")
    # re.search returns the leftmost match, i.e. the earliest '[' from which
    # the rest of the buffer is still an unfinished reference.
    pending = re.search(r"\[(?:\d+:[^\]]*|\d*:?)$", text_buffer)

    if pending is None:
        # Nothing at the end of the buffer can still become a reference tag.
        return split_continuous_references(text_buffer), ""

    cut = pending.start()
    return split_continuous_references(text_buffer[:cut]), text_buffer[cut:]

0 commit comments

Comments
 (0)