Skip to content

Commit f41942d

Browse files
authored
Merge branch 'dev' into feat/deep-search
2 parents b022b04 + 111b4d4 commit f41942d

32 files changed

+653
-320
lines changed

docker/requirements.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,4 +160,3 @@ xlrd==2.0.2
160160
xlsxwriter==3.2.5
161161
prometheus-client==0.23.1
162162
pymilvus==2.5.12
163-
langchain-text-splitters==1.0.0

examples/mem_scheduler/memos_w_scheduler.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -13,15 +13,15 @@
1313
from memos.mem_cube.general import GeneralMemCube
1414
from memos.mem_os.main import MOS
1515
from memos.mem_scheduler.general_scheduler import GeneralScheduler
16-
from memos.mem_scheduler.schemas.general_schemas import (
17-
ADD_LABEL,
18-
ANSWER_LABEL,
19-
MEM_ARCHIVE_LABEL,
20-
MEM_ORGANIZE_LABEL,
21-
MEM_UPDATE_LABEL,
22-
QUERY_LABEL,
23-
)
2416
from memos.mem_scheduler.schemas.message_schemas import ScheduleLogForWebItem
17+
from memos.mem_scheduler.schemas.task_schemas import (
18+
ADD_TASK_LABEL,
19+
ANSWER_TASK_LABEL,
20+
MEM_ARCHIVE_TASK_LABEL,
21+
MEM_ORGANIZE_TASK_LABEL,
22+
MEM_UPDATE_TASK_LABEL,
23+
QUERY_TASK_LABEL,
24+
)
2525
from memos.mem_scheduler.utils.filter_utils import transform_name_to_key
2626

2727

@@ -118,24 +118,24 @@ def _first_content() -> str:
118118
return memcube_content[0].get("content", "") or content
119119
return content
120120

121-
if label in ("addMessage", QUERY_LABEL, ANSWER_LABEL):
121+
if label in ("addMessage", QUERY_TASK_LABEL, ANSWER_TASK_LABEL):
122122
target_cube = cube_display.replace("MemCube", "")
123123
title = _format_title(item.timestamp, f"addMessages to {target_cube} MemCube")
124124
return title, _truncate_with_rules(_first_content())
125125

126-
if label in ("addMemory", ADD_LABEL):
126+
if label in ("addMemory", ADD_TASK_LABEL):
127127
title = _format_title(item.timestamp, f"{cube_display} added {memory_len} memories")
128128
return title, _truncate_with_rules(_first_content())
129129

130-
if label in ("updateMemory", MEM_UPDATE_LABEL):
130+
if label in ("updateMemory", MEM_UPDATE_TASK_LABEL):
131131
title = _format_title(item.timestamp, f"{cube_display} updated {memory_len} memories")
132132
return title, _truncate_with_rules(_first_content())
133133

134-
if label in ("archiveMemory", MEM_ARCHIVE_LABEL):
134+
if label in ("archiveMemory", MEM_ARCHIVE_TASK_LABEL):
135135
title = _format_title(item.timestamp, f"{cube_display} archived {memory_len} memories")
136136
return title, _truncate_with_rules(_first_content())
137137

138-
if label in ("mergeMemory", MEM_ORGANIZE_LABEL):
138+
if label in ("mergeMemory", MEM_ORGANIZE_TASK_LABEL):
139139
title = _format_title(item.timestamp, f"{cube_display} merged {memory_len} memories")
140140
merged = [c for c in memcube_content if c.get("type") == "merged"]
141141
post = [c for c in memcube_content if c.get("type") == "postMerge"]

examples/mem_scheduler/redis_example.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@
99
from memos.configs.mem_scheduler import SchedulerConfigFactory
1010
from memos.mem_cube.general import GeneralMemCube
1111
from memos.mem_scheduler.scheduler_factory import SchedulerFactory
12-
from memos.mem_scheduler.schemas.general_schemas import QUERY_LABEL
1312
from memos.mem_scheduler.schemas.message_schemas import ScheduleMessageItem
13+
from memos.mem_scheduler.schemas.task_schemas import QUERY_TASK_LABEL
1414

1515

1616
if TYPE_CHECKING:
@@ -55,7 +55,7 @@ def service_run():
5555
message_item = ScheduleMessageItem(
5656
user_id=user_id,
5757
mem_cube_id="mem_cube_2",
58-
label=QUERY_LABEL,
58+
label=QUERY_TASK_LABEL,
5959
mem_cube=mem_cube,
6060
content=query,
6161
timestamp=datetime.now(),

examples/mem_scheduler/try_schedule_modules.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
from memos.mem_cube.general import GeneralMemCube
1515
from memos.mem_scheduler.analyzer.mos_for_test_scheduler import MOSForTestScheduler
1616
from memos.mem_scheduler.general_scheduler import GeneralScheduler
17-
from memos.mem_scheduler.schemas.general_schemas import (
17+
from memos.mem_scheduler.schemas.task_schemas import (
1818
NOT_APPLICABLE_TYPE,
1919
)
2020

src/memos/api/handlers/chat_handler.py

Lines changed: 90 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,11 @@
3030
prepare_reference_data,
3131
process_streaming_references_complete,
3232
)
33-
from memos.mem_scheduler.schemas.general_schemas import (
34-
ANSWER_LABEL,
35-
QUERY_LABEL,
36-
)
3733
from memos.mem_scheduler.schemas.message_schemas import ScheduleMessageItem
34+
from memos.mem_scheduler.schemas.task_schemas import (
35+
ANSWER_TASK_LABEL,
36+
QUERY_TASK_LABEL,
37+
)
3838
from memos.templates.mos_prompts import (
3939
FURTHER_SUGGESTION_PROMPT,
4040
get_memos_prompt,
@@ -244,7 +244,7 @@ def generate_chat_response() -> Generator[str, None, None]:
244244
user_id=chat_req.user_id,
245245
mem_cube_id=scheduler_cube_id,
246246
query=chat_req.query,
247-
label=QUERY_LABEL,
247+
label=QUERY_TASK_LABEL,
248248
)
249249
# Extract memories from search results
250250
memories_list = []
@@ -388,22 +388,6 @@ def generate_chat_response() -> Generator[str, None, None]:
388388
[chat_req.mem_cube_id] if chat_req.mem_cube_id else [chat_req.user_id]
389389
)
390390

391-
search_req = APISearchRequest(
392-
query=chat_req.query,
393-
user_id=chat_req.user_id,
394-
readable_cube_ids=readable_cube_ids,
395-
mode=chat_req.mode,
396-
internet_search=chat_req.internet_search,
397-
top_k=chat_req.top_k,
398-
chat_history=chat_req.history,
399-
session_id=chat_req.session_id,
400-
include_preference=chat_req.include_preference,
401-
pref_top_k=chat_req.pref_top_k,
402-
filter=chat_req.filter,
403-
playground_search_goal_parser=True,
404-
)
405-
406-
search_response = self.search_handler.handle_search_memories(search_req)
407391
# for playground, add the query to memory without response
408392
self._start_add_to_memory(
409393
user_id=chat_req.user_id,
@@ -414,7 +398,6 @@ def generate_chat_response() -> Generator[str, None, None]:
414398
async_mode="sync",
415399
)
416400

417-
yield f"data: {json.dumps({'type': 'status', 'data': '1'})}\n\n"
418401
# Use first readable cube ID for scheduler (backward compatibility)
419402
scheduler_cube_id = (
420403
readable_cube_ids[0] if readable_cube_ids else chat_req.user_id
@@ -423,24 +406,42 @@ def generate_chat_response() -> Generator[str, None, None]:
423406
user_id=chat_req.user_id,
424407
mem_cube_id=scheduler_cube_id,
425408
query=chat_req.query,
426-
label=QUERY_LABEL,
409+
label=QUERY_TASK_LABEL,
427410
)
428-
# Extract memories from search results
411+
412+
# ====== first search without parse goal ======
413+
search_req = APISearchRequest(
414+
query=chat_req.query,
415+
user_id=chat_req.user_id,
416+
readable_cube_ids=readable_cube_ids,
417+
mode=chat_req.mode,
418+
internet_search=False,
419+
top_k=chat_req.top_k,
420+
chat_history=chat_req.history,
421+
session_id=chat_req.session_id,
422+
include_preference=chat_req.include_preference,
423+
pref_top_k=chat_req.pref_top_k,
424+
filter=chat_req.filter,
425+
)
426+
search_response = self.search_handler.handle_search_memories(search_req)
427+
428+
yield f"data: {json.dumps({'type': 'status', 'data': '1'})}\n\n"
429+
430+
# Extract memories from search results (first search)
429431
memories_list = []
430432
if search_response.data and search_response.data.get("text_mem"):
431433
text_mem_results = search_response.data["text_mem"]
432434
if text_mem_results and text_mem_results[0].get("memories"):
433435
memories_list = text_mem_results[0]["memories"]
434436

435437
# Filter memories by threshold
436-
filtered_memories = self._filter_memories_by_threshold(memories_list)
438+
first_filtered_memories = self._filter_memories_by_threshold(memories_list)
439+
440+
# Prepare reference data (first search)
441+
reference = prepare_reference_data(first_filtered_memories)
442+
# get preference string
443+
pref_string = search_response.data.get("pref_string", "")
437444

438-
# Prepare reference data
439-
reference = prepare_reference_data(filtered_memories)
440-
# get internet reference
441-
internet_reference = self._get_internet_reference(
442-
search_response.data.get("text_mem")[0]["memories"]
443-
)
444445
yield f"data: {json.dumps({'type': 'reference', 'data': reference})}\n\n"
445446

446447
# Prepare preference markdown string
@@ -450,9 +451,52 @@ def generate_chat_response() -> Generator[str, None, None]:
450451
pref_md_string = self._build_pref_md_string_for_playground(pref_memories)
451452
yield f"data: {json.dumps({'type': 'pref_md_string', 'data': pref_md_string})}\n\n"
452453

454+
# internet status
455+
yield f"data: {json.dumps({'type': 'status', 'data': 'start_internet_search'})}\n\n"
456+
457+
# ====== second search with parse goal ======
458+
search_req = APISearchRequest(
459+
query=chat_req.query,
460+
user_id=chat_req.user_id,
461+
readable_cube_ids=readable_cube_ids,
462+
mode=chat_req.mode,
463+
internet_search=chat_req.internet_search,
464+
top_k=chat_req.top_k,
465+
chat_history=chat_req.history,
466+
session_id=chat_req.session_id,
467+
include_preference=False,
468+
filter=chat_req.filter,
469+
playground_search_goal_parser=True,
470+
)
471+
search_response = self.search_handler.handle_search_memories(search_req)
472+
473+
# Extract memories from search results (second search)
474+
memories_list = []
475+
if search_response.data and search_response.data.get("text_mem"):
476+
text_mem_results = search_response.data["text_mem"]
477+
if text_mem_results and text_mem_results[0].get("memories"):
478+
memories_list = text_mem_results[0]["memories"]
479+
480+
# Filter memories by threshold
481+
second_filtered_memories = self._filter_memories_by_threshold(memories_list)
482+
483+
# dedup and supplement memories
484+
filtered_memories = self._dedup_and_supplement_memories(
485+
first_filtered_memories, second_filtered_memories
486+
)
487+
488+
# Prepare remain reference data (second search)
489+
reference = prepare_reference_data(filtered_memories)
490+
# get internet reference
491+
internet_reference = self._get_internet_reference(
492+
search_response.data.get("text_mem")[0]["memories"]
493+
)
494+
495+
yield f"data: {json.dumps({'type': 'reference', 'data': reference})}\n\n"
496+
453497
# Step 2: Build system prompt with memories
454498
system_prompt = self._build_enhance_system_prompt(
455-
filtered_memories, search_response.data.get("pref_string", "")
499+
filtered_memories, pref_string
456500
)
457501

458502
# Prepare messages
@@ -588,6 +632,19 @@ def generate_chat_response() -> Generator[str, None, None]:
588632
self.logger.error(f"Failed to start chat stream: {traceback.format_exc()}")
589633
raise HTTPException(status_code=500, detail=str(traceback.format_exc())) from err
590634

635+
def _dedup_and_supplement_memories(
636+
self, first_filtered_memories: list, second_filtered_memories: list
637+
) -> list:
638+
"""Remove memory from second_filtered_memories that already exists in first_filtered_memories, return remaining memories"""
639+
# Create a set of IDs from first_filtered_memories for efficient lookup
640+
first_memory_ids = {memory["id"] for memory in first_filtered_memories}
641+
642+
remaining_memories = []
643+
for memory in second_filtered_memories:
644+
if memory["id"] not in first_memory_ids:
645+
remaining_memories.append(memory)
646+
return remaining_memories
647+
591648
def _get_internet_reference(
592649
self, search_response: list[dict[str, any]]
593650
) -> list[dict[str, any]]:
@@ -1034,7 +1091,7 @@ async def _post_chat_processing(
10341091

10351092
# Send answer to scheduler
10361093
self._send_message_to_scheduler(
1037-
user_id=user_id, mem_cube_id=cube_id, query=clean_response, label=ANSWER_LABEL
1094+
user_id=user_id, mem_cube_id=cube_id, query=clean_response, label=ANSWER_TASK_LABEL
10381095
)
10391096

10401097
self.logger.info(f"Post-chat processing completed for user {user_id}")

src/memos/api/handlers/memory_handler.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -209,12 +209,8 @@ def handle_delete_memories(delete_mem_req: DeleteMemoryRequest, naive_mem_cube:
209209
if naive_mem_cube.pref_mem is not None:
210210
naive_mem_cube.pref_mem.delete(delete_mem_req.memory_ids)
211211
elif delete_mem_req.file_ids is not None:
212-
# TODO: Implement deletion by file_ids
213-
# Need to find memory_ids associated with file_ids and delete them
214-
logger.warning("Deletion by file_ids not implemented yet")
215-
return DeleteMemoryResponse(
216-
message="Deletion by file_ids not implemented yet",
217-
data={"status": "failure"},
212+
naive_mem_cube.text_mem.delete_by_filter(
213+
writable_cube_ids=delete_mem_req.writable_cube_ids, file_ids=delete_mem_req.file_ids
218214
)
219215
elif delete_mem_req.filter is not None:
220216
# TODO: Implement deletion by filter

src/memos/configs/mem_reader.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from datetime import datetime
22
from typing import Any, ClassVar
33

4-
from pydantic import Field, field_validator, model_validator
4+
from pydantic import ConfigDict, Field, field_validator, model_validator
55

66
from memos.configs.base import BaseConfig
77
from memos.configs.chunker import ChunkerConfigFactory
@@ -44,6 +44,8 @@ def parse_datetime(cls, value):
4444
class SimpleStructMemReaderConfig(BaseMemReaderConfig):
4545
"""SimpleStruct MemReader configuration class."""
4646

47+
model_config = ConfigDict(extra="allow", strict=True)
48+
4749

4850
class MultiModalStructMemReaderConfig(BaseMemReaderConfig):
4951
"""MultiModalStruct MemReader configuration class."""
@@ -58,6 +60,8 @@ class MultiModalStructMemReaderConfig(BaseMemReaderConfig):
5860
class StrategyStructMemReaderConfig(BaseMemReaderConfig):
5961
"""StrategyStruct MemReader configuration class."""
6062

63+
model_config = ConfigDict(extra="allow", strict=True)
64+
6165

6266
class MemReaderConfigFactory(BaseConfig):
6367
"""Factory class for creating MemReader configurations."""

src/memos/graph_dbs/neo4j.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1588,7 +1588,7 @@ def delete_node_by_prams(
15881588
file_id_and_conditions.append(f"${param_name} IN n.file_ids")
15891589
if file_id_and_conditions:
15901590
# Use AND to require all file_ids to be present
1591-
where_clauses.append(f"({' AND '.join(file_id_and_conditions)})")
1591+
where_clauses.append(f"({' OR '.join(file_id_and_conditions)})")
15921592

15931593
# Query nodes by filter if provided
15941594
filter_ids = []

0 commit comments

Comments
 (0)