Skip to content

Commit 87d9290

Browse files
committed
Merge branch 'test' into dev_mcp
2 parents 5ce9690 + bb17aef commit 87d9290

File tree

16 files changed

+1525
-72
lines changed

16 files changed

+1525
-72
lines changed

src/memos/api/routers/server_router.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,57 @@ def search_memories(search_req: APISearchRequest):
243243
)
244244

245245

246+
@router.post("/search_ws", summary="Search memories with scheduler", response_model=SearchResponse)
247+
def search_memories_ws(search_req: APISearchRequest):
248+
"""Search memories for a specific user."""
249+
# Create UserContext object - how to assign values
250+
user_context = UserContext(
251+
user_id=search_req.user_id,
252+
mem_cube_id=search_req.mem_cube_id,
253+
session_id=search_req.session_id or "default_session",
254+
)
255+
logger.info(f"Search user_id is: {user_context.mem_cube_id}")
256+
memories_result: MOSSearchResult = {
257+
"text_mem": [],
258+
"act_mem": [],
259+
"para_mem": [],
260+
}
261+
target_session_id = search_req.session_id
262+
if not target_session_id:
263+
target_session_id = "default_session"
264+
search_filter = {"session_id": search_req.session_id} if search_req.session_id else None
265+
266+
# Create MemCube and perform search
267+
naive_mem_cube = _create_naive_mem_cube()
268+
search_results = naive_mem_cube.text_mem.search(
269+
query=search_req.query,
270+
user_name=user_context.mem_cube_id,
271+
top_k=search_req.top_k,
272+
mode=search_req.mode,
273+
manual_close_internet=not search_req.internet_search,
274+
moscube=search_req.moscube,
275+
search_filter=search_filter,
276+
info={
277+
"user_id": search_req.user_id,
278+
"session_id": target_session_id,
279+
"chat_history": search_req.chat_history,
280+
},
281+
)
282+
formatted_memories = [_format_memory_item(data) for data in search_results]
283+
284+
memories_result["text_mem"].append(
285+
{
286+
"cube_id": search_req.mem_cube_id,
287+
"memories": formatted_memories,
288+
}
289+
)
290+
291+
return SearchResponse(
292+
message="Search completed successfully",
293+
data=memories_result,
294+
)
295+
296+
246297
@router.post("/add", summary="Add memories", response_model=MemoryResponse)
247298
def add_memories(add_req: APIADDRequest):
248299
"""Add memories for a specific user."""

src/memos/configs/mem_scheduler.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,11 @@ class BaseSchedulerConfig(BaseConfig):
2828
thread_pool_max_workers: int = Field(
2929
default=DEFAULT_THREAD_POOL_MAX_WORKERS,
3030
gt=1,
31-
lt=20,
3231
description=f"Maximum worker threads in pool (default: {DEFAULT_THREAD_POOL_MAX_WORKERS})",
3332
)
3433
consume_interval_seconds: float = Field(
3534
default=DEFAULT_CONSUME_INTERVAL_SECONDS,
3635
gt=0,
37-
le=60,
3836
description=f"Interval for consuming messages from queue in seconds (default: {DEFAULT_CONSUME_INTERVAL_SECONDS})",
3937
)
4038
auth_config_path: str | None = Field(

src/memos/llms/hf.py

Lines changed: 48 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -379,10 +379,52 @@ def build_kv_cache(self, messages) -> DynamicCache:
379379
raise ValueError(
380380
"Prompt after chat template is empty, cannot build KV cache. Check your messages input."
381381
)
382-
kv = DynamicCache()
382+
# Create cache and perform forward pass without pre-existing cache
383383
with torch.no_grad():
384-
self.model(**inputs, use_cache=True, past_key_values=kv)
385-
for i, (k, v) in enumerate(zip(kv.key_cache, kv.value_cache, strict=False)):
386-
kv.key_cache[i] = k[:, :, :seq_len, :]
387-
kv.value_cache[i] = v[:, :, :seq_len, :]
388-
return kv
384+
outputs = self.model(**inputs, use_cache=True)
385+
386+
# Get the cache from model outputs
387+
if hasattr(outputs, "past_key_values") and outputs.past_key_values is not None:
388+
kv = outputs.past_key_values
389+
390+
# Convert from legacy tuple format to DynamicCache if needed
391+
if isinstance(kv, tuple):
392+
kv = DynamicCache.from_legacy_cache(kv)
393+
394+
# Handle compatibility between old and new transformers versions
395+
# In newer versions, DynamicCache uses 'layers' attribute
396+
# In older versions, it uses 'key_cache' and 'value_cache' attributes
397+
if hasattr(kv, "layers"):
398+
# New version: trim cache using layers attribute
399+
for layer in kv.layers:
400+
if hasattr(layer, "key_cache") and hasattr(layer, "value_cache"):
401+
# Trim each layer's cache to the sequence length
402+
if layer.key_cache is not None:
403+
layer.key_cache = layer.key_cache[:, :, :seq_len, :]
404+
if layer.value_cache is not None:
405+
layer.value_cache = layer.value_cache[:, :, :seq_len, :]
406+
elif hasattr(layer, "keys") and hasattr(layer, "values"):
407+
# Alternative attribute names in some versions
408+
if layer.keys is not None:
409+
layer.keys = layer.keys[:, :, :seq_len, :]
410+
if layer.values is not None:
411+
layer.values = layer.values[:, :, :seq_len, :]
412+
elif hasattr(kv, "key_cache") and hasattr(kv, "value_cache"):
413+
# Old version: trim cache using key_cache and value_cache attributes
414+
for i in range(len(kv.key_cache)):
415+
if kv.key_cache[i] is not None:
416+
kv.key_cache[i] = kv.key_cache[i][:, :, :seq_len, :]
417+
if kv.value_cache[i] is not None:
418+
kv.value_cache[i] = kv.value_cache[i][:, :, :seq_len, :]
419+
else:
420+
# Fallback: log warning but continue without trimming
421+
logger.warning(
422+
f"DynamicCache object of type {type(kv)} has unexpected structure. "
423+
f"Cache trimming skipped. Available attributes: {dir(kv)}"
424+
)
425+
426+
return kv
427+
else:
428+
raise RuntimeError(
429+
"Failed to build KV cache: no cache data available from model outputs"
430+
)

src/memos/mem_os/core.py

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -308,18 +308,20 @@ def chat(self, query: str, user_id: str | None = None, base_prompt: str | None =
308308
past_key_values = None
309309

310310
if self.config.enable_activation_memory:
311-
assert self.config.chat_model.backend == "huggingface", (
312-
"Activation memory only used for huggingface backend."
313-
)
314-
# TODO this only one cubes
315-
for mem_cube_id, mem_cube in self.mem_cubes.items():
316-
if mem_cube_id not in user_cube_ids:
317-
continue
318-
if mem_cube.act_mem:
319-
kv_cache = next(iter(mem_cube.act_mem.get_all()), None)
320-
past_key_values = (
321-
kv_cache.memory if (kv_cache and hasattr(kv_cache, "memory")) else None
322-
)
311+
if self.config.chat_model.backend != "huggingface":
312+
logger.error(
313+
"Activation memory only used for huggingface backend. Skipping activation memory."
314+
)
315+
else:
316+
# TODO this only one cubes
317+
for mem_cube_id, mem_cube in self.mem_cubes.items():
318+
if mem_cube_id not in user_cube_ids:
319+
continue
320+
if mem_cube.act_mem:
321+
kv_cache = next(iter(mem_cube.act_mem.get_all()), None)
322+
past_key_values = (
323+
kv_cache.memory if (kv_cache and hasattr(kv_cache, "memory")) else None
324+
)
323325
break
324326
# Generate response
325327
response = self.chat_llm.generate(current_messages, past_key_values=past_key_values)

src/memos/mem_os/main.py

Lines changed: 19 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -312,23 +312,25 @@ def _generate_enhanced_response_with_context(
312312
# Handle activation memory if enabled (same as core method)
313313
past_key_values = None
314314
if self.config.enable_activation_memory:
315-
assert self.config.chat_model.backend == "huggingface", (
316-
"Activation memory only used for huggingface backend."
317-
)
318-
# Get accessible cubes for the user
319-
target_user_id = user_id if user_id is not None else self.user_id
320-
accessible_cubes = self.user_manager.get_user_cubes(target_user_id)
321-
user_cube_ids = [cube.cube_id for cube in accessible_cubes]
322-
323-
for mem_cube_id, mem_cube in self.mem_cubes.items():
324-
if mem_cube_id not in user_cube_ids:
325-
continue
326-
if mem_cube.act_mem:
327-
kv_cache = next(iter(mem_cube.act_mem.get_all()), None)
328-
past_key_values = (
329-
kv_cache.memory if (kv_cache and hasattr(kv_cache, "memory")) else None
330-
)
331-
break
315+
if self.config.chat_model.backend != "huggingface":
316+
logger.error(
317+
"Activation memory only used for huggingface backend. Skipping activation memory."
318+
)
319+
else:
320+
# Get accessible cubes for the user
321+
target_user_id = user_id if user_id is not None else self.user_id
322+
accessible_cubes = self.user_manager.get_user_cubes(target_user_id)
323+
user_cube_ids = [cube.cube_id for cube in accessible_cubes]
324+
325+
for mem_cube_id, mem_cube in self.mem_cubes.items():
326+
if mem_cube_id not in user_cube_ids:
327+
continue
328+
if mem_cube.act_mem:
329+
kv_cache = next(iter(mem_cube.act_mem.get_all()), None)
330+
past_key_values = (
331+
kv_cache.memory if (kv_cache and hasattr(kv_cache, "memory")) else None
332+
)
333+
break
332334

333335
try:
334336
# Generate the enhanced response using the chat LLM with same parameters as core

0 commit comments

Comments
 (0)