Skip to content

Commit b7636eb

Browse files
fridayL, tangg555, kakack, CaralHsi, Gaojianli
authored
Feat: add demo api and scheduler vllm etc (#83)
* feat: add product API and related functionality (cleaned sensitive data) * feat: add singleton for memos and llm * fix:fix persistent_user_manager * feat:update prompt and rm readme * fix:ci code * feat: update demo and add user manager (#31) * feat: add product API and related functionality (cleaned sensitive data) * feat: add singleton for memos and llm * fix:fix persistent_user_manager * feat:update prompt and rm readme * fix:ci code * Feature & Fix bugs: mem scheduler with rabbitmq and web log submission * fix bug of message consuming * add a range of configs * support rabbitmq * add new docs for the beginner * add more examples * fix:UserRole error * update: update product api * fix: suggestion mem * update:fix add mem * feat: update chat * fix: add uuid and product think * update: search data * fix:update * fix:chat mem * feat:vllm llm support version0 * feat: update zh en and mem * feat: update cube ids * feat:vllm llm support version0.5 * fix: memchat multi turn * feat: add multi-language feature in mem-reader * feat: add multi-language feature in mem-reader * feat: add multi-language feature in tree reorganizer * feat: reinforce language * fix:fix search and add product tmp data * fix bugs & refactor the code: mem scheduler support web log feature * refactor & fix bugs: mem scheduler related codes including modules like dispatcher and retriever, the dependencies and the unittest. * fix:remove dup * feat: add logs * feat:vllm llm support streaming generate and add benchmark script * add: chat time for add * fix:merge error * fix:merge error * feat & fix bugs: factor mem scheduler. 
test_retriever.py is waiting for test * feat:add vllm chat model * fix bugs: filter logging of retriever module in mem scheduler * fix: test_api * feat:update feat * fix: manager * feat: add vllm cache * fix bugs: fix scheduler logging bug of working memory replacement * refactor: change default update interval * fix bugs: fix bugs in update activation memory in memory scheduler * add support for azure backend * feat: add default config override for store config * feat: add MOS_ENABLE_REORGANIZE config * fix bugs: fix bugs of filter and updating activation memories * fix bugs: change name * feat: add act mem * fix:poetry lock * delete: del file --------- Co-authored-by: chentang <[email protected]> Co-authored-by: Kai <[email protected]> Co-authored-by: 席阳阳 <[email protected]> Co-authored-by: zhangyibo.114514 <[email protected]>
1 parent 86c999a commit b7636eb

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

67 files changed

+7866
-1224
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,9 @@ dmypy.json
186186
# Cython debug symbols
187187
cython_debug/
188188

189+
# auth file
190+
*_auth.yaml
191+
189192
# PyCharm
190193
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
191194
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore

docs/openapi.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -884,7 +884,7 @@
884884
"type": "string",
885885
"title": "Session Id",
886886
"description": "Session ID for the MOS. This is used to distinguish between different dialogue",
887-
"default": "3d88949f-cbe1-4244-a2e1-d346e8b76ca0"
887+
"default": "a47d75a0-5ee8-473f-86c4-3f09073fd59f"
888888
},
889889
"chat_model": {
890890
"$ref": "#/components/schemas/LLMConfigFactory",
Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Example demonstrating how to use VLLMKVCacheMemory with vLLM backend.
4+
This example shows how to use the new vLLM-compatible KV cache memory.
5+
"""
6+
7+
from memos.configs.memory import MemoryConfigFactory
8+
from memos.memories.factory import MemoryFactory
9+
10+
11+
def main():
    """Demonstrate the VLLMKVCacheMemory lifecycle end to end.

    Steps: build a config, instantiate the memory via ``MemoryFactory``,
    extract KV-cache items from chat-style prompts, add/get/delete them,
    fetch the prompt string used for cache-hit generation, preload the
    cache on the vLLM server, and finally dump/reload the memory.

    NOTE(review): assumes a vLLM server is reachable at the configured
    ``api_base``; the extract/preload steps print a failure message
    otherwise.
    """
    print("=== VLLM KV Cache Memory Example ===\n")

    # 1. Create config for VLLMKVCacheMemory (using vLLM backend)
    config = MemoryConfigFactory(
        backend="vllm_kv_cache",  # Use the new vLLM KV cache backend
        config={
            "extractor_llm": {
                "backend": "vllm",
                "config": {
                    "model_name_or_path": "/mnt/afs/models/hf_models/Qwen2.5-7B",
                    "api_base": "http://localhost:8088/v1",
                    "temperature": 0.7,
                    "max_tokens": 1024,
                    "model_schema": "memos.configs.llm.VLLMLLMConfig",
                },
            },
        },
    )

    # 2. Instantiate VLLMKVCacheMemory using the factory
    print("Initializing VLLM KV Cache Memory...")
    vllm_kv_mem = MemoryFactory.from_config(config)
    print("✓ VLLM KV Cache Memory initialized successfully.\n")

    # 3. Extract a VLLMKVCacheItem from a prompt
    print("===== Extract VLLMKVCacheItem =====")
    system_prompt = [
        {"role": "system", "content": "You are a helpful AI assistant."},
        {"role": "user", "content": "What is MemOS?"},
        {"role": "assistant", "content": "MemOS is a memory operating system for LLMs."},
    ]

    try:
        cache_item = vllm_kv_mem.extract(system_prompt)
        print("✓ KV cache item extracted successfully")
        print(f" ID: {cache_item.id}")
        print(f" Memory (prompt): {cache_item.memory[:100]}...")
        print(f" Metadata: {cache_item.metadata}")
        print()
    except Exception as e:
        # Without a first cache item nothing below can work; bail out.
        print(f"✗ Failed to extract KV cache item: {e}")
        return

    # 4. Add the extracted VLLMKVCacheItem
    print("===== Add VLLMKVCacheItem =====")
    vllm_kv_mem.add([cache_item])
    all_items = vllm_kv_mem.get_all()
    print(f"✓ Added cache item. Total items: {len(all_items)}")
    print()

    # 5. Get by id
    print("===== Get VLLMKVCacheItem by id =====")
    retrieved = vllm_kv_mem.get(cache_item.id)
    if retrieved:
        print(f"✓ Retrieved cache item: {retrieved.id}")
        print(f" Memory (prompt): {retrieved.memory[:100]}...")
    else:
        print("✗ Failed to retrieve cache item")
    print()

    # 6. Get cache (returns prompt string for vLLM)
    print("===== Get Cache (Prompt String) =====")
    prompt_string = vllm_kv_mem.get_cache([cache_item.id])
    if prompt_string:
        print(f"✓ Retrieved prompt string: {prompt_string[:100]}...")
        print(" This prompt can be used for vLLM generation with preloaded KV cache")
    else:
        print("✗ Failed to retrieve prompt string")
    print()

    # 7. Extract another cache item for demonstration
    print("===== Extract Another VLLMKVCacheItem =====")
    another_prompt = [
        {"role": "system", "content": "You are a coding assistant."},
        {"role": "user", "content": "Write a Python function to calculate fibonacci numbers."},
    ]

    # BUGFIX: the original referenced cache_item2 unconditionally in the
    # preload step, so a failed second extract raised NameError there
    # (silently swallowed by the broad except, masking the real error).
    # Track the item explicitly and only preload ids that were created.
    cache_item2 = None
    try:
        cache_item2 = vllm_kv_mem.extract(another_prompt)
        vllm_kv_mem.add([cache_item2])
        print(f"✓ Added second cache item. Total items: {len(vllm_kv_mem.get_all())}")
        print()
    except Exception as e:
        print(f"✗ Failed to extract second KV cache item: {e}")
        print()

    # 8. Preload KV cache on vLLM server
    print("===== Preload KV Cache on vLLM Server =====")
    preload_ids = [cache_item.id]
    if cache_item2 is not None:
        preload_ids.append(cache_item2.id)
    try:
        vllm_kv_mem.preload_kv_cache(preload_ids)
        print("✓ KV cache preloaded on vLLM server successfully")
        print(" The server now has the KV cache ready for fast generation")
    except Exception as e:
        print(f"✗ Failed to preload KV cache: {e}")
    print()

    # 9. Delete one item
    print("===== Delete One VLLMKVCacheItem =====")
    vllm_kv_mem.delete([cache_item.id])
    remaining_items = vllm_kv_mem.get_all()
    print(f"✓ Deleted cache item. Remaining items: {len(remaining_items)}")
    print()

    # 10. Dump and load
    print("===== Dump and Load VLLMKVCacheMemory =====")
    try:
        vllm_kv_mem.dump("tmp/vllm_kv_mem")
        print("✓ Memory dumped to 'tmp/vllm_kv_mem'")

        # Clear memory and reload
        vllm_kv_mem.delete_all()
        vllm_kv_mem.load("tmp/vllm_kv_mem")
        reloaded_items = vllm_kv_mem.get_all()
        print(f"✓ Memory loaded from 'tmp/vllm_kv_mem': {len(reloaded_items)} items")
    except Exception as e:
        print(f"✗ Failed to dump/load memory: {e}")
    print()

    print("=== Example completed successfully ===")


if __name__ == "__main__":
    main()

examples/data/config/mem_scheduler/general_scheduler_config.yaml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,8 @@ backend: general_scheduler
22
config:
33
top_k: 10
44
top_n: 5
5-
act_mem_update_interval: 300
5+
act_mem_update_interval: 30
66
context_window_size: 5
7-
activation_mem_size: 5
87
thread_pool_max_workers: 5
98
consume_interval_seconds: 3
109
enable_parallel_dispatch: true

examples/data/config/mem_scheduler/mem_cube_config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ text_mem:
2020
graph_db:
2121
backend: "neo4j"
2222
config:
23-
uri: "bolt://123.57.48.226:7687"
23+
uri: "bolt://localhost:7687"
2424
user: "neo4j"
2525
password: "12345678"
2626
db_name: "user11alice"

examples/data/config/mem_scheduler/memos_config_w_scheduler.yaml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,8 @@ mem_scheduler:
3434
config:
3535
top_k: 10
3636
top_n: 5
37-
act_mem_update_interval: 300
37+
act_mem_update_interval: 30
3838
context_window_size: 5
39-
activation_mem_size: 1000
4039
thread_pool_max_workers: 10
4140
consume_interval_seconds: 3
4241
enable_parallel_dispatch: true
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
user_id: "root"
2+
chat_model:
3+
backend: "huggingface"
4+
config:
5+
model_name_or_path: "Qwen/Qwen3-1.7B"
6+
temperature: 0.1
7+
remove_think_prefix: true
8+
max_tokens: 4096
9+
mem_reader:
10+
backend: "simple_struct"
11+
config:
12+
llm:
13+
backend: "openai"
14+
config:
15+
model_name_or_path: "gpt-4o-mini"
16+
temperature: 0.8
17+
max_tokens: 4096
18+
top_p: 0.9
19+
top_k: 50
20+
remove_think_prefix: true
21+
api_key: "sk-xxxxxx"
22+
api_base: "https://api.openai.com/v1"
23+
embedder:
24+
backend: "ollama"
25+
config:
26+
model_name_or_path: "nomic-embed-text:latest"
27+
chunker:
28+
backend: "sentence"
29+
config:
30+
tokenizer_or_token_counter: "gpt2"
31+
chunk_size: 512
32+
chunk_overlap: 128
33+
min_sentences_per_chunk: 1
34+
mem_scheduler:
35+
backend: "general_scheduler"
36+
config:
37+
top_k: 2
38+
top_n: 5
39+
act_mem_update_interval: 30
40+
context_window_size: 5
41+
thread_pool_max_workers: 10
42+
consume_interval_seconds: 3
43+
enable_parallel_dispatch: true
44+
max_turns_window: 20
45+
top_k: 5
46+
enable_textual_memory: true
47+
enable_activation_memory: true
48+
enable_parametric_memory: false
49+
enable_mem_scheduler: true

examples/mem_os/chat_w_generated_cube_explicit_memory.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@
8484
"graph_db": {
8585
"backend": "neo4j",
8686
"config": {
87-
"uri": "bolt://123.57.48.226:7687",
87+
"uri": "bolt://localhost:7687",
8888
"user": "neo4j",
8989
"password": "12345678",
9090
"db_name": "user03alice11",
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
"""
2+
Example demonstrating how to use MOSProduct for multi-user scenarios.
3+
"""
4+
5+
from memos.configs.mem_cube import GeneralMemCubeConfig
6+
from memos.configs.mem_os import MOSConfig
7+
from memos.mem_cube.general import GeneralMemCube
8+
from memos.mem_os.product import MOSProduct
9+
10+
11+
def get_config(user_name):
    """Build the example MOS configuration plus a default memory cube.

    Args:
        user_name: User identifier; also used for the cube id and as the
            Neo4j database name.

    Returns:
        A ``(MOSConfig, GeneralMemCube)`` pair ready to hand to
        ``MOSProduct``.
    """
    # One OpenAI-style LLM config, reused by the chat model, the mem
    # reader, and both cube-level LLMs.
    llm_settings = {
        "model_name_or_path": "gpt-4o-mini",
        "temperature": 0.8,
        "max_tokens": 1024,
        "top_p": 0.9,
        "top_k": 50,
        "remove_think_prefix": True,
        "api_key": "your-api-key-here",
        "api_base": "https://api.openai.com/v1",
    }
    # Shared embedder settings (Ollama-served embedding model).
    embedder_settings = {
        "backend": "ollama",
        "config": {
            "model_name_or_path": "nomic-embed-text:latest",
        },
    }

    mos_config = MOSConfig(
        user_id="root",
        chat_model={"backend": "openai", "config": llm_settings},
        mem_reader={
            "backend": "naive",
            "config": {
                "llm": {"backend": "openai", "config": llm_settings},
                "embedder": embedder_settings,
            },
        },
        enable_textual_memory=True,
        enable_activation_memory=False,
        top_k=5,
        max_turns_window=20,
    )

    cube_config = GeneralMemCubeConfig.model_validate(
        {
            "user_id": user_name,
            "cube_id": f"{user_name}_default_cube",
            "text_mem": {
                "backend": "tree_text",
                "config": {
                    "extractor_llm": {"backend": "openai", "config": llm_settings},
                    "dispatcher_llm": {"backend": "openai", "config": llm_settings},
                    "graph_db": {
                        "backend": "neo4j",
                        "config": {
                            "uri": "bolt://localhost:7687",
                            "user": "neo4j",
                            "password": "12345678",
                            "db_name": user_name,
                            "auto_create": True,
                        },
                    },
                    "embedder": embedder_settings,
                },
            },
            "act_mem": {},
            "para_mem": {},
        }
    )
    return mos_config, GeneralMemCube(cube_config)
79+
80+
81+
def main():
    """Exercise the MOSProduct multi-user API end to end for one user.

    Registers "alice", streams a chat reply, adds a memory, runs two
    searches, then prints user bookkeeping info.

    NOTE(review): relies on the OpenAI / Ollama / Neo4j endpoints from
    ``get_config`` being reachable.
    """
    mos_config, mem_cube = get_config(user_name="alice")
    # Initialize MOSProduct with the default config
    product = MOSProduct(default_config=mos_config)

    # Register the first user together with a default memory cube
    registration = product.user_register(
        user_id="alice",
        user_name="alice",
        interests="I'm interested in machine learning and AI research.",
        default_mem_cube=mem_cube,
    )
    print(f"User registration result: {registration}")

    # Chat with Alice (responses stream in chunks)
    print("\n=== Chatting with Alice ===")
    for chunk in product.chat(query="What are my interests?", user_id="alice"):
        print(chunk, end="")

    # Add a memory for Alice into her default cube
    product.add(
        user_id="alice",
        memory_content="I attended a machine learning conference last week.",
        mem_cube_id=registration["default_cube_id"],
    )

    # Search Alice's memories
    found = product.search(query="conference", user_id="alice")
    print(f"\nSearch result for Alice: {found}")

    # Fetch all of Alice's textual memories
    found = product.get_all(query="conference", user_id="alice", memory_type="text_mem")
    print(f"\nSearch result for Alice: {found}")

    # List all users
    users = product.list_users()
    print(f"\nAll registered users: {users}")

    # Get user info
    alice_info = product.get_user_info("alice")
    print(f"\nAlice's info: {alice_info}")


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)