Skip to content

Commit b7636eb

Browse files
fridayL, tangg555, kakack, CaralHsi, Gaojianli
authored
Feat: add demo api and scheduler vllm etc (#83)
* feat: add product API and related functionality (cleaned sensitive data) * feat: add singleton for memos and llm * fix:fix persistent_user_manager * feat:update prompt and rm readme * fix:ci code * feat: update demo and add user manager (#31) * feat: add product API and related functionality (cleaned sensitive data) * feat: add singleton for memos and llm * fix:fix persistent_user_manager * feat:update prompt and rm readme * fix:ci code * Feature & Fix bugs: mem scheduler with rabbitmq and web log submission * fix bug of message consuming * add a range of configs * support rabbitmq * add new docs for the beginner * add more examples * fix:UserRole error * update: update product api * fix: suggestion mem * update:fix add mem * feat: update chat * fix: add uuid and product think * update: search data * fix:update * fix:chat mem * feat:vllm llm support version0 * feat: update zh en and mem * feat: update cube ids * feat:vllm llm support version0.5 * fix: memchat multi turn * feat: add multi-language feature in mem-reader * feat: add multi-language feature in mem-reader * feat: add multi-language feature in tree reorganizer * feat: reinforce language * fix:fix search and add product tmp data * fix bugs & refactor the code: mem scheduler support web log feature * refactor & fix bugs: mem scheduler related codes including modules like dispatcher and retriever, the dependencies and the unittest. * fix:remove dup * feat: add logs * feat:vllm llm support streaming generate and add benchmark script * add: chat time for add * fix:merge error * fix:merge error * feat & fix bugs: factor mem scheduler. 
test_retriever.py is waiting for test * feat:add vllm chat model * fix bugs: filter logging of retriever module in mem scheduler * fix: test_api * feat:update feat * fix: manager * feat: add vllm cache * fix bugs: fix scheduler logging bug of working memory replacement * refactor: change default update interval * fix bugs: fix bugs in update activation memory in memory scheduler * add support for azure backend * feat: add default config override for store config * feat: add MOS_ENABLE_REORGANIZE config * fix bugs: fix bugs of filter and updating activation memories * fix bugs: change name * feat: add act mem * fix:poetry lock * delete: del file --------- Co-authored-by: chentang <[email protected]> Co-authored-by: Kai <[email protected]> Co-authored-by: 席阳阳 <[email protected]> Co-authored-by: zhangyibo.114514 <[email protected]>
1 parent 86c999a commit b7636eb

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

67 files changed

+7866
-1224
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,9 @@ dmypy.json
186186
# Cython debug symbols
187187
cython_debug/
188188

189+
# auth file
190+
*_auth.yaml
191+
189192
# PyCharm
190193
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
191194
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore

docs/openapi.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -884,7 +884,7 @@
884884
"type": "string",
885885
"title": "Session Id",
886886
"description": "Session ID for the MOS. This is used to distinguish between different dialogue",
887-
"default": "3d88949f-cbe1-4244-a2e1-d346e8b76ca0"
887+
"default": "a47d75a0-5ee8-473f-86c4-3f09073fd59f"
888888
},
889889
"chat_model": {
890890
"$ref": "#/components/schemas/LLMConfigFactory",
Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Example demonstrating how to use VLLMKVCacheMemory with vLLM backend.
4+
This example shows how to use the new vLLM-compatible KV cache memory.
5+
"""
6+
7+
from memos.configs.memory import MemoryConfigFactory
8+
from memos.memories.factory import MemoryFactory
9+
10+
11+
def main():
    """Demonstrate the VLLMKVCacheMemory lifecycle end to end.

    Steps: build a config, instantiate the memory via ``MemoryFactory``,
    extract KV-cache items from chat-style prompts, add/get/delete them,
    fetch the prompt string used for cache-hit generation, preload the
    cache on the vLLM server, and finally dump/reload the memory.

    NOTE(review): assumes a vLLM server is reachable at the configured
    ``api_base``; the extract/preload steps print a failure message
    otherwise.
    """
    print("=== VLLM KV Cache Memory Example ===\n")

    # 1. Create config for VLLMKVCacheMemory (using vLLM backend)
    config = MemoryConfigFactory(
        backend="vllm_kv_cache",  # Use the new vLLM KV cache backend
        config={
            "extractor_llm": {
                "backend": "vllm",
                "config": {
                    "model_name_or_path": "/mnt/afs/models/hf_models/Qwen2.5-7B",
                    "api_base": "http://localhost:8088/v1",
                    "temperature": 0.7,
                    "max_tokens": 1024,
                    "model_schema": "memos.configs.llm.VLLMLLMConfig",
                },
            },
        },
    )

    # 2. Instantiate VLLMKVCacheMemory using the factory
    print("Initializing VLLM KV Cache Memory...")
    vllm_kv_mem = MemoryFactory.from_config(config)
    print("✓ VLLM KV Cache Memory initialized successfully.\n")

    # 3. Extract a VLLMKVCacheItem from a prompt
    print("===== Extract VLLMKVCacheItem =====")
    system_prompt = [
        {"role": "system", "content": "You are a helpful AI assistant."},
        {"role": "user", "content": "What is MemOS?"},
        {"role": "assistant", "content": "MemOS is a memory operating system for LLMs."},
    ]

    try:
        cache_item = vllm_kv_mem.extract(system_prompt)
        print("✓ KV cache item extracted successfully")
        print(f" ID: {cache_item.id}")
        print(f" Memory (prompt): {cache_item.memory[:100]}...")
        print(f" Metadata: {cache_item.metadata}")
        print()
    except Exception as e:
        # Without a first cache item nothing below can work; bail out.
        print(f"✗ Failed to extract KV cache item: {e}")
        return

    # 4. Add the extracted VLLMKVCacheItem
    print("===== Add VLLMKVCacheItem =====")
    vllm_kv_mem.add([cache_item])
    all_items = vllm_kv_mem.get_all()
    print(f"✓ Added cache item. Total items: {len(all_items)}")
    print()

    # 5. Get by id
    print("===== Get VLLMKVCacheItem by id =====")
    retrieved = vllm_kv_mem.get(cache_item.id)
    if retrieved:
        print(f"✓ Retrieved cache item: {retrieved.id}")
        print(f" Memory (prompt): {retrieved.memory[:100]}...")
    else:
        print("✗ Failed to retrieve cache item")
    print()

    # 6. Get cache (returns prompt string for vLLM)
    print("===== Get Cache (Prompt String) =====")
    prompt_string = vllm_kv_mem.get_cache([cache_item.id])
    if prompt_string:
        print(f"✓ Retrieved prompt string: {prompt_string[:100]}...")
        print(" This prompt can be used for vLLM generation with preloaded KV cache")
    else:
        print("✗ Failed to retrieve prompt string")
    print()

    # 7. Extract another cache item for demonstration
    print("===== Extract Another VLLMKVCacheItem =====")
    another_prompt = [
        {"role": "system", "content": "You are a coding assistant."},
        {"role": "user", "content": "Write a Python function to calculate fibonacci numbers."},
    ]

    # BUGFIX: the original referenced cache_item2 unconditionally in the
    # preload step, so a failed second extract raised NameError there
    # (silently swallowed by the broad except, masking the real error).
    # Track the item explicitly and only preload ids that were created.
    cache_item2 = None
    try:
        cache_item2 = vllm_kv_mem.extract(another_prompt)
        vllm_kv_mem.add([cache_item2])
        print(f"✓ Added second cache item. Total items: {len(vllm_kv_mem.get_all())}")
        print()
    except Exception as e:
        print(f"✗ Failed to extract second KV cache item: {e}")
        print()

    # 8. Preload KV cache on vLLM server
    print("===== Preload KV Cache on vLLM Server =====")
    preload_ids = [cache_item.id]
    if cache_item2 is not None:
        preload_ids.append(cache_item2.id)
    try:
        vllm_kv_mem.preload_kv_cache(preload_ids)
        print("✓ KV cache preloaded on vLLM server successfully")
        print(" The server now has the KV cache ready for fast generation")
    except Exception as e:
        print(f"✗ Failed to preload KV cache: {e}")
    print()

    # 9. Delete one item
    print("===== Delete One VLLMKVCacheItem =====")
    vllm_kv_mem.delete([cache_item.id])
    remaining_items = vllm_kv_mem.get_all()
    print(f"✓ Deleted cache item. Remaining items: {len(remaining_items)}")
    print()

    # 10. Dump and load
    print("===== Dump and Load VLLMKVCacheMemory =====")
    try:
        vllm_kv_mem.dump("tmp/vllm_kv_mem")
        print("✓ Memory dumped to 'tmp/vllm_kv_mem'")

        # Clear memory and reload
        vllm_kv_mem.delete_all()
        vllm_kv_mem.load("tmp/vllm_kv_mem")
        reloaded_items = vllm_kv_mem.get_all()
        print(f"✓ Memory loaded from 'tmp/vllm_kv_mem': {len(reloaded_items)} items")
    except Exception as e:
        print(f"✗ Failed to dump/load memory: {e}")
    print()

    print("=== Example completed successfully ===")


if __name__ == "__main__":
    main()

examples/data/config/mem_scheduler/general_scheduler_config.yaml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,8 @@ backend: general_scheduler
22
config:
33
top_k: 10
44
top_n: 5
5-
act_mem_update_interval: 300
5+
act_mem_update_interval: 30
66
context_window_size: 5
7-
activation_mem_size: 5
87
thread_pool_max_workers: 5
98
consume_interval_seconds: 3
109
enable_parallel_dispatch: true

examples/data/config/mem_scheduler/mem_cube_config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ text_mem:
2020
graph_db:
2121
backend: "neo4j"
2222
config:
23-
uri: "bolt://123.57.48.226:7687"
23+
uri: "bolt://localhost:7687"
2424
user: "neo4j"
2525
password: "12345678"
2626
db_name: "user11alice"

examples/data/config/mem_scheduler/memos_config_w_scheduler.yaml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,8 @@ mem_scheduler:
3434
config:
3535
top_k: 10
3636
top_n: 5
37-
act_mem_update_interval: 300
37+
act_mem_update_interval: 30
3838
context_window_size: 5
39-
activation_mem_size: 1000
4039
thread_pool_max_workers: 10
4140
consume_interval_seconds: 3
4241
enable_parallel_dispatch: true
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
user_id: "root"
2+
chat_model:
3+
backend: "huggingface"
4+
config:
5+
model_name_or_path: "Qwen/Qwen3-1.7B"
6+
temperature: 0.1
7+
remove_think_prefix: true
8+
max_tokens: 4096
9+
mem_reader:
10+
backend: "simple_struct"
11+
config:
12+
llm:
13+
backend: "openai"
14+
config:
15+
model_name_or_path: "gpt-4o-mini"
16+
temperature: 0.8
17+
max_tokens: 4096
18+
top_p: 0.9
19+
top_k: 50
20+
remove_think_prefix: true
21+
api_key: "sk-xxxxxx"
22+
api_base: "https://api.openai.com/v1"
23+
embedder:
24+
backend: "ollama"
25+
config:
26+
model_name_or_path: "nomic-embed-text:latest"
27+
chunker:
28+
backend: "sentence"
29+
config:
30+
tokenizer_or_token_counter: "gpt2"
31+
chunk_size: 512
32+
chunk_overlap: 128
33+
min_sentences_per_chunk: 1
34+
mem_scheduler:
35+
backend: "general_scheduler"
36+
config:
37+
top_k: 2
38+
top_n: 5
39+
act_mem_update_interval: 30
40+
context_window_size: 5
41+
thread_pool_max_workers: 10
42+
consume_interval_seconds: 3
43+
enable_parallel_dispatch: true
44+
max_turns_window: 20
45+
top_k: 5
46+
enable_textual_memory: true
47+
enable_activation_memory: true
48+
enable_parametric_memory: false
49+
enable_mem_scheduler: true

examples/mem_os/chat_w_generated_cube_explicit_memory.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@
8484
"graph_db": {
8585
"backend": "neo4j",
8686
"config": {
87-
"uri": "bolt://123.57.48.226:7687",
87+
"uri": "bolt://localhost:7687",
8888
"user": "neo4j",
8989
"password": "12345678",
9090
"db_name": "user03alice11",
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
"""
2+
Example demonstrating how to use MOSProduct for multi-user scenarios.
3+
"""
4+
5+
from memos.configs.mem_cube import GeneralMemCubeConfig
6+
from memos.configs.mem_os import MOSConfig
7+
from memos.mem_cube.general import GeneralMemCube
8+
from memos.mem_os.product import MOSProduct
9+
10+
11+
def get_config(user_name):
    """Build the example MOS configuration plus a default memory cube.

    Args:
        user_name: User identifier; also used for the cube id and as the
            Neo4j database name.

    Returns:
        A ``(MOSConfig, GeneralMemCube)`` pair ready to hand to
        ``MOSProduct``.
    """
    # One OpenAI-style LLM config, reused by the chat model, the mem
    # reader, and both cube-level LLMs.
    llm_settings = {
        "model_name_or_path": "gpt-4o-mini",
        "temperature": 0.8,
        "max_tokens": 1024,
        "top_p": 0.9,
        "top_k": 50,
        "remove_think_prefix": True,
        "api_key": "your-api-key-here",
        "api_base": "https://api.openai.com/v1",
    }
    # Shared embedder settings (Ollama-served embedding model).
    embedder_settings = {
        "backend": "ollama",
        "config": {
            "model_name_or_path": "nomic-embed-text:latest",
        },
    }

    mos_config = MOSConfig(
        user_id="root",
        chat_model={"backend": "openai", "config": llm_settings},
        mem_reader={
            "backend": "naive",
            "config": {
                "llm": {"backend": "openai", "config": llm_settings},
                "embedder": embedder_settings,
            },
        },
        enable_textual_memory=True,
        enable_activation_memory=False,
        top_k=5,
        max_turns_window=20,
    )

    cube_config = GeneralMemCubeConfig.model_validate(
        {
            "user_id": user_name,
            "cube_id": f"{user_name}_default_cube",
            "text_mem": {
                "backend": "tree_text",
                "config": {
                    "extractor_llm": {"backend": "openai", "config": llm_settings},
                    "dispatcher_llm": {"backend": "openai", "config": llm_settings},
                    "graph_db": {
                        "backend": "neo4j",
                        "config": {
                            "uri": "bolt://localhost:7687",
                            "user": "neo4j",
                            "password": "12345678",
                            "db_name": user_name,
                            "auto_create": True,
                        },
                    },
                    "embedder": embedder_settings,
                },
            },
            "act_mem": {},
            "para_mem": {},
        }
    )
    return mos_config, GeneralMemCube(cube_config)
79+
80+
81+
def main():
    """Exercise the MOSProduct multi-user API end to end for one user.

    Registers "alice", streams a chat reply, adds a memory, runs two
    searches, then prints user bookkeeping info.

    NOTE(review): relies on the OpenAI / Ollama / Neo4j endpoints from
    ``get_config`` being reachable.
    """
    mos_config, mem_cube = get_config(user_name="alice")
    # Initialize MOSProduct with the default config
    product = MOSProduct(default_config=mos_config)

    # Register the first user together with a default memory cube
    registration = product.user_register(
        user_id="alice",
        user_name="alice",
        interests="I'm interested in machine learning and AI research.",
        default_mem_cube=mem_cube,
    )
    print(f"User registration result: {registration}")

    # Chat with Alice (responses stream in chunks)
    print("\n=== Chatting with Alice ===")
    for chunk in product.chat(query="What are my interests?", user_id="alice"):
        print(chunk, end="")

    # Add a memory for Alice into her default cube
    product.add(
        user_id="alice",
        memory_content="I attended a machine learning conference last week.",
        mem_cube_id=registration["default_cube_id"],
    )

    # Search Alice's memories
    found = product.search(query="conference", user_id="alice")
    print(f"\nSearch result for Alice: {found}")

    # Fetch all of Alice's textual memories
    found = product.get_all(query="conference", user_id="alice", memory_type="text_mem")
    print(f"\nSearch result for Alice: {found}")

    # List all users
    users = product.list_users()
    print(f"\nAll registered users: {users}")

    # Get user info
    alice_info = product.get_user_info("alice")
    print(f"\nAlice's info: {alice_info}")


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)