Skip to content

Commit f613950

Browse files
committed
add ltm
1 parent 110d02d commit f613950

File tree

8 files changed

+490
-56
lines changed

8 files changed

+490
-56
lines changed

veadk/knowledgebase/backends/base_backend.py

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -24,31 +24,29 @@ class BaseKnowledgebaseBackend(ABC, BaseModel):
2424
@abstractmethod
2525
def add_from_directory(self, directory: str, **kwargs) -> bool:
2626
"""Add knowledge from file path to knowledgebase"""
27-
...
2827

2928
@abstractmethod
3029
def add_from_files(self, files: list[str], **kwargs) -> bool:
3130
"""Add knowledge (e.g, documents, strings, ...) to knowledgebase"""
32-
...
3331

3432
@abstractmethod
3533
def add_from_text(self, text: str | list[str], **kwargs) -> bool:
3634
"""Add knowledge from text to knowledgebase"""
37-
...
3835

3936
@abstractmethod
4037
def search(self, **kwargs) -> list:
4138
"""Search knowledge from knowledgebase"""
42-
...
4339

44-
def delete(self, **kwargs) -> bool:
45-
"""Delete knowledge from knowledgebase"""
46-
...
40+
# Optional methods for future use:
41+
# - `delete`: Delete collection or documents
42+
# - `list_docs`: List original documents
43+
# - `list_chunks`: List embedded document chunks
4744

48-
def list_docs(self, **kwargs) -> None:
49-
"""List original documents in knowledgebase"""
50-
pass
45+
# def delete(self, **kwargs) -> bool:
46+
# """Delete knowledge from knowledgebase"""
5147

52-
def list_chunks(self, **kwargs) -> None:
53-
"""List embeded document chunks in knowledgebase"""
54-
pass
48+
# def list_docs(self, **kwargs) -> None:
49+
# """List original documents in knowledgebase"""
50+
51+
# def list_chunks(self, **kwargs) -> None:
52+
# """List embedded document chunks in knowledgebase"""

veadk/knowledgebase/backends/redis_backend.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@
2121
from llama_index.core.schema import BaseNode
2222
from llama_index.embeddings.openai_like import OpenAILikeEmbedding
2323
from llama_index.vector_stores.redis import RedisVectorStore
24+
from llama_index.vector_stores.redis.schema import (
25+
RedisIndexInfo,
26+
RedisVectorStoreSchema,
27+
)
2428
from pydantic import Field
2529
from redis import Redis
2630
from typing_extensions import Any, override
@@ -56,8 +60,14 @@ def model_post_init(self, __context: Any) -> None:
5660
api_base=self.embedding_config.api_base,
5761
)
5862

63+
self._schema = RedisVectorStoreSchema(
64+
index=RedisIndexInfo(name=self.index),
65+
)
5966
self._vector_store = RedisVectorStore(
60-
redis_client=self._redis_client, overwrite=True
67+
schema=self._schema,
68+
redis_client=self._redis_client,
69+
overwrite=True,
70+
collection_name=self.index,
6171
)
6272

6373
self._storage_context = StorageContext.from_defaults(

veadk/memory/long_term_memory.py

Lines changed: 88 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -25,45 +25,83 @@
2525
from google.adk.memory.memory_entry import MemoryEntry
2626
from google.adk.sessions import Session
2727
from google.genai import types
28-
from pydantic import BaseModel
28+
from pydantic import BaseModel, Field
2929
from typing_extensions import override
3030

31-
from veadk.database import DatabaseFactory
32-
from veadk.database.database_adapter import get_long_term_memory_database_adapter
31+
from veadk.memory.long_term_memory_backends.base_backend import (
32+
BaseLongTermMemoryBackend,
33+
)
34+
from veadk.memory.long_term_memory_backends.in_memory_backend import InMemoryLTMBackend
35+
from veadk.memory.long_term_memory_backends.opensearch_backend import (
36+
OpensearchLTMBackend,
37+
)
38+
from veadk.memory.long_term_memory_backends.redis_backend import RedisLTMBackend
39+
from veadk.memory.long_term_memory_backends.vikingdb_memory_backend import (
40+
VikingDBKnowledgeBackend,
41+
)
3342
from veadk.utils.logger import get_logger
3443

3544
logger = get_logger(__name__)
3645

3746

47+
BACKEND_CLS = {
48+
"local": InMemoryLTMBackend,
49+
"opensearch": OpensearchLTMBackend,
50+
"viking": VikingDBKnowledgeBackend,
51+
"viking_mem": VikingDBKnowledgeBackend,
52+
"redis": RedisLTMBackend,
53+
}
54+
55+
3856
def build_long_term_memory_index(app_name: str, user_id: str):
    """Return the long term memory index name for an app/user pair.

    The name is the app name and the user id joined by a single underscore.
    """
    index_name = f"{app_name}_{user_id}"
    return index_name
4058

4159

4260
class LongTermMemory(BaseMemoryService, BaseModel):
43-
backend: Literal[
44-
"local", "opensearch", "redis", "mysql", "viking", "viking_mem"
45-
] = "opensearch"
61+
backend: Literal["local", "opensearch", "redis", "viking", "viking_mem"] = (
62+
"opensearch"
63+
)
64+
"""Long term memory backend type"""
65+
66+
backend_config: dict = Field(default_factory=dict)
67+
"""Long term memory backend configuration"""
68+
69+
backend_instance: BaseLongTermMemoryBackend | None = None
70+
"""An instance of a long term memory backend that implements the `BaseLongTermMemoryBackend` interface."""
71+
4672
top_k: int = 5
73+
"""Number of top similar documents to retrieve during search."""
4774

48-
def model_post_init(self, __context: Any) -> None:
49-
if self.backend == "viking":
50-
logger.warning(
51-
"`viking` backend is deprecated, switching to `viking_mem` backend."
52-
)
53-
self.backend = "viking_mem"
75+
app_name: str = ""
5476

55-
logger.info(
56-
f"Initializing long term memory: backend={self.backend} top_k={self.top_k}"
57-
)
77+
user_id: str = ""
5878

59-
self._db_client = DatabaseFactory.create(
60-
backend=self.backend,
61-
)
62-
self._adapter = get_long_term_memory_database_adapter(self._db_client)
79+
def model_post_init(self, __context: Any) -> None:
80+
self._backend = None
6381

64-
logger.info(
65-
f"Initialized long term memory: db_client={self._db_client.__class__.__name__} adapter={self._adapter}"
66-
)
82+
if self.backend_instance:
83+
self._backend = self.backend_instance
84+
logger.info(
85+
f"Initialized long term memory with provided backend instance {self._backend.__class__.__name__}"
86+
)
87+
else:
88+
if self.backend_config:
89+
logger.info(
90+
f"Initialized long term memory backend {self.backend} with config."
91+
)
92+
self._backend = BACKEND_CLS[self.backend](**self.backend_config)
93+
elif self.app_name and self.user_id:
94+
self.index = build_long_term_memory_index(
95+
app_name=self.app_name, user_id=self.user_id
96+
)
97+
logger.info(f"Long term memory index set to {self.index}.")
98+
self._backend = BACKEND_CLS[self.backend](
99+
**self.backend_config, index=self.index
100+
)
101+
else:
102+
logger.warning(
103+
"Neither `backend_instance`, `backend_config`, nor `app_name`/`user_id` is provided, the long term memory storage will initialize when adding a session."
104+
)
67105

68106
def _filter_and_convert_events(self, events: list[Event]) -> list[str]:
69107
final_events = []
@@ -91,40 +129,48 @@ async def add_session_to_memory(
91129
self,
92130
session: Session,
93131
):
132+
app_name = session.app_name
133+
user_id = session.user_id
134+
135+
if self.index != build_long_term_memory_index(app_name, user_id):
136+
logger.warning(
137+
f"The `app_name` or `user_id` is different from the initialized one, skip add session to memory. Initialized index: {self.index}, current built index: {build_long_term_memory_index(app_name, user_id)}"
138+
)
139+
return
140+
141+
if not self._backend:
142+
self.index = build_long_term_memory_index(app_name, user_id)
143+
self._backend = BACKEND_CLS[self.backend](index=self.index)
144+
logger.info(
145+
f"Initialize long term memory backend now, index is {self.index}"
146+
)
147+
94148
event_strings = self._filter_and_convert_events(session.events)
95-
index = build_long_term_memory_index(session.app_name, session.user_id)
96149

97150
logger.info(
98-
f"Adding {len(event_strings)} events to long term memory: index={index}"
151+
f"Adding {len(event_strings)} events to long term memory: index={self.index}"
99152
)
100153

101-
# check if viking memory database, should give a user id: if/else
102-
if self.backend == "viking_mem":
103-
self._adapter.add(data=event_strings, index=index, user_id=session.user_id)
104-
else:
105-
self._adapter.add(data=event_strings, index=index)
154+
self._backend.save_memory(event_strings=event_strings)
106155

107156
logger.info(
108-
f"Added {len(event_strings)} events to long term memory: index={index}"
157+
f"Added {len(event_strings)} events to long term memory: index={self.index}"
109158
)
110159

111160
@override
112161
async def search_memory(self, *, app_name: str, user_id: str, query: str):
113-
index = build_long_term_memory_index(app_name, user_id)
114-
115162
logger.info(
116-
f"Searching long term memory: query={query} index={index} top_k={self.top_k}"
163+
f"Searching long term memory: query={query} index={self.index} top_k={self.top_k}"
117164
)
118165

119-
# user id if viking memory db
120-
if self.backend == "viking_mem":
121-
memory_chunks = self._adapter.query(
122-
query=query, index=index, top_k=self.top_k, user_id=user_id
123-
)
124-
else:
125-
memory_chunks = self._adapter.query(
126-
query=query, index=index, top_k=self.top_k
166+
# prevent model invoke `load_memory` before add session to this memory
167+
if not self._backend:
168+
logger.error(
169+
"Long term memory backend is not initialized, cannot search memory."
127170
)
171+
return SearchMemoryResponse(memories=[])
172+
173+
memory_chunks = self._backend.search_memory(query=query, top_k=self.top_k)
128174

129175
memory_events = []
130176
for memory in memory_chunks:
@@ -152,6 +198,6 @@ async def search_memory(self, *, app_name: str, user_id: str, query: str):
152198
)
153199

154200
logger.info(
155-
f"Return {len(memory_events)} memory events for query: {query} index={index}"
201+
f"Return {len(memory_events)} memory events for query: {query} index={self.index}"
156202
)
157203
return SearchMemoryResponse(memories=memory_events)
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from abc import ABC, abstractmethod
16+
17+
from pydantic import BaseModel
18+
19+
20+
class BaseLongTermMemoryBackend(ABC, BaseModel):
    """Abstract interface implemented by every long term memory backend.

    A backend stores event strings and returns the stored entries that match
    a query. Concrete subclasses must implement both abstract methods.
    """

    # Name of the index this backend instance is bound to (required field).
    index: str

    @abstractmethod
    def save_memory(self, event_strings: list[str], **kwargs) -> bool:
        """Save memory to long term memory backend.

        Args:
            event_strings: Event strings to store.
            **kwargs: Backend-specific options.

        Returns:
            A boolean status flag; the exact meaning is defined by the
            concrete backend.
        """

    @abstractmethod
    def search_memory(self, query: str, top_k: int, **kwargs) -> list[str]:
        """Retrieve memory from long term memory backend.

        `top_k` is part of the contract so that the signature matches how the
        method is invoked (`search_memory(query=..., top_k=...)`) and how
        concrete backends declare it.

        Args:
            query: Text to search for.
            top_k: Maximum number of entries to return.
            **kwargs: Backend-specific options.

        Returns:
            The text of the retrieved memory entries.
        """
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
# Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from llama_index.core import Document, VectorStoreIndex
16+
from llama_index.core.schema import BaseNode
17+
from llama_index.embeddings.openai_like import OpenAILikeEmbedding
18+
from pydantic import Field
19+
from typing_extensions import Any, override
20+
21+
from veadk.configs.model_configs import EmbeddingModelConfig
22+
from veadk.knowledgebase.backends.utils import get_llama_index_splitter
23+
from veadk.memory.long_term_memory_backends.base_backend import (
24+
BaseLongTermMemoryBackend,
25+
)
26+
27+
28+
class InMemoryLTMBackend(BaseLongTermMemoryBackend):
    """Long term memory backend backed by a LlamaIndex `VectorStoreIndex`.

    The index is created empty with no external vector store configured;
    event strings are split into nodes, embedded with the configured
    embedding model and inserted into it.
    """

    embedding_config: EmbeddingModelConfig = Field(default_factory=EmbeddingModelConfig)
    """Embedding model configs"""

    def model_post_init(self, __context: Any) -> None:
        """Create the embedding client and the empty vector index."""
        self._embed_model = OpenAILikeEmbedding(
            model_name=self.embedding_config.name,
            api_key=self.embedding_config.api_key,
            api_base=self.embedding_config.api_base,
        )
        self._vector_index = VectorStoreIndex([], embed_model=self._embed_model)

    @override
    def save_memory(self, event_strings: list[str], **kwargs) -> bool:
        """Embed the given event strings and add them to the index.

        Args:
            event_strings: Event strings to store; an empty list is a no-op.
            **kwargs: Unused; accepted for interface compatibility.

        Returns:
            Always True.
        """
        documents = [Document(text=event_string) for event_string in event_strings]
        nodes = self._split_documents(documents)
        # Insert everything in one call; skip the call when nothing was produced.
        if nodes:
            self._vector_index.insert_nodes(nodes)
        return True

    @override
    def search_memory(self, query: str, top_k: int, **kwargs) -> list[str]:
        """Return the text of the `top_k` stored nodes most similar to `query`.

        Args:
            query: Text to search for.
            top_k: Maximum number of nodes to retrieve.
            **kwargs: Unused; accepted for interface compatibility.

        Returns:
            The text of each retrieved node.
        """
        # `retrieve()` takes only the query; the result count is configured on
        # the retriever itself, so build one with the requested `top_k`.
        retriever = self._vector_index.as_retriever(similarity_top_k=top_k)
        retrieved_nodes = retriever.retrieve(query)
        return [node.text for node in retrieved_nodes]

    def _split_documents(self, documents: list[Document]) -> list[BaseNode]:
        """Split document into chunks"""
        nodes = []
        for document in documents:
            # The splitter is selected from the document's `file_path`
            # metadata, falling back to an empty path when it is absent.
            splitter = get_llama_index_splitter(document.metadata.get("file_path", ""))
            nodes.extend(splitter.get_nodes_from_documents([document]))
        return nodes

0 commit comments

Comments
 (0)