Skip to content

Commit 4fda8b6

Browse files
committed
refine knowledgebase and memory logs
1 parent 73f6665 commit 4fda8b6

File tree

4 files changed

+107
-50
lines changed

4 files changed

+107
-50
lines changed

veadk/knowledgebase/knowledgebase.py

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,13 @@ class KnowledgeBase:
2626
def __init__(
2727
self,
2828
backend: Literal["local", "opensearch", "viking", "redis", "mysql"] = "local",
29-
top_k: int = 5,
29+
top_k: int = 10,
3030
db_config=None,
3131
):
32-
logger.debug(f"Create knowledgebase, backend is {backend}")
32+
logger.info(
33+
f"Initializing knowledgebase: backend={backend} top_k={top_k} db_config={db_config}"
34+
)
35+
3336
self.backend = backend
3437
self.top_k = top_k
3538

@@ -38,6 +41,10 @@ def __init__(
3841
database_client=self.db_client
3942
)
4043

44+
logger.info(
45+
f"Initialized knowledgebase: db_client={self.db_client} adapter={self.adapter}"
46+
)
47+
4148
def add(
4249
self,
4350
data: str | list[str] | TextIO | BinaryIO | bytes,
@@ -51,9 +58,8 @@ def add(
5158
for example, if you read the file stream of a pdf, then you need to pass an additional parameter file_ext = '.pdf'.
5259
"""
5360
kwargs.pop("session_id", None) # remove session_id
54-
self.adapter.add(
55-
data, app_name, user_id="user_id", session_id="session_id", **kwargs
56-
)
61+
logger.info(f"Adding documents to knowledgebase: app_name={app_name}")
62+
self.adapter.add(data, app_name, **kwargs)
5763

5864
def search(self, query: str, app_name: str, top_k: int = None) -> list[str]:
5965
"""Retrieve documents similar to the query text in the vector database.
@@ -66,9 +72,10 @@ def search(self, query: str, app_name: str, top_k: int = None) -> list[str]:
6672
"""
6773
top_k = self.top_k if top_k is None else top_k
6874

69-
result = self.adapter.query(
70-
query=query, app_name=app_name, user_id="user_id", top_k=top_k
75+
logger.info(
76+
f"Searching knowledgebase: app_name={app_name} query={query} top_k={top_k}"
7177
)
78+
result = self.adapter.query(query=query, app_name=app_name, top_k=top_k)
7279
if len(result) == 0:
7380
logger.warning(f"No documents found in knowledgebase. Query: {query}")
7481
return result

veadk/knowledgebase/knowledgebase_database_adapter.py

Lines changed: 50 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,14 @@ def format_collection_name(collection_name: str) -> str:
3535
return re.sub(r"[^a-z0-9_]", "", replaced_str).lower()
3636

3737

38+
def build_index(**kwargs):
39+
"""
40+
Build the index name for the knowledgebase.
41+
"""
42+
# TODO
43+
...
44+
45+
3846
def get_knowledgebase_adapter(backend: str):
3947
if backend == DatabaseBackend.REDIS:
4048
return KnowledgebaseKVDatabaseAdapter
@@ -55,30 +63,22 @@ class KnowledgebaseKVDatabaseAdapter(BaseModel):
5563

5664
database_client: BaseDatabase
5765

58-
def add(self, content: list[str], app_name: str, user_id: str, session_id: str):
59-
"""Add texts to Redis.
60-
61-
Key: app_name
62-
Field: app_name:user_id
63-
Value: text in List
64-
"""
65-
# key = f"{app_name}:{user_id}"
66+
def add(self, content: list[str], app_name: str, **kwargs):
6667
key = f"{app_name}"
68+
logger.debug(f"Adding documents to Redis database: key={key}")
6769

6870
try:
6971
for _content in content:
7072
self.database_client.add(key, _content)
71-
logger.debug(
72-
f"Successfully added {len(content)} texts to Redis list key `{key}`."
73-
)
73+
logger.debug(f"Added {len(content)} texts to Redis database: key={key}")
7474
except Exception as e:
75-
logger.error(f"Failed to add texts to Redis list key `{key}`: {e}")
75+
logger.error(f"Failed to add texts to Redis database key `{key}`: {e}")
7676
raise e
7777

78-
def query(self, query: str, app_name: str, user_id: str, **kwargs):
79-
# key = f"{app_name}:{user_id}"
78+
def query(self, query: str, app_name: str, **kwargs):
8079
key = f"{app_name}"
8180
top_k = 10
81+
logger.debug(f"Querying Redis database: key={key} query={query}")
8282

8383
try:
8484
result = self.database_client.query(key, query)
@@ -87,9 +87,8 @@ def query(self, query: str, app_name: str, user_id: str, **kwargs):
8787
logger.error(f"Failed to search from Redis list key '{key}': {e}")
8888
raise e
8989

90-
def delete(self, app_name: str, user_id: str, session_id: str):
90+
def delete(self, app_name: str, **kwargs):
9191
try:
92-
# key = f"{app_name}:{user_id}:{session_id}"
9392
key = f"{app_name}"
9493
self.database_client.delete(key=key)
9594
logger.info(f"Successfully deleted data for app {app_name}")
@@ -104,6 +103,8 @@ class KnowledgebaseRelationalDatabaseAdapter(BaseModel):
104103
database_client: BaseDatabase
105104

106105
def create_table(self, table_name: str):
106+
logger.debug(f"Creating table for SQL database: table_name={table_name}")
107+
107108
sql = f"""
108109
CREATE TABLE `{table_name}` (
109110
`id` BIGINT AUTO_INCREMENT PRIMARY KEY,
@@ -113,8 +114,11 @@ def create_table(self, table_name: str):
113114
"""
114115
self.database_client.add(sql)
115116

116-
def add(self, content: list[str], app_name: str, user_id: str, session_id: str):
117+
def add(self, content: list[str], app_name: str, **kwargs):
117118
table = app_name
119+
logger.debug(
120+
f"Adding documents to SQL database: table_name={table} content_len={len(content)}"
121+
)
118122

119123
if not self.database_client.table_exists(table):
120124
logger.warning(f"Table {table} does not exist, creating...")
@@ -126,16 +130,19 @@ def add(self, content: list[str], app_name: str, user_id: str, session_id: str):
126130
VALUES (%s);
127131
"""
128132
self.database_client.add(sql, params=(_content,))
129-
logger.info(f"Successfully added {len(content)} texts to table {table}.")
133+
logger.debug(f"Added {len(content)} texts to table {table}.")
130134

131-
def query(self, query: str, app_name: str, user_id: str, **kwargs):
135+
def query(self, query: str, app_name: str, **kwargs):
132136
"""Search content from table app_name."""
133137
table = app_name
134138
top_k = 10
139+
logger.debug(
140+
f"Querying SQL database: table_name={table} query={query} top_k={top_k}"
141+
)
135142

136143
if not self.database_client.table_exists(table):
137144
logger.warning(
138-
f"querying {query}, but table `{table}` does not exist, returning empty list."
145+
f"Querying SQL database, but table `{table}` does not exist, returning empty list."
139146
)
140147
return []
141148

@@ -145,7 +152,7 @@ def query(self, query: str, app_name: str, user_id: str, **kwargs):
145152
results = self.database_client.query(sql)
146153
return [item["data"] for item in results]
147154

148-
def delete(self, app_name: str, user_id: str, session_id: str):
155+
def delete(self, app_name: str, **kwargs):
149156
table = app_name
150157
try:
151158
self.database_client.delete(table=table)
@@ -160,23 +167,23 @@ class KnowledgebaseVectorDatabaseAdapter(BaseModel):
160167

161168
database_client: BaseDatabase
162169

163-
def add(self, content: list[str], app_name: str, user_id: str, session_id: str):
164-
# collection_name = format_collection_name(f"{app_name}_{user_id}")
165-
# knowledgebase is application specific
170+
def add(self, content: list[str], app_name: str, **kwargs):
171+
logger.debug(
172+
f"Adding documents to vector database: app_name={app_name} content_len={len(content)}"
173+
)
166174
collection_name = format_collection_name(f"{app_name}")
167175
self.database_client.add(content, collection_name=collection_name)
168176

169-
def query(self, query: str, app_name: str, user_id: str, **kwargs):
170-
# collection_name = format_collection_name(f"{app_name}_{user_id}")
171-
# knowledgebase is application specific
177+
def query(self, query: str, app_name: str, **kwargs):
178+
logger.debug(
179+
f"Querying vector database: collection_name={app_name} query={query}"
180+
)
172181
collection_name = format_collection_name(f"{app_name}")
173182
return self.database_client.query(
174183
query, collection_name=collection_name, **kwargs
175184
)
176185

177-
def delete(self, app_name: str, user_id: str, session_id: str):
178-
# collection_name = format_collection_name(f"{app_name}_{user_id}")
179-
# knowledgebase is application specific
186+
def delete(self, app_name: str, **kwargs):
180187
collection_name = format_collection_name(f"{app_name}")
181188
try:
182189
self.database_client.delete(collection_name=collection_name)
@@ -193,13 +200,13 @@ class KnowledgebaseLocalDatabaseAdapter(BaseModel):
193200

194201
database_client: BaseDatabase
195202

196-
def add(self, content: list[str], app_name: str, user_id: str, session_id: str):
203+
def add(self, content: list[str], **kwargs):
197204
self.database_client.add(content)
198205

199206
def query(self, query: str, app_name: str, user_id: str, **kwargs):
200207
return self.database_client.query(query, **kwargs)
201208

202-
def delete(self, app_name: str, user_id: str, session_id: str):
209+
def delete(self, app_name: str, **kwargs):
203210
try:
204211
self.database_client.delete()
205212
logger.info(f"Successfully cleared local database for app {app_name}")
@@ -229,24 +236,30 @@ def add(
229236
self,
230237
content: str | list[str] | TextIO | BinaryIO | bytes,
231238
app_name: str,
232-
user_id: str,
233-
session_id: str,
234239
**kwargs,
235240
):
236-
# collection_name = format_collection_name(f"{app_name}_{user_id}")
237241
collection_name = format_collection_name(f"{app_name}")
242+
logger.debug(
243+
f"Adding documents to Viking database: collection_name={collection_name}"
244+
)
238245
self.get_or_create_collection(collection_name)
239246
self.database_client.add(content, collection_name=collection_name, **kwargs)
240247

241-
def query(self, query: str, app_name: str, user_id: str, **kwargs):
248+
def query(self, query: str, app_name: str, **kwargs):
242249
collection_name = format_collection_name(f"{app_name}")
250+
logger.debug(
251+
f"Querying Viking database: collection_name={collection_name} query={query}"
252+
)
253+
254+
# FIXME(): fix here
243255
if not self.database_client.collection_exists(collection_name):
244256
raise ValueError(f"Collection {collection_name} does not exist")
257+
245258
return self.database_client.query(
246259
query, collection_name=collection_name, **kwargs
247260
)
248261

249-
def delete(self, app_name: str, user_id: str, session_id: str):
262+
def delete(self, app_name: str, **kwargs):
250263
# collection_name = format_collection_name(f"{app_name}_{user_id}")
251264
collection_name = format_collection_name(f"{app_name}")
252265
try:

veadk/memory/long_term_memory.py

Lines changed: 35 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
from google.genai import types
2626
from typing_extensions import override
2727

28-
from veadk.config import getenv
2928
from veadk.database import DatabaseFactory
3029
from veadk.utils.logger import get_logger
3130

@@ -40,20 +39,27 @@ def __init__(
4039
backend: Literal[
4140
"local", "opensearch", "redis", "mysql", "viking"
4241
] = "opensearch",
43-
top_k: int = getenv("LONGTERM_MEMORY_TOP_K", 3),
42+
top_k: int = 5,
4443
):
4544
if backend == "viking":
4645
backend = "viking_mem"
4746
self.top_k = top_k
4847
self.backend = backend
4948

49+
logger.info(
50+
f"Initializing long term memory: backend={self.backend} top_k={self.top_k}"
51+
)
52+
5053
self.db_client = DatabaseFactory.create(
5154
backend=backend,
5255
)
53-
logger.info(f"Long term memory backend is `{backend}`.")
5456

5557
self.adapter = get_memory_adapter(backend)(database_client=self.db_client)
5658

59+
logger.info(
60+
f"Initialized long term memory: db_client={self.db_client} adapter={self.adapter}"
61+
)
62+
5763
@override
5864
async def add_session_to_memory(
5965
self,
@@ -63,6 +69,8 @@ async def add_session_to_memory(
6369
for event in session.events:
6470
if not event.content or not event.content.parts:
6571
continue
72+
if not event.author == "user": # we only add user event to memory
73+
continue
6674

6775
message = event.content.model_dump(exclude_none=True, mode="json")
6876
if (
@@ -77,37 +85,58 @@ async def add_session_to_memory(
7785
session_id=session.id,
7886
)
7987

88+
logger.info(
89+
f"Added {len(event_list)} events to long term memory: app_name={session.app_name} user_id={session.user_id} session_id={session.id}"
90+
)
91+
8092
@override
8193
async def search_memory(self, *, app_name: str, user_id: str, query: str):
94+
logger.info(
95+
f"Searching long term memory: query={query} app_name={app_name} user_id={user_id}"
96+
)
8297
memory_chunks = self.adapter.query(
8398
query=query,
8499
app_name=app_name,
85100
user_id=user_id,
86101
)
87102
if len(memory_chunks) == 0:
103+
logger.info(
104+
f"Found no memory chunks for query: {query} app_name={app_name} user_id={user_id}"
105+
)
88106
return SearchMemoryResponse()
89107

108+
logger.info(
109+
f"Found {len(memory_chunks)} memory chunks for query: {query} app_name={app_name} user_id={user_id}"
110+
)
111+
90112
memory_events = []
91113
for memory in memory_chunks:
92114
try:
93115
memory_dict = json.loads(memory)
94116
try:
95117
text = memory_dict["parts"][0]["text"]
96118
role = memory_dict["role"]
97-
except KeyError as e:
98-
logger.error(
99-
f"Memory content: {memory_dict}. Error parsing memory: {e}"
119+
except KeyError as _:
120+
# prevent not a standard text-based event
121+
logger.warning(
122+
f"Memory content: {memory_dict}. Skip return this memory."
100123
)
101124
continue
102125
except json.JSONDecodeError:
126+
# prevent the memory string is not dumped by `event`
103127
text = memory
104128
role = "user"
129+
105130
memory_events.append(
106131
MemoryEntry(
107132
author="user",
108133
content=types.Content(parts=[types.Part(text=text)], role=role),
109134
)
110135
)
136+
137+
logger.info(
138+
f"Return {len(memory_events)} memory events for query: {query} app_name={app_name} user_id={user_id}"
139+
)
111140
return SearchMemoryResponse(memories=memory_events)
112141

113142
@override

veadk/memory/memory_database_adapter.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,14 @@ def format_collection_name(collection_name: str) -> str:
3333
return re.sub(r"[^a-z0-9_]", "", replaced_str).lower()
3434

3535

36+
def build_index(**kwargs):
37+
"""
38+
Build the index name for the long-term memory.
39+
"""
40+
# TODO
41+
...
42+
43+
3644
def get_memory_adapter(backend: str):
3745
if backend == DatabaseBackend.REDIS:
3846
return MemoryKVDatabaseAdapter

0 commit comments

Comments
 (0)