Skip to content

Commit 8bb2303

Browse files
authored
fix oracle to store collection description (#130)
* fix Oracle tables: use two tables to store collection information and vector items * fix lint * fix lint * fix lint * fix lint
1 parent e305ef6 commit 8bb2303

File tree

1 file changed

+51
-37
lines changed

1 file changed

+51
-37
lines changed

deepsearcher/vector_db/oracle.py

Lines changed: 51 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ def __init__(
4848
increment=increment,
4949
)
5050
log.color_print(f"Connected to Oracle database at {dsn}")
51+
self.check_table()
5152
except Exception as e:
5253
log.critical(f"Failed to connect to Oracle database at {dsn}")
5354
log.critical(f"Oracle database error in init: {e}")
@@ -148,30 +149,34 @@ def has_collection(self, collection: str = "deepsearcher"):
148149
else:
149150
return False
150151

151-
def check_table(self):
    """Ensure every table declared in ``TABLES`` exists, creating missing ones.

    Runs the ``has_table`` statement from ``SQL_TEMPLATES``, collects the
    ``table_name`` value of each returned row, and calls
    ``self.create_tables`` for every key of ``TABLES`` that was not returned.

    Raises:
        Exception: any error from the lookup or from creating a table is
            logged with ``log.critical`` and re-raised unchanged.
    """
    SQL = SQL_TEMPLATES["has_table"]
    try:
        res = self.query(SQL)
        # Compare names instead of counting rows: the lookup reads all_tables,
        # which is not limited to one schema, so a row count can reach the
        # expected total while one of the tables is still missing. This also
        # avoids hard-coding the number of entries in TABLES.
        # An empty or None result means no table was found.
        existing = {row["table_name"] for row in res or []}
        # Walk TABLES itself so tables are created in declaration order.
        for table in TABLES:
            if table not in existing:
                self.create_tables(table)
    except Exception as e:
        log.critical(f"Failed to check table in Oracle database, error info: {e}")
        raise
161163

162-
def create_tables(self, table_name):
    """Create one table from its DDL statement stored in ``TABLES``.

    Args:
        table_name: key of ``TABLES`` whose DDL statement is executed.

    Raises:
        KeyError: if ``table_name`` is not a key of ``TABLES``.
        Exception: any error raised while executing the DDL is logged with
            ``log.critical`` and re-raised unchanged.
    """
    ddl_statement = TABLES[table_name]
    try:
        self.execute(ddl_statement)
        log.color_print(f"Created table {table_name} in Oracle database")
    except Exception as err:
        log.critical(f"Failed to create table {table_name} in Oracle database, error info: {err}")
        raise
170172

171173
def drop_collection(self, collection: str = "deepsearcher"):
172174
try:
173-
SQL = SQL_TEMPLATES["drop_collection"]
174175
params = {"collection": collection}
176+
SQL = SQL_TEMPLATES["drop_collection"]
177+
self.execute(SQL, params)
178+
179+
SQL = SQL_TEMPLATES["drop_collection_item"]
175180
self.execute(SQL, params)
176181
log.color_print(f"Collection {collection} dropped")
177182
except Exception as e:
@@ -227,22 +232,18 @@ def init_collection(
227232
collection = self.default_collection
228233
if description is None:
229234
description = ""
230-
try:
231-
has_table = self.has_table()
232-
if not has_table:
233-
self.create_table()
234-
except Exception as e:
235-
log.critical(f"fail to init db for oracle, error info: {e}")
236-
237235
try:
238236
has_collection = self.has_collection(collection)
239237
if force_new_collection and has_collection:
240238
self.drop_collection(collection)
241239
elif has_collection:
242240
return
241+
# insert collection info
242+
SQL = SQL_TEMPLATES["insert_collection"]
243+
params = {"collection": collection, "description": description}
244+
self.execute(SQL, params)
243245
except Exception as e:
244-
log.critical(f"fail to init db for milvus, error info: {e}")
245-
raise
246+
log.critical(f"fail to init_collection for oracle, error info: {e}")
246247

247248
def insert_data(
248249
self,
@@ -315,16 +316,13 @@ def list_collections(self, *args, **kwargs) -> List[CollectionInfo]:
315316
collections = self.query(SQL)
316317
if collections:
317318
for collection in collections:
318-
description = ""
319319
collection_infos.append(
320320
CollectionInfo(
321321
collection_name=collection["collection"],
322-
description=description,
322+
description=collection["description"],
323323
)
324324
)
325-
return collection_infos
326-
else:
327-
log.critical("No collections found")
325+
return collection_infos
328326
except Exception as e:
329327
log.critical(f"fail to list collections, error info: {e}")
330328
raise
@@ -339,8 +337,15 @@ def clear_db(self, collection: str = "deepsearcher", *args, **kwargs):
339337
raise
340338

341339

342-
SQL_TEMPLATES = {
343-
"ddl": """CREATE TABLE DEEPSEARCHER (
340+
TABLES = {
341+
"DEEPSEARCHER_COLLECTION_INFO": """CREATE TABLE DEEPSEARCHER_COLLECTION_INFO (
342+
id INT generated by default as identity primary key,
343+
collection varchar(256),
344+
description CLOB,
345+
status NUMBER DEFAULT 1,
346+
createtime TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
347+
updatetime TIMESTAMP DEFAULT NULL)""",
348+
"DEEPSEARCHER_COLLECTION_ITEM": """CREATE TABLE DEEPSEARCHER_COLLECTION_ITEM (
344349
id INT generated by default as identity primary key,
345350
collection varchar(256),
346351
embedding VECTOR,
@@ -350,15 +355,24 @@ def clear_db(self, collection: str = "deepsearcher", *args, **kwargs):
350355
status NUMBER DEFAULT 1,
351356
createtime TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
352357
updatetime TIMESTAMP DEFAULT NULL)""",
353-
"has_table": "SELECT count(*) as rowcnt FROM all_tables WHERE table_name='DEEPSEARCHER'",
354-
"has_collection": "select count(*) as rowcnt from DEEPSEARCHER where collection=:collection and status=1",
355-
"list_collections": "select distinct collection from DEEPSEARCHER where status=1",
356-
"drop_collection": "update DEEPSEARCHER set status=0 where collection=:collection and status=1",
357-
"insert": """INSERT INTO DEEPSEARCHER (collection,embedding,text,reference,metadata)
358+
}
359+
360+
SQL_TEMPLATES = {
361+
"has_table": f"""SELECT table_name FROM all_tables
362+
WHERE table_name in ({",".join([f"'{k}'" for k in TABLES.keys()])})""",
363+
"has_collection": "select count(*) as rowcnt from DEEPSEARCHER_COLLECTION_INFO where collection=:collection and status=1",
364+
"list_collections": "select collection,description from DEEPSEARCHER_COLLECTION_INFO where status=1",
365+
"drop_collection": "update DEEPSEARCHER_COLLECTION_INFO set status=0 where collection=:collection and status=1",
366+
"drop_collection_item": "update DEEPSEARCHER_COLLECTION_ITEM set status=0 where collection=:collection and status=1",
367+
"insert_collection": """INSERT INTO DEEPSEARCHER_COLLECTION_INFO (collection,description)
368+
values (:collection,:description)""",
369+
"insert": """INSERT INTO DEEPSEARCHER_COLLECTION_ITEM (collection,embedding,text,reference,metadata)
358370
values (:collection,:embedding,:text,:reference,:metadata)""",
359371
"search": """SELECT * FROM
360372
(SELECT t.*,
361373
VECTOR_DISTANCE(t.embedding,vector(:embedding_string,{dimension},{dtype}),COSINE) as distance
362-
FROM DEEPSEARCHER t WHERE t.collection=:collection)
374+
FROM DEEPSEARCHER_COLLECTION_ITEM t
375+
JOIN DEEPSEARCHER_COLLECTION_INFO c ON t.collection=c.collection
376+
WHERE t.collection=:collection AND t.status=1 AND c.status=1)
363377
WHERE distance<:max_distance ORDER BY distance ASC FETCH FIRST :top_k ROWS ONLY""",
364378
}

0 commit comments

Comments
 (0)