Commit 5127d77

Added efficiency fixes to Milvus and MySQL
Milvus: load each collection into memory once and keep all loaded collections cached, instead of reloading the collection from scratch on every operation.

MySQL: added support for bulk insert. This required changing id generation from AUTO_INCREMENT to UUIDs generated before insertion, because MySQL does not support returning all of the ids generated by auto increment for a bulk insert.

Co-authored-by: olgaoznovich <[email protected]>
Co-authored-by: Yuval-Roth <[email protected]>
1 parent c4f9253 commit 5127d77
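
Why bulk insert forced the id change, as a minimal sketch (not code from this commit; host, credentials, and sample rows are illustrative, and it assumes the `modelcache_llm_answer` schema from `data/mysql/init/init.sql`): with a multi-row `INSERT`, MySQL's `LAST_INSERT_ID()` reports only the first auto-generated id of the batch, so the driver cannot recover one id per row. Generating UUIDs client-side makes every id known before the insert.

```python
# Minimal sketch: bulk insert with client-generated UUID keys.
# Assumes a reachable MySQL with the modelcache_llm_answer table;
# connection parameters and sample rows below are illustrative.
import uuid
import pymysql

rows = [("q1", "a1"), ("q2", "a2"), ("q3", "a3")]  # (question, answer)

# Ids are generated before insertion, so all of them are known up front.
# With AUTO_INCREMENT, executemany() exposes at most one generated id
# (MySQL's LAST_INSERT_ID() reports only the first id of a multi-row insert).
ids = [str(uuid.uuid4()) for _ in rows]

conn = pymysql.connect(host="localhost", user="root", password="root", database="modelcache")
try:
    with conn.cursor() as cursor:
        cursor.executemany(
            "INSERT INTO modelcache_llm_answer "
            "(id, question, answer, answer_type, model, embedding_data, is_deleted) "
            "VALUES (%s, %s, %s, %s, %s, %s, %s)",
            [(i, q, a, 0, "model_demo", b"", 0) for i, (q, a) in zip(ids, rows)],
        )
    conn.commit()
finally:
    conn.close()

print(ids)  # every primary key, with no extra round trip to the database
```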

File tree

6 files changed: +132 -56 lines changed


data/mysql/init/init.sql

Lines changed: 1 addition & 1 deletion
@@ -3,7 +3,7 @@ CREATE DATABASE IF NOT EXISTS `modelcache`;
 USE `modelcache`;
 
 CREATE TABLE IF NOT EXISTS `modelcache_llm_answer` (
-  `id` bigint(20) unsigned NOT NULL AUTO_INCREMENT comment 'primary key',
+  `id` CHAR(36) comment 'primary key',
   `gmt_create` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP comment 'creation time',
   `gmt_modified` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP comment 'modification time',
   `question` text NOT NULL comment 'question',

docker-compose.yaml

Lines changed: 1 addition & 1 deletion
@@ -1,7 +1,7 @@
 name: "modelcache"
 services:
   mysql:
-    image: mysql:9.3.0
+    image: mysql:8.0.23
     container_name: mysql
     environment:
       MYSQL_ROOT_PASSWORD: 'root'

modelcache/adapter/adapter_query.py

Lines changed: 1 addition & 1 deletion
@@ -11,7 +11,7 @@
 
 def adapt_query(cache_data_convert, *args, **kwargs):
     chat_cache = kwargs.pop("cache_obj", cache)
-    scope = kwargs.pop("scope", None)
+    scope = kwargs.pop("scope")
     model = scope['model']
     if not chat_cache.has_init:
         raise NotInitError()
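
The `scope` change above is a fail-fast fix: with a `None` default, a request arriving without a scope only failed one line later at `scope['model']` with an unhelpful `TypeError`; without the default, `kwargs.pop("scope")` raises `KeyError` at the actual point of misuse. A short illustration:

```python
kwargs = {}  # request arrived without a scope
scope = kwargs.pop("scope", None)  # old behavior: scope is silently None ...
# scope['model']                   # ... TypeError: 'NoneType' object is not subscriptable
kwargs.pop("scope")                # new behavior: KeyError: 'scope', raised immediately
```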

modelcache/manager/scalar_data/sql_storage.py

Lines changed: 91 additions & 28 deletions
@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 import os
 import time
+import uuid
 
 import pymysql
 import json
@@ -42,26 +43,59 @@ def _insert(self, data: List):
         answer_type = 0
         embedding_data = embedding_data.tobytes()
         is_deleted = 0
+        _id = str(uuid.uuid4())
 
         table_name = "modelcache_llm_answer"
-        insert_sql = "INSERT INTO {} (question, answer, answer_type, model, embedding_data, is_deleted) VALUES (%s, %s, %s, %s, _binary%s, %s)".format(table_name)
+        insert_sql = f"""
+            INSERT INTO {table_name}
+            (id, question, answer, answer_type, model, embedding_data, is_deleted)
+            VALUES (%s, %s, %s, %s, %s, _binary%s, %s)
+        """
         conn = self.pool.connection()
         try:
             with conn.cursor() as cursor:
                 # execute the insert
-                values = (question, answer, answer_type, model, embedding_data, is_deleted)
+                values = (_id, question, answer, answer_type, model, embedding_data, is_deleted)
                 cursor.execute(insert_sql, values)
                 conn.commit()
-                id = cursor.lastrowid
         finally:
             # close the connection and return it to the pool
             conn.close()
-        return id
+        return _id
 
-    def batch_insert(self, all_data: List[CacheData]):
+    def batch_insert(self, all_data: List[List]):
+        table_name = "modelcache_llm_answer"
+        insert_sql = f"""
+            INSERT INTO {table_name}
+            (id, question, answer, answer_type, model, embedding_data, is_deleted)
+            VALUES (%s, %s, %s, %s, %s, %s, %s)
+        """
+
+        values_list = []
         ids = []
+
         for data in all_data:
-            ids.append(self._insert(data))
+            answer = data[0]
+            question = data[1]
+            embedding_data = data[2].tobytes()
+            model = data[3]
+            answer_type = 0
+            is_deleted = 0
+            _id = str(uuid.uuid4())
+            ids.append(_id)
+
+            values_list.append((
+                _id, question, answer, answer_type, model, embedding_data, is_deleted
+            ))
+
+        conn = self.pool.connection()
+        try:
+            with conn.cursor() as cursor:
+                cursor.executemany(insert_sql, values_list)
+                conn.commit()
+        finally:
+            conn.close()
+
         return ids
 
     def insert_query_resp(self, query_resp, **kwargs):
@@ -78,7 +112,11 @@ def insert_query_resp(self, query_resp, **kwargs):
         hit_query = json.dumps(hit_query, ensure_ascii=False)
 
         table_name = "modelcache_query_log"
-        insert_sql = "INSERT INTO {} (error_code, error_desc, cache_hit, model, query, delta_time, hit_query, answer) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)".format(table_name)
+        insert_sql = f"""
+            INSERT INTO {table_name}
+            (error_code, error_desc, cache_hit, model, query, delta_time, hit_query, answer)
+            VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
+        """
         conn = self.pool.connection()
         try:
             with conn.cursor() as cursor:
@@ -92,15 +130,16 @@ def insert_query_resp(self, query_resp, **kwargs):
 
     def get_data_by_id(self, key: int):
         table_name = "modelcache_llm_answer"
-        query_sql = "select question, answer, embedding_data, model from {} where id={}".format(table_name, key)
-        conn_start = time.time()
+        query_sql = f"""
+            SELECT question, answer, embedding_data, model
+            FROM {table_name}
+            WHERE id = %s
+        """
         conn = self.pool.connection()
-
-        search_start = time.time()
         try:
             with conn.cursor() as cursor:
                 # execute the database operation
-                cursor.execute(query_sql)
+                cursor.execute(query_sql, (key,))
                 resp = cursor.fetchone()
         finally:
             # close the connection and return it to the pool
@@ -113,14 +152,18 @@ def get_data_by_id(self, key: int):
 
     def update_hit_count_by_id(self, primary_id: int):
         table_name = "modelcache_llm_answer"
-        update_sql = "UPDATE {} SET hit_count = hit_count+1 WHERE id={}".format(table_name, primary_id)
+        update_sql = f"""
+            UPDATE {table_name}
+            SET hit_count = hit_count+1
+            WHERE id = %s
+        """
         conn = self.pool.connection()
 
         # execute the update using the connection
         try:
             with conn.cursor() as cursor:
                 # execute the update
-                cursor.execute(update_sql)
+                cursor.execute(update_sql, (primary_id,))
                 conn.commit()
         finally:
             # close the connection and return it to the pool
@@ -129,12 +172,16 @@ def update_hit_count_by_id(self, primary_id: int):
     def get_ids(self, deleted=True):
         table_name = "modelcache_llm_answer"
         state = 1 if deleted else 0
-        query_sql = "Select id FROM {} WHERE is_deleted = {}".format(table_name, state)
+        query_sql = f"""
+            SELECT id
+            FROM {table_name}
+            WHERE is_deleted = %s
+        """
 
         conn = self.pool.connection()
         try:
             with conn.cursor() as cursor:
-                cursor.execute(query_sql)
+                cursor.execute(query_sql, (state,))
                 ids = [row[0] for row in cursor.fetchall()]
         finally:
             conn.close()
@@ -143,37 +190,45 @@ def get_ids(self, deleted=True):
 
     def mark_deleted(self, keys):
         table_name = "modelcache_llm_answer"
-        mark_sql = " update {} set is_deleted=1 WHERE id in ({})".format(table_name, ",".join([str(i) for i in keys]))
+        placeholders = ",".join(["%s"] * len(keys))
+        mark_sql = f"""
+            UPDATE {table_name}
+            SET is_deleted=1
+            WHERE id in ({placeholders})
+        """
 
-        # get a connection from the pool
         conn = self.pool.connection()
         try:
             with conn.cursor() as cursor:
-                # execute the delete
-                cursor.execute(mark_sql)
+                cursor.execute(mark_sql, keys)
                 delete_count = cursor.rowcount
                 conn.commit()
         finally:
-            # close the connection and return it to the pool
             conn.close()
         return delete_count
 
     def model_deleted(self, model_name):
         table_name = "modelcache_llm_answer"
-        delete_sql = "Delete from {} WHERE model='{}'".format(table_name, model_name)
+        delete_sql = f"""
+            Delete from {table_name}
+            WHERE model = %s
+        """
 
         table_log_name = "modelcache_query_log"
-        delete_log_sql = "Delete from {} WHERE model='{}'".format(table_log_name, model_name)
+        delete_log_sql = f"""
+            Delete from {table_log_name}
+            WHERE model = %s
+        """
 
         conn = self.pool.connection()
         # execute the deletes using the connection
         try:
             with conn.cursor() as cursor:
                 # execute the delete
-                resp = cursor.execute(delete_sql)
+                resp = cursor.execute(delete_sql, (model_name,))
                 conn.commit()
                 # delete the logs for this model; resp_log row count is not returned
-                resp_log = cursor.execute(delete_log_sql)
+                resp_log = cursor.execute(delete_log_sql, (model_name,))
                 conn.commit()  # commit each transaction separately
         finally:
             # close the connection and return it to the pool
@@ -182,7 +237,10 @@ def model_deleted(self, model_name):
 
     def clear_deleted_data(self):
         table_name = "modelcache_llm_answer"
-        delete_sql = "DELETE FROM {} WHERE is_deleted = 1".format(table_name)
+        delete_sql = f"""
+            DELETE FROM {table_name}
+            WHERE is_deleted = 1
+        """
 
         conn = self.pool.connection()
         try:
@@ -197,10 +255,15 @@ def clear_deleted_data(self):
 
     def count(self, state: int = 0, is_all: bool = False):
         table_name = "modelcache_llm_answer"
+
+        # we're not using prepared statements here, so we need to ensure state is an integer
+        if not isinstance(state, int):
+            raise ValueError("'state' must be an integer.")
+
         if is_all:
-            count_sql = "SELECT COUNT(*) FROM {}".format(table_name)
+            count_sql = f"SELECT COUNT(*) FROM {table_name}"
         else:
-            count_sql = "SELECT COUNT(*) FROM {} WHERE is_deleted = {}".format(table_name,state)
+            count_sql = f"SELECT COUNT(*) FROM {table_name} WHERE is_deleted = {state}"
 
         conn = self.pool.connection()
         try:
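
One detail of the `mark_deleted` change above: since the `IN` list length varies per call, the parameterized form has to build one `%s` placeholder per key at runtime and pass the keys separately, so the driver escapes the values. A self-contained sketch of just that pattern (the table name is reused from the diff for illustration):

```python
def build_mark_deleted_sql(table_name: str, num_keys: int) -> str:
    """Build an UPDATE with one %s placeholder per key; the key values are
    passed to cursor.execute() separately, so the driver escapes them
    instead of interpolating raw ids into the SQL string."""
    placeholders = ",".join(["%s"] * num_keys)
    return f"UPDATE {table_name} SET is_deleted=1 WHERE id in ({placeholders})"

print(build_mark_deleted_sql("modelcache_llm_answer", 3))
# UPDATE modelcache_llm_answer SET is_deleted=1 WHERE id in (%s,%s,%s)
```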

modelcache/manager/vector_data/milvus.py

Lines changed: 31 additions & 21 deletions
@@ -66,6 +66,8 @@ def __init__(
         self.search_params = (
             search_params or self.SEARCH_PARAM[self.index_params["index_type"]]
         )
+        self.collections = dict()
+
 
     def _connect(self, host, port, user, password, secure):
         try:
@@ -87,12 +89,14 @@ def _connect(self, host, port, user, password, secure):
                 timeout=10
             )
 
+
     def _create_collection(self, collection_name):
         if not utility.has_collection(collection_name, using=self.alias):
             schema = [
                 FieldSchema(
                     name="id",
-                    dtype=DataType.INT64,
+                    dtype=DataType.VARCHAR,
+                    max_length=36,
                     is_primary=True,
                     auto_id=False,
                 ),
@@ -101,67 +105,71 @@ def _create_collection(self, collection_name):
                 ),
             ]
             schema = CollectionSchema(schema)
-            self.col = Collection(
+
+            new_collection = Collection(
                 collection_name,
                 schema=schema,
                 consistency_level="Session",
                 using=self.alias,
             )
         else:
             modelcache_log.warning("The %s collection already exists, and it will be used directly.", collection_name)
-            self.col = Collection(
+            new_collection = Collection(
                 collection_name, consistency_level="Session", using=self.alias
             )
 
-        if len(self.col.indexes) == 0:
+        self.collections[collection_name] = new_collection
+
+        if len(new_collection.indexes) == 0:
             try:
                 modelcache_log.info("Attempting creation of Milvus index.")
-                self.col.create_index("embedding", index_params=self.index_params)
+                new_collection.create_index("embedding", index_params=self.index_params)
                 modelcache_log.info("Creation of Milvus index successful.")
             except MilvusException as e:
                 modelcache_log.warning("Error with building index: %s, and attempting creation of default index.", e)
                 i_p = {"metric_type": "L2", "index_type": "AUTOINDEX", "params": {}}
-                self.col.create_index("embedding", index_params=i_p)
+                new_collection.create_index("embedding", index_params=i_p)
                 self.index_params = i_p
         else:
-            self.index_params = self.col.indexes[0].to_dict()["index_param"]
+            self.index_params = new_collection.indexes[0].to_dict()["index_param"]
+
+        new_collection.load()
 
-        self.col.load()
 
     def _get_collection(self, collection_name):
-        self.col = Collection(
-            collection_name, consistency_level="Session", using=self.alias
-        )
-        self.col.load()
+        if collection_name not in self.collections:
+            self._create_collection(collection_name)
+        return self.collections[collection_name]
 
     def mul_add(self, datas: List[VectorData], model=None):
         collection_name_model = self.collection_name + '_' + model
-        self._create_collection(collection_name_model)
-
+        col = self._get_collection(collection_name_model)
         data_array, id_array = map(list, zip(*((data.data, data.id) for data in datas)))
         np_data = np.array(data_array).astype("float32")
         entities = [id_array, np_data]
-        self.col.insert(entities)
+        col.insert(entities)
+
 
     def search(self, data: np.ndarray, top_k: int = -1, model=None):
         if top_k == -1:
             top_k = self.top_k
         collection_name_model = self.collection_name + '_' + model
-        self._create_collection(collection_name_model)
-        search_result = self.col.search(
+        col = self._get_collection(collection_name_model)
+        search_result = col.search(
             data=data.reshape(1, -1).tolist(),
             anns_field="embedding",
            param=self.search_params,
            limit=top_k,
        )
        return list(zip(search_result[0].distances, search_result[0].ids))
 
+
     def delete(self, ids, model=None):
         collection_name_model = self.collection_name + '_' + model
-        self._get_collection(collection_name_model)
+        col = self._get_collection(collection_name_model)
 
         del_ids = ",".join([str(x) for x in ids])
-        resp = self.col.delete(f"id in [{del_ids}]")
+        resp = col.delete(f"id in [{del_ids}]")
         delete_count = resp.delete_count
         return delete_count
 
@@ -178,10 +186,12 @@ def rebuild_col(self, model):
         logging.info('create_collection: {}'.format(e))
 
     def rebuild(self, ids=None):  # pylint: disable=unused-argument
-        self.col.compact()
+        for col in self.collections.values():
+            col.compact()
 
     def flush(self):
-        self.col.flush(_async=True)
+        for col in self.collections.values():
+            col.flush(_async=True)
 
     def close(self):
         self.flush()
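
The Milvus change amounts to memoizing collection loads: `_get_collection` loads a collection on first use, keeps the handle in `self.collections`, and returns the cached handle afterwards, so queries no longer pay the load cost on every call. The same idea reduced to its core, as a generic sketch (not the ModelCache API; `loader` stands in for `_create_collection`):

```python
from typing import Callable, Dict, TypeVar

T = TypeVar("T")

class CollectionCache:
    """Memoize expensive collection loads: load once, then reuse."""

    def __init__(self, loader: Callable[[str], T]):
        self._loader = loader            # loads a collection into memory by name
        self._cache: Dict[str, T] = {}   # name -> loaded collection

    def get(self, name: str) -> T:
        if name not in self._cache:      # only the first request pays the load cost
            self._cache[name] = self._loader(name)
        return self._cache[name]         # later requests are a dict lookup

# Illustrative usage:
cache = CollectionCache(loader=lambda name: f"<collection {name}>")
print(cache.get("answer_gpt4"))  # loads and caches
print(cache.get("answer_gpt4"))  # served from the cache
```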
