Skip to content

Commit e3eac54

Browse files
committed
mm cache func develope
1 parent 1871eec commit e3eac54

File tree

6 files changed

+59
-14
lines changed

6 files changed

+59
-14
lines changed

modelcache/core.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ class Cache:
1717
def __init__(self):
1818
self.has_init = False
1919
self.cache_enable_func = None
20+
self.query_pre_embedding_func = None
21+
self.insert_pre_embedding_func = None
2022
self.mm_query_pre_embedding_func = None
2123
self.mm_insert_pre_embedding_func = None
2224
self.embedding_func = None
@@ -31,6 +33,8 @@ def __init__(self):
3133
def init(
3234
self,
3335
cache_enable_func=cache_all,
36+
query_pre_embedding_func=None,
37+
insert_pre_embedding_func=None,
3438
mm_query_pre_embedding_func=None,
3539
mm_insert_pre_embedding_func=None,
3640
embedding_func=string_embedding,
@@ -43,6 +47,8 @@ def init(
4347
):
4448
self.has_init = True
4549
self.cache_enable_func = cache_enable_func
50+
self.query_pre_embedding_func = query_pre_embedding_func
51+
self.insert_pre_embedding_func = insert_pre_embedding_func
4652
self.mm_query_pre_embedding_func = mm_query_pre_embedding_func
4753
self.mm_insert_pre_embedding_func = mm_insert_pre_embedding_func
4854
self.embedding_func = embedding_func

modelcache/manager/vector_data/redis.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,7 @@ def __init__(
2121
port: str = "6379",
2222
username: str = "",
2323
password: str = "",
24-
# dimension: int = 0,
2524
dimension: int = 0,
26-
i_dimension: int = 0,
27-
t_dimension: int = 0,
2825
top_k: int = 1,
2926
namespace: str = "",
3027
):
@@ -37,8 +34,6 @@ def __init__(
3734
)
3835
self.top_k = top_k
3936
self.dimension = dimension
40-
self.i_dimension = i_dimension
41-
self.t_dimension = t_dimension
4237
self.namespace = namespace
4338
self.doc_prefix = f"{self.namespace}doc:"
4439

modelcache/manager_mm/vector_data/manager.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ def get(name, **kwargs):
6969
local_data=local_data
7070
)
7171
elif name == "redis":
72-
from modelcache.manager.vector_data.redis import RedisVectorStore
72+
from modelcache.manager_mm.vector_data.redis import RedisVectorStore
7373
redis_config = kwargs.get("redis_config")
7474

7575
mm_dimension = kwargs.get("mm_dimension", DIMENSION)

modelcache/manager_mm/vector_data/redis.py

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,19 +21,23 @@ def __init__(
2121
port: str = "6379",
2222
username: str = "",
2323
password: str = "",
24-
dimension: int = 0,
24+
mm_dimension: int = 0,
25+
i_dimension: int = 0,
26+
t_dimension: int = 0,
2527
top_k: int = 1,
2628
namespace: str = "",
2729
):
28-
if dimension <= 0:
30+
if mm_dimension <= 0:
2931
raise ValueError(
30-
f"invalid `dim` param: {dimension} in the Milvus vector store."
32+
f"invalid `dim` param: {mm_dimension} in the Milvus vector store."
3133
)
3234
self._client = Redis(
3335
host=host, port=int(port), username=username, password=password
3436
)
3537
self.top_k = top_k
36-
self.dimension = dimension
38+
self.mm_dimension = mm_dimension
39+
self.i_dimension = i_dimension
40+
self.t_dimension = t_dimension
3741
self.namespace = namespace
3842
self.doc_prefix = f"{self.namespace}doc:"
3943

@@ -47,8 +51,16 @@ def _check_index_exists(self, index_name: str) -> bool:
4751
modelcache_log.info("Index already exists")
4852
return True
4953

50-
def create_index(self, index_name, index_prefix):
51-
dimension = self.dimension
54+
def create_index(self, index_name, mm_type, index_prefix):
55+
# dimension = self.dimension
56+
if mm_type == 'IMG_TEXT':
57+
dimension = self.mm_dimension
58+
elif mm_type == 'IMG':
59+
dimension = self.i_dimension
60+
elif mm_type == 'TEXT':
61+
dimension = self.t_dimension
62+
else:
63+
raise ValueError('dimension type exception')
5264
print('dimension: {}'.format(dimension))
5365
if self._check_index_exists(index_name):
5466
modelcache_log.info(
@@ -77,13 +89,17 @@ def create_index(self, index_name, index_prefix):
7789
)
7890
return 'create_success'
7991

80-
def mul_add(self, datas: List[VectorData], model=None):
81-
# pipe = self._client.pipeline()
92+
def mul_add(self, datas: List[VectorData], model=None, mm_type=None):
8293
for data in datas:
8394
id: int = data.id
8495
embedding = data.data.astype(np.float32).tobytes()
96+
97+
collection_name = get_collection_iat_name(model, mm_type)
98+
index_prefix = get_collection_iat_prefix(model, mm_type)
99+
85100
id_field_name = "data_id"
86101
embedding_field_name = "data_vector"
102+
87103
obj = {id_field_name: id, embedding_field_name: embedding}
88104
index_prefix = get_index_prefix(model)
89105
self._client.hset(f"{index_prefix}{id}", mapping=obj)

modelcache/utils/index_util.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,27 @@ def get_index_name(model):
77

88
def get_index_prefix(model):
99
return 'prefix' + '_' + model
10+
11+
12+
def get_mm_index_name(model, iat_type):
    """Return the multimodal index name for ``model`` and a modality tag.

    The result has the form ``multicache_<model>_<tag>``, where ``<tag>`` is
    ``iat``, ``image`` or ``text``.

    Args:
        model: Model identifier; must be a ``str`` because it is joined into
            the index name.
        iat_type: Modality selector. The long spellings ``'IMG_TEXT'``,
            ``'IMG'`` and ``'TEXT'`` are mapped to ``'iat'``, ``'image'`` and
            ``'text'``; the short spellings are accepted as they are.

    Returns:
        The index name as a ``str``.

    Raises:
        ValueError: If ``iat_type`` is not one of the six accepted spellings.
    """
    # One table is both the whitelist and the long -> short mapping, so the
    # accepted values cannot drift apart from the normalization rules.
    tag_by_alias = {
        'IMG_TEXT': 'iat',
        'iat': 'iat',
        'IMG': 'image',
        'image': 'image',
        'TEXT': 'text',
        'text': 'text',
    }
    try:
        tag = tag_by_alias[iat_type]
    except (KeyError, TypeError):
        # TypeError covers unhashable values (e.g. a list), which must be
        # rejected with the same ValueError as any other unknown selector.
        raise ValueError('iat_type is not normal!') from None
    return '_'.join(('multicache', model, tag))
22+
23+
24+
def get_collection_iat_prefix(model, iat_type, table_suffix=None):
    """Return the key prefix for ``model`` and a modality tag.

    The result has the form ``prefix_<model>_<tag>_<table_suffix>``, where
    ``<tag>`` is ``iat``, ``image`` or ``text``. When ``table_suffix`` is
    ``None`` the trailing segment is left out, giving ``prefix_<model>_<tag>``.

    Args:
        model: Model identifier; must be a ``str``.
        iat_type: Modality selector. The long spellings ``'IMG_TEXT'``,
            ``'IMG'`` and ``'TEXT'`` are mapped to ``'iat'``, ``'image'`` and
            ``'text'``; the short spellings are accepted as they are.
        table_suffix: Optional trailing segment (``str``). It defaults to
            ``None`` so that two-argument calls do not fail with a
            ``TypeError``. An empty string is still appended, leaving a
            trailing underscore, exactly as before.

    Returns:
        The prefix as a ``str``.

    Raises:
        ValueError: If ``iat_type`` is not one of the six accepted spellings.
    """
    # One table is both the whitelist and the long -> short mapping, so the
    # accepted values cannot drift apart from the normalization rules.
    tag_by_alias = {
        'IMG_TEXT': 'iat',
        'iat': 'iat',
        'IMG': 'image',
        'image': 'image',
        'TEXT': 'text',
        'text': 'text',
    }
    try:
        tag = tag_by_alias[iat_type]
    except (KeyError, TypeError):
        # TypeError covers unhashable values (e.g. a list), which must be
        # rejected with the same ValueError as any other unknown selector.
        raise ValueError('iat_type is not normal!') from None

    segments = ['prefix', model, tag]
    if table_suffix is not None:
        segments.append(table_suffix)
    return '_'.join(segments)

multicache_serving.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@
2727
from concurrent.futures import ThreadPoolExecutor
2828
from modelcache.maya_embedding_service.maya_multi_embedding_service import get_embedding_multi
2929
from modelcache.maya_embedding_service.maya_multi_embedding_service import get_embedding_multi_concurrent_sin
30+
from modelcache.processor.pre import query_multi_splicing
31+
from modelcache.processor.pre import insert_multi_splicing
3032

3133

3234
def save_query_info(result, model, query, delta_time_log):
@@ -73,6 +75,8 @@ def __init__(self):
7375
embedding_concurrent_func=get_embedding_multi_concurrent_sin,
7476
data_manager=data_manager,
7577
similarity_evaluation=SearchDistanceEvaluation(),
78+
query_pre_embedding_func=query_multi_splicing,
79+
insert_pre_embedding_func=insert_multi_splicing,
7680
mm_insert_pre_embedding_func=mm_insert_dict,
7781
mm_query_pre_embedding_func=mm_query_dict,
7882
)

0 commit comments

Comments
 (0)