@@ -61,39 +61,41 @@ def upload_batch(
6161 #p = cls.client.pipeline(transaction=False)
6262 for i in range (len (ids )):
6363 idx = ids [i ]
64- vec = vectors [i ]
65- meta = metadata [i ] if metadata else {}
66- geopoints = {}
67- payload = {}
68- if meta is not None :
69- for k , v in meta .items ():
70- # This is a patch for arxiv-titles dataset where we have a list of "labels", and
71- # we want to index all of them under the same TAG field (whose separator is ';').
72- if k == "labels" :
73- payload [k ] = ";" .join (v )
74- if (
75- v is not None
76- and not isinstance (v , dict )
77- and not isinstance (v , list )
78- ):
79- payload [k ] = v
80- # Redis treats geopoints differently and requires putting them as
81- # a comma-separated string with lat and lon coordinates
82- geopoints = {
83- k : "," .join (map (str , convert_to_redis_coords (v ["lon" ], v ["lat" ])))
84- for k , v in meta .items ()
85- if isinstance (v , dict )
86- }
87- cls .client .hsetnx (
88- str (idx ),
89- "vector" ,
90- np .array (vec ).astype (cls .np_data_type ).tobytes ()
91- # mapping={
92- # "vector": np.array(vec).astype(cls.np_data_type).tobytes(),
93- # **payload,
94- # **geopoints,
95- # },
96- )
64+ vector_key = str (idx )
65+ if cls .client .exists (vector_key ) is False :
66+ print (f"Setting missing key: { vector_key } " )
67+ vec = vectors [i ]
68+ meta = metadata [i ] if metadata else {}
69+ geopoints = {}
70+ payload = {}
71+ if meta is not None :
72+ for k , v in meta .items ():
73+ # This is a patch for arxiv-titles dataset where we have a list of "labels", and
74+ # we want to index all of them under the same TAG field (whose separator is ';').
75+ if k == "labels" :
76+ payload [k ] = ";" .join (v )
77+ if (
78+ v is not None
79+ and not isinstance (v , dict )
80+ and not isinstance (v , list )
81+ ):
82+ payload [k ] = v
83+ # Redis treats geopoints differently and requires putting them as
84+ # a comma-separated string with lat and lon coordinates
85+ geopoints = {
86+ k : "," .join (map (str , convert_to_redis_coords (v ["lon" ], v ["lat" ])))
87+ for k , v in meta .items ()
88+ if isinstance (v , dict )
89+ }
90+ cls .client .hset (
91+ vector_key ,
92+ mapping = {
93+ "vector" : np .array (vec ).astype (cls .np_data_type ).tobytes (),
94+ ** payload ,
95+ ** geopoints ,
96+ },
97+ )
98+
9799 #p.execute()
98100
99101 @classmethod
0 commit comments