Skip to content

Commit e3386be

Browse files
committed
logging missing keys
1 parent 6ba28d4 commit e3386be

File tree

1 file changed

+35
-33
lines changed

1 file changed

+35
-33
lines changed

engine/clients/redis/upload.py

Lines changed: 35 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -61,39 +61,41 @@ def upload_batch(
6161
#p = cls.client.pipeline(transaction=False)
6262
for i in range(len(ids)):
6363
idx = ids[i]
64-
vec = vectors[i]
65-
meta = metadata[i] if metadata else {}
66-
geopoints = {}
67-
payload = {}
68-
if meta is not None:
69-
for k, v in meta.items():
70-
# This is a patch for arxiv-titles dataset where we have a list of "labels", and
71-
# we want to index all of them under the same TAG field (whose separator is ';').
72-
if k == "labels":
73-
payload[k] = ";".join(v)
74-
if (
75-
v is not None
76-
and not isinstance(v, dict)
77-
and not isinstance(v, list)
78-
):
79-
payload[k] = v
80-
# Redis treats geopoints differently and requires putting them as
81-
# a comma-separated string with lat and lon coordinates
82-
geopoints = {
83-
k: ",".join(map(str, convert_to_redis_coords(v["lon"], v["lat"])))
84-
for k, v in meta.items()
85-
if isinstance(v, dict)
86-
}
87-
cls.client.hsetnx(
88-
str(idx),
89-
"vector",
90-
np.array(vec).astype(cls.np_data_type).tobytes()
91-
# mapping={
92-
# "vector": np.array(vec).astype(cls.np_data_type).tobytes(),
93-
# **payload,
94-
# **geopoints,
95-
# },
96-
)
64+
vector_key = str(idx)
65+
if cls.client.exists(vector_key) is False:
66+
print(f"Setting missing key: {vector_key}")
67+
vec = vectors[i]
68+
meta = metadata[i] if metadata else {}
69+
geopoints = {}
70+
payload = {}
71+
if meta is not None:
72+
for k, v in meta.items():
73+
# This is a patch for arxiv-titles dataset where we have a list of "labels", and
74+
# we want to index all of them under the same TAG field (whose separator is ';').
75+
if k == "labels":
76+
payload[k] = ";".join(v)
77+
if (
78+
v is not None
79+
and not isinstance(v, dict)
80+
and not isinstance(v, list)
81+
):
82+
payload[k] = v
83+
# Redis treats geopoints differently and requires putting them as
84+
# a comma-separated string with lat and lon coordinates
85+
geopoints = {
86+
k: ",".join(map(str, convert_to_redis_coords(v["lon"], v["lat"])))
87+
for k, v in meta.items()
88+
if isinstance(v, dict)
89+
}
90+
cls.client.hset(
91+
vector_key,
92+
mapping={
93+
"vector": np.array(vec).astype(cls.np_data_type).tobytes(),
94+
**payload,
95+
**geopoints,
96+
},
97+
)
98+
9799
#p.execute()
98100

99101
@classmethod

0 commit comments

Comments
 (0)