Skip to content

Commit 946ff60

Browse files
committed
Setting REDIS_KEEP_DOCUMENTS=1 as default
1 parent e3386be commit 946ff60

File tree

2 files changed

+36
-43
lines changed

2 files changed

+36
-43
lines changed

engine/clients/redis/config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
REDIS_USER = os.getenv("REDIS_USER", None)
66
REDIS_CLUSTER = bool(int(os.getenv("REDIS_CLUSTER", 0)))
77
REDIS_HYBRID_POLICY = os.getenv("REDIS_HYBRID_POLICY", None)
8-
REDIS_KEEP_DOCUMENTS = bool(os.getenv("REDIS_KEEP_DOCUMENTS", 0))
8+
# Keep documents by default. The env value is parsed through int() first because
# os.getenv returns a string when the variable is set, and bool("0") is True;
# without int() REDIS_KEEP_DOCUMENTS=0 could never disable the flag.
REDIS_KEEP_DOCUMENTS = bool(int(os.getenv("REDIS_KEEP_DOCUMENTS", 1)))
99
GPU_STATS = bool(int(os.getenv("GPU_STATS", 0)))
1010
GPU_STATS_ENDPOINT = os.getenv("GPU_STATS_ENDPOINT", None)
1111

engine/clients/redis/upload.py

Lines changed: 35 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -55,48 +55,41 @@ def init_client(cls, host, distance, connection_params, upload_params):
5555
def upload_batch(
5656
cls, ids: List[int], vectors: List[list], metadata: Optional[List[dict]]
5757
):
58-
# if we don't delete the docs we can skip sending them again
59-
# By default we always send the docs
60-
if REDIS_KEEP_DOCUMENTS is False:
61-
#p = cls.client.pipeline(transaction=False)
62-
for i in range(len(ids)):
63-
idx = ids[i]
64-
vector_key = str(idx)
65-
if cls.client.exists(vector_key) is False:
66-
print(f"Setting missing key: {vector_key}")
67-
vec = vectors[i]
68-
meta = metadata[i] if metadata else {}
69-
geopoints = {}
70-
payload = {}
71-
if meta is not None:
72-
for k, v in meta.items():
73-
# This is a patch for arxiv-titles dataset where we have a list of "labels", and
74-
# we want to index all of them under the same TAG field (whose separator is ';').
75-
if k == "labels":
76-
payload[k] = ";".join(v)
77-
if (
78-
v is not None
79-
and not isinstance(v, dict)
80-
and not isinstance(v, list)
81-
):
82-
payload[k] = v
83-
# Redis treats geopoints differently and requires putting them as
84-
# a comma-separated string with lat and lon coordinates
85-
geopoints = {
86-
k: ",".join(map(str, convert_to_redis_coords(v["lon"], v["lat"])))
87-
for k, v in meta.items()
88-
if isinstance(v, dict)
89-
}
90-
cls.client.hset(
91-
vector_key,
92-
mapping={
93-
"vector": np.array(vec).astype(cls.np_data_type).tobytes(),
94-
**payload,
95-
**geopoints,
96-
},
97-
)
98-
99-
#p.execute()
58+
for i in range(len(ids)):
59+
idx = ids[i]
60+
vector_key = str(idx)
61+
vec = vectors[i]
62+
meta = metadata[i] if metadata else {}
63+
geopoints = {}
64+
payload = {}
65+
if meta is not None:
66+
for k, v in meta.items():
67+
# This is a patch for arxiv-titles dataset where we have a list of "labels", and
68+
# we want to index all of them under the same TAG field (whose separator is ';').
69+
if k == "labels":
70+
payload[k] = ";".join(v)
71+
if (
72+
v is not None
73+
and not isinstance(v, dict)
74+
and not isinstance(v, list)
75+
):
76+
payload[k] = v
77+
# Redis treats geopoints differently and requires putting them as
78+
# a comma-separated string with lat and lon coordinates
79+
geopoints = {
80+
k: ",".join(map(str, convert_to_redis_coords(v["lon"], v["lat"])))
81+
for k, v in meta.items()
82+
if isinstance(v, dict)
83+
}
84+
cls.client.hset(
85+
vector_key,
86+
mapping={
87+
"vector": np.array(vec).astype(cls.np_data_type).tobytes(),
88+
**payload,
89+
**geopoints,
90+
},
91+
)
92+
10093

10194
@classmethod
10295
def post_upload(cls, _distance):

0 commit comments

Comments
 (0)