Skip to content

Commit 5e5cc76

Browse files
committed
add upsert step
1 parent 98dfe41 commit 5e5cc76

File tree

1 file changed

+50
-0
lines changed

1 file changed

+50
-0
lines changed

learn/experimental/merge-namespaces/merge-namespaces.ipynb

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,56 @@
7878
"vectors_ns1 = fetch_all_vectors(\"ns1\")\n",
7979
"vectors_ns2 = fetch_all_vectors(\"ns2\")"
8080
]
81+
},
82+
{
83+
"cell_type": "code",
84+
"execution_count": null,
85+
"metadata": {},
86+
"outputs": [],
87+
"source": [
# Convert fetched vectors to the required upsert format
def format_vectors_for_upsert(fetched_vectors):
    """Build upsert-ready records from fetched vectors.

    Args:
        fetched_vectors: Iterable of records that expose ``'id'`` and
            ``'values'`` through ``record[key]``. A record may also carry a
            ``'metadata'`` entry.

    Returns:
        A list of dicts, one per input record and in the same order, each
        with ``"id"`` and ``"values"`` keys. A ``"metadata"`` key is added
        when the source record has non-empty metadata, so that merging does
        not silently discard it.
    """
    formatted = []
    for match in fetched_vectors:
        record = {"id": match['id'], "values": match['values']}
        # Metadata is optional. Plain dicts raise KeyError for a missing key;
        # AttributeError is also caught for attribute-backed record objects.
        # NOTE(review): whether fetch_all_vectors requests metadata is not
        # visible in this cell; confirm it does if metadata must survive.
        try:
            metadata = match['metadata']
        except (KeyError, AttributeError):
            metadata = None
        if metadata:
            record["metadata"] = metadata
        formatted.append(record)
    return formatted
91+
"\n",
# Reshape the vectors fetched from each source namespace into upsert payloads.
formatted_vectors_ns1, formatted_vectors_ns2 = [
    format_vectors_for_upsert(fetched)
    for fetched in (vectors_ns1, vectors_ns2)
]

# Report how many vectors each namespace contributes before anything is written.
print(
    f"Preparing to upsert {len(formatted_vectors_ns1)} vectors from ns1 and "
    f"{len(formatted_vectors_ns2)} vectors from ns2"
)
97+
]
98+
},
99+
{
100+
"cell_type": "markdown",
101+
"metadata": {},
102+
"source": [
103+
"Note that if a vector ID exists in both `ns1` and `ns2`, the `ns2` vector replaces the `ns1` one in the merged namespace, because `ns2` is upserted last."
104+
]
105+
},
106+
{
107+
"cell_type": "code",
108+
"execution_count": null,
109+
"metadata": {},
110+
"outputs": [],
111+
"source": [
112+
"from itertools import islice\n",
113+
"\n",
# Split an iterable into fixed-size batches (100 items by default)
def chunks(data, size=100):
    """Yield successive tuples of at most ``size`` items from ``data``.

    Args:
        data: Any iterable; it is consumed lazily, one batch at a time.
        size: Maximum number of items per batch. Defaults to 100.

    Yields:
        Tuples of up to ``size`` items in their original order. The last
        tuple may be shorter. Nothing is yielded when ``data`` is empty.

    Raises:
        ValueError: If ``size`` is less than 1. Because this is a generator,
            the error surfaces when iteration starts, not at call time.
    """
    # A size of 0 would make the first slice empty, so nothing would be
    # yielded and the caller would silently upsert nothing. Fail loudly.
    if size is not None and size < 1:
        raise ValueError(f"size must be a positive integer, got {size!r}")
    iterator = iter(data)
    batch = tuple(islice(iterator, size))
    while batch:
        yield batch
        batch = tuple(islice(iterator, size))
119+
"\n",
# Write every batch into the merged namespace: all ns1 batches first, then
# all ns2 batches, so ns2 is the later write for any ID present in both.
target_namespace = 'merged'
for batch in (
    piece
    for source in (formatted_vectors_ns1, formatted_vectors_ns2)
    for piece in chunks(source)
):
    index.upsert(vectors=batch, namespace=target_namespace)

# Summarize how many vectors from each source namespace were sent.
print(
    f"Upserted {len(formatted_vectors_ns1)} vectors from ns1 and "
    f"{len(formatted_vectors_ns2)} vectors from ns2 into {target_namespace}"
)
130+
]
81131
}
82132
],
83133
"metadata": {

0 commit comments

Comments
 (0)