|
78 | 78 | "vectors_ns1 = fetch_all_vectors(\"ns1\")\n",
|
79 | 79 | "vectors_ns2 = fetch_all_vectors(\"ns2\")"
|
80 | 80 | ]
|
| 81 | + }, |
| 82 | + { |
| 83 | + "cell_type": "code", |
| 84 | + "execution_count": null, |
| 85 | + "metadata": {}, |
| 86 | + "outputs": [], |
| 87 | + "source": [ |
| 88 | + "# Convert fetched vectors to the required upsert format\n", |
| 89 | + "def format_vectors_for_upsert(fetched_vectors):\n", |
| 90 | + " return [{\"id\": match['id'], \"values\": match['values']} for match in fetched_vectors]\n", |
| 91 | + "\n", |
| 92 | + "formatted_vectors_ns1 = format_vectors_for_upsert(vectors_ns1)\n", |
| 93 | + "formatted_vectors_ns2 = format_vectors_for_upsert(vectors_ns2)\n", |
| 94 | + "\n", |
| 95 | + "print(f\"Preparing to upsert {len(formatted_vectors_ns1)} vectors from ns1 and \\\n", |
| 96 | + "{len(formatted_vectors_ns2)} vectors from ns2\")" |
| 97 | + ] |
| 98 | + }, |
| 99 | + { |
| 100 | + "cell_type": "markdown", |
| 101 | + "metadata": {}, |
| 102 | + "source": [ |
| 103 | + "Note that if the same vector ID exists in both `ns1` and `ns2`, the `ns2` vector overwrites the `ns1` vector in the merged namespace, because `ns2` is upserted last." |
| 104 | + ] |
| 105 | + }, |
| 106 | + { |
| 107 | + "cell_type": "code", |
| 108 | + "execution_count": null, |
| 109 | + "metadata": {}, |
| 110 | + "outputs": [], |
| 111 | + "source": [ |
| 112 | + "from itertools import islice\n", |
| 113 | + "\n", |
| 114 | + "# Split the data into batches of up to 100 items (the default size) for upserting\n", |
| 115 | + "def chunks(data, size=100):\n", |
| 116 | + " it = iter(data)\n", |
| 117 | + " for chunk in iter(lambda: tuple(islice(it, size)), ()):\n", |
| 118 | + " yield chunk\n", |
| 119 | + "\n", |
| 120 | + "# Upsert vectors into the merged namespace\n", |
| 121 | + "target_namespace = 'merged'\n", |
| 122 | + "for batch in chunks(formatted_vectors_ns1):\n", |
| 123 | + " index.upsert(vectors=batch, namespace=target_namespace)\n", |
| 124 | + "\n", |
| 125 | + "for batch in chunks(formatted_vectors_ns2):\n", |
| 126 | + " index.upsert(vectors=batch, namespace=target_namespace)\n", |
| 127 | + "\n", |
| 128 | + "print(f\"Upserted {len(formatted_vectors_ns1)} vectors from ns1 and \\\n", |
| 129 | + "{len(formatted_vectors_ns2)} vectors from ns2 into {target_namespace}\")" |
| 130 | + ] |
81 | 131 | }
|
82 | 132 | ],
|
83 | 133 | "metadata": {
|
|
0 commit comments