Skip to content

Commit c9a2cc9

Browse files
authored
Merge pull request #361 from junefish/merge-namespaces
Create colab notebook for merging two namespaces
2 parents 632c4b4 + 2ed2c58 commit c9a2cc9

File tree

1 file changed

+140
-0
lines changed

1 file changed

+140
-0
lines changed
Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/pinecone-io/examples/blob/master/learn/experimental/merge-namespaces/merge-namespaces.ipynb) [![Open nbviewer](https://raw.githubusercontent.com/pinecone-io/examples/master/assets/nbviewer-shield.svg)](https://nbviewer.org/github/pinecone-io/examples/blob/master/learn/experimental/merge-namespaces/merge-namespaces.ipynb)"
8+
]
9+
},
10+
{
11+
"cell_type": "markdown",
12+
"metadata": {},
13+
"source": [
14+
"# Merging Namespaces in a Pinecone Index"
15+
]
16+
},
17+
{
18+
"cell_type": "markdown",
19+
"metadata": {},
20+
"source": [
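21+
"This notebook reads the vectors from two namespaces, `ns1` and `ns2`, by querying each namespace with `top_k` set to its vector count, and upserts them into a new namespace named `merged`. Because each namespace is read with a single query, this approach only suits namespaces small enough to be returned in one query response.\n",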
22+
"\n",
23+
"Please note that this code is **experimental** and is not guaranteed by Pinecone to work. Test thoroughly before using in production."
24+
]
25+
},
26+
{
27+
"cell_type": "code",
28+
"execution_count": null,
29+
"metadata": {},
30+
"outputs": [],
31+
"source": [
32+
"%pip install -qU pinecone-notebooks pinecone-client[grpc]"
33+
]
34+
},
35+
{
36+
"cell_type": "code",
37+
"execution_count": null,
38+
"metadata": {},
39+
"outputs": [],
40+
"source": [
41+
"from pinecone.grpc import PineconeGRPC as Pinecone\n",
42+
"from pinecone import ServerlessSpec\n",
43+
"import os\n",
44+
"\n",
45+
"# The generated API key is available in the PINECONE_API_KEY environment variable\n",
46+
"api_key = os.environ.get('PINECONE_API_KEY')\n",
47+
"\n",
48+
"# Use the API key to initialize the Pinecone client\n",
49+
"pc = Pinecone(api_key=api_key)\n",
50+
"\n",
51+
"# Connect to your index\n",
52+
"index_name = \"namespace-test\" # replace with the correct index name\n",
53+
"index = pc.Index(index_name)"
54+
]
55+
},
56+
{
57+
"cell_type": "code",
58+
"execution_count": null,
59+
"metadata": {},
60+
"outputs": [],
61+
"source": [
62+
"import random\n",
63+
"\n",
64+
"dimension = index.describe_index_stats()['dimension']\n",
65+
"# Function to fetch all vectors from a namespace\n",
66+
"def fetch_all_vectors(namespace):\n",
67+
" count = index.describe_index_stats()['namespaces'][namespace]['vector_count']\n",
68+
" random_vecs = [random.random() for _ in range(dimension)]\n",
69+
" response = index.query(\n",
70+
" namespace=namespace,\n",
71+
" vector=random_vecs,\n",
72+
" top_k=count,\n",
73+
" include_values=True\n",
74+
" )\n",
75+
" return response['matches']\n",
76+
"\n",
77+
"# Fetch vectors from ns1 and ns2\n",
78+
"vectors_ns1 = fetch_all_vectors(\"ns1\")\n",
79+
"vectors_ns2 = fetch_all_vectors(\"ns2\")"
80+
]
81+
},
82+
{
83+
"cell_type": "code",
84+
"execution_count": null,
85+
"metadata": {},
86+
"outputs": [],
87+
"source": [
88+
"# Convert fetched vectors to the required upsert format\n",
89+
"def format_vectors_for_upsert(fetched_vectors):\n",
90+
" return [{\"id\": match['id'], \"values\": match['values']} for match in fetched_vectors]\n",
91+
"\n",
92+
"formatted_vectors_ns1 = format_vectors_for_upsert(vectors_ns1)\n",
93+
"formatted_vectors_ns2 = format_vectors_for_upsert(vectors_ns2)\n",
94+
"\n",
95+
"print(f\"Preparing to upsert {len(formatted_vectors_ns1)} vectors from ns1 and \\\n",
96+
"{len(formatted_vectors_ns2)} vectors from ns2\")"
97+
]
98+
},
99+
{
100+
"cell_type": "markdown",
101+
"metadata": {},
102+
"source": [
103+
"Note that if the same ID exists in both `ns1` and `ns2`, the `ns2` vector overwrites the `ns1` vector in `merged`, because `ns2` is upserted last."
104+
]
105+
},
106+
{
107+
"cell_type": "code",
108+
"execution_count": null,
109+
"metadata": {},
110+
"outputs": [],
111+
"source": [
112+
"from itertools import islice\n",
113+
"\n",
114+
"# Upsert vectors in batches of 100\n",
115+
"def chunks(data, size=100):\n",
116+
" it = iter(data)\n",
117+
" for chunk in iter(lambda: tuple(islice(it, size)), ()):\n",
118+
" yield chunk\n",
119+
"\n",
120+
"# Upsert vectors into the merged namespace\n",
121+
"target_namespace = 'merged'\n",
122+
"for batch in chunks(formatted_vectors_ns1):\n",
123+
" index.upsert(vectors=batch, namespace=target_namespace)\n",
124+
"\n",
125+
"for batch in chunks(formatted_vectors_ns2):\n",
126+
" index.upsert(vectors=batch, namespace=target_namespace)\n",
127+
"\n",
128+
"print(f\"Upserted {len(formatted_vectors_ns1)} vectors from ns1 and \\\n",
129+
"{len(formatted_vectors_ns2)} vectors from ns2 into {target_namespace}\")"
130+
]
131+
}
132+
],
133+
"metadata": {
134+
"language_info": {
135+
"name": "python"
136+
}
137+
},
138+
"nbformat": 4,
139+
"nbformat_minor": 2
140+
}

0 commit comments

Comments
 (0)