Skip to content

Commit b0c50c2

Browse files
authored
Update links in docs/quick-tour notebooks (#428)
## Problem

Some notebooks contain broken links.

## Solution

Fix them.
1 parent ff856ce commit b0c50c2

File tree

8 files changed

+62
-87
lines changed

8 files changed

+62
-87
lines changed

.github/actions/validate-json/validate-notebook-formats.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,9 @@ def main():
1818
has_error = False
1919
# Walk through the repository to find all .ipynb files
2020
failing_notebooks = []
21-
for root, _, files in os.walk("."):
21+
for root, _, files in os.walk(".", topdown=True):
22+
if '.git' in root:
23+
continue
2224
for file in files:
2325
if file.endswith(".ipynb"):
2426
notebook_path = os.path.join(root, file)

.github/scripts/version-census.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,9 @@ def main():
3333
plugins_used = {}
3434
malformed_notebooks = []
3535

36-
for root, _, files in os.walk("."):
36+
for root, _, files in os.walk(".", topdown=True):
37+
if '.git' in root:
38+
continue
3739
for file in files:
3840
if file.endswith(".ipynb"):
3941
notebook_path = os.path.join(root, file)

.github/workflows/client-versions.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,13 @@ jobs:
1515
with:
1616
python-version: '3.11'
1717

18-
- name: Install uv
19-
uses: astral-sh/setup-uv@v5
18+
- name: Install Poetry
19+
uses: snok/install-poetry@v1
2020

2121
- name: Install dependencies
2222
run: |
23-
uv sync
23+
poetry install --with dev
2424
2525
- name: Run version census
2626
run: |
27-
uv run .github/scripts/version-census.py
27+
poetry run python3 .github/scripts/version-census.py

docs/quick-tour/hello-pinecone.ipynb

Lines changed: 12 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -95,9 +95,7 @@
9595
},
9696
"outputs": [],
9797
"source": [
98-
"!pip install -qU \\\n",
99-
" pinecone==6.0.1 \\\n",
100-
" pandas"
98+
"!pip install -qU pandas==2.2.3 pinecone==6.0.2"
10199
]
102100
},
103101
{
@@ -130,7 +128,7 @@
130128
"from pinecone import Pinecone\n",
131129
"\n",
132130
"# Get your API key at app.pinecone.io\n",
133-
"api_key = os.environ.get('PINECONE_API_KEY') or 'PINECONE_API_KEY'\n",
131+
"api_key = os.environ.get(\"PINECONE_API_KEY\") or \"PINECONE_API_KEY\"\n",
134132
"\n",
135133
"# Instantiate the Pinecone client\n",
136134
"pc = Pinecone(api_key=api_key)"
@@ -222,7 +220,7 @@
222220
"- `name` can be anything we like. The name is used as an identifier for the index when performing other operations such as `describe_index`, `delete_index`, and so on. \n",
223221
"- `metric` specifies the similarity metric that will be used later when you make queries to the index.\n",
224222
"- `dimension` should correspond to the dimension of the dense vectors produced by your embedding model. In this quick start, we are using made-up data so a small value is simplest.\n",
225-
"- `spec` holds a specification which tells Pinecone how you would like to deploy our index. You can find a list of all [available providers and regions here](https://docs.pinecone.io/docs/projects).\n",
223+
"- `spec` holds a specification which tells Pinecone how you would like to deploy our index. You can find a list of all [available providers and regions here](https://docs.pinecone.io/troubleshooting/available-cloud-regions).\n",
226224
"\n",
227225
"There are more configurations available, but this minimal set will get us started."
228226
]
@@ -279,10 +277,7 @@
279277
" name=index_name,\n",
280278
" metric=Metric.COSINE,\n",
281279
" dimension=3,\n",
282-
" spec=ServerlessSpec(\n",
283-
" cloud=CloudProvider.AWS, \n",
284-
" region=AwsRegion.US_EAST_1\n",
285-
" )\n",
280+
" spec=ServerlessSpec(cloud=CloudProvider.AWS, region=AwsRegion.US_EAST_1),\n",
286281
")"
287282
]
288283
},
@@ -478,10 +473,12 @@
478473
" \"vector\": [\n",
479474
" [random.random() for i in range(description.dimension)]\n",
480475
" for _ in range(num_vectors)\n",
481-
" ]\n",
482-
" })\n",
476+
" ],\n",
477+
" }\n",
478+
" )\n",
483479
" return df\n",
484480
"\n",
481+
"\n",
485482
"df = create_simulated_data_in_df(10)\n",
486483
"\n",
487484
"df.head()"
@@ -581,12 +578,14 @@
581578
"source": [
582579
"import time\n",
583580
"\n",
581+
"\n",
584582
"def is_fresh(index):\n",
585583
" stats = index.describe_index_stats()\n",
586584
" vector_count = stats.total_vector_count\n",
587585
" print(f\"Vector count: \", vector_count)\n",
588586
" return vector_count > 0\n",
589587
"\n",
588+
"\n",
590589
"while not is_fresh(index):\n",
591590
" # It takes a few moments for vectors we just upserted\n",
592591
" # to become available for querying\n",
@@ -699,13 +698,9 @@
699698
"# In a more realistic scenario, this would be an embedding vector\n",
700699
"# that encodes something meaningful. For this simple demo, we will\n",
701700
"# make up a vector that matches the dimension of our index.\n",
702-
"query_embedding = [2., 2., 2.]\n",
701+
"query_embedding = [2.0, 2.0, 2.0]\n",
703702
"\n",
704-
"index.query(\n",
705-
" vector=query_embedding,\n",
706-
" top_k=5,\n",
707-
" include_values=True\n",
708-
")"
703+
"index.query(vector=query_embedding, top_k=5, include_values=True)"
709704
]
710705
},
711706
{

docs/quick-tour/interacting-with-the-index.ipynb

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -108,9 +108,7 @@
108108
},
109109
"outputs": [],
110110
"source": [
111-
"!pip install -qU \\\n",
112-
" pinecone==6.0.1 \\\n",
113-
" pandas"
111+
"!pip install -qU pandas==2.2.3 pinecone==6.0.2"
114112
]
115113
},
116114
{
@@ -155,7 +153,7 @@
155153
"from pinecone import Pinecone\n",
156154
"\n",
157155
"# Get API key at app.pinecone.io\n",
158-
"api_key = os.environ.get('PINECONE_API_KEY') or 'PINECONE_API_KEY'\n",
156+
"api_key = os.environ.get(\"PINECONE_API_KEY\") or \"PINECONE_API_KEY\"\n",
159157
"\n",
160158
"# Instantiate the client\n",
161159
"pc = Pinecone(api_key=api_key)"
@@ -179,7 +177,7 @@
179177
"- `name` can be anything we like. The name is used as an identifier for the index when performing other operations such as `describe_index`, `delete_index`, and so on. \n",
180178
"- `metric` specifies the similarity metric that will be used later when you make queries to the index.\n",
181179
"- `dimension` should correspond to the dimension of the dense vectors produced by your embedding model. In this quick start, we are using made-up data so a small value is simplest.\n",
182-
"- `spec` holds a specification which tells Pinecone how you would like to deploy our index. You can find a list of all [available providers and regions here](https://docs.pinecone.io/docs/projects).\n",
180+
"- `spec` holds a specification which tells Pinecone how you would like to deploy our index. You can find a list of all [available providers and regions here](https://docs.pinecone.io/troubleshooting/available-cloud-regions).\n",
183181
"\n",
184182
"There are more configurations available, but this minimal set will get us started."
185183
]
@@ -274,13 +272,10 @@
274272
"from pinecone import ServerlessSpec, Metric, CloudProvider, AwsRegion\n",
275273
"\n",
276274
"pc.create_index(\n",
277-
" name=index_name, \n",
278-
" dimension=2, \n",
275+
" name=index_name,\n",
276+
" dimension=2,\n",
279277
" metric=Metric.EUCLIDEAN,\n",
280-
" spec=ServerlessSpec(\n",
281-
" cloud=CloudProvider.AWS, \n",
282-
" region=AwsRegion.US_EAST_1\n",
283-
" )\n",
278+
" spec=ServerlessSpec(cloud=CloudProvider.AWS, region=AwsRegion.US_EAST_1),\n",
284279
")"
285280
]
286281
},
@@ -450,7 +445,7 @@
450445
"\n",
451446
"df = pd.DataFrame()\n",
452447
"df[\"id\"] = [\"A\", \"B\", \"C\", \"D\", \"E\"]\n",
453-
"df[\"vector\"] = [[1., 1.], [2., 2.], [3., 3.], [4., 4.], [5., 5.]]\n",
448+
"df[\"vector\"] = [[1.0, 1.0], [2.0, 2.0], [3.0, 3.0], [4.0, 4.0], [5.0, 5.0]]\n",
454449
"df"
455450
]
456451
},
@@ -710,7 +705,7 @@
710705
],
711706
"source": [
712707
"# Update vectors by ID\n",
713-
"index.upsert(vectors=[(\"A\",[0.1, 0.1])])"
708+
"index.upsert(vectors=[(\"A\", [0.1, 0.1])])"
714709
]
715710
},
716711
{

docs/quick-tour/metadata-filtering.ipynb

Lines changed: 17 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -95,9 +95,7 @@
9595
},
9696
"outputs": [],
9797
"source": [
98-
"!pip install -qU \\\n",
99-
" pinecone==6.0.1 \\\n",
100-
" pandas"
98+
"!pip install -qU pandas==2.2.3 pinecone==6.0.2"
10199
]
102100
},
103101
{
@@ -131,10 +129,8 @@
131129
"import os\n",
132130
"from pinecone import Pinecone\n",
133131
"\n",
134-
"# initialize connection to pinecone (get API key at app.pinecone.io)\n",
135-
"api_key = os.environ.get('PINECONE_API_KEY') or 'PINECONE_API_KEY'\n",
136-
"\n",
137-
"# configure client\n",
132+
"# Initialize client\n",
133+
"api_key = os.environ.get(\"PINECONE_API_KEY\") or \"PINECONE_API_KEY\"\n",
138134
"pc = Pinecone(api_key=api_key)"
139135
]
140136
},
@@ -150,7 +146,7 @@
150146
"- `name` can be anything we like. The name is used as an identifier for the index when performing other operations such as `describe_index`, `delete_index`, and so on. \n",
151147
"- `metric` specifies the similarity metric that will be used later when you make queries to the index.\n",
152148
"- `dimension` should correspond to the dimension of the dense vectors produced by your embedding model. In this quick start, we are using made-up data so a small value is simplest.\n",
153-
"- `spec` holds a specification which tells Pinecone how you would like to deploy our index. You can find a list of all [available providers and regions here](https://docs.pinecone.io/docs/projects).\n",
149+
"- `spec` holds a specification which tells Pinecone how you would like to deploy our index. You can find a list of all [available providers and regions here](https://docs.pinecone.io/troubleshooting/available-cloud-regions).\n",
154150
"\n",
155151
"There are more configurations available, but this minimal set will get us started."
156152
]
@@ -203,13 +199,10 @@
203199
"\n",
204200
"# Create an index\n",
205201
"index_config = pc.create_index(\n",
206-
" name=index_name, \n",
207-
" dimension=2, \n",
202+
" name=index_name,\n",
203+
" dimension=2,\n",
208204
" metric=Metric.EUCLIDEAN,\n",
209-
" spec=ServerlessSpec(\n",
210-
" cloud=CloudProvider.AWS, \n",
211-
" region=AwsRegion.US_EAST_1\n",
212-
" )\n",
205+
" spec=ServerlessSpec(cloud=CloudProvider.AWS, region=AwsRegion.US_EAST_1),\n",
213206
")"
214207
]
215208
},
@@ -358,12 +351,13 @@
358351
"\n",
359352
"df = pd.DataFrame()\n",
360353
"df[\"id\"] = [\"F-1\", \"F-2\", \"S-1\", \"S-2\"]\n",
361-
"df[\"vector\"] = [[1., 1.], [2., 2.], [3., 3.], [4., 4.]]\n",
354+
"df[\"vector\"] = [[1.0, 1.0], [2.0, 2.0], [3.0, 3.0], [4.0, 4.0]]\n",
362355
"df[\"metadata\"] = [\n",
363356
" {\"category\": \"finance\", \"published\": 2015},\n",
364357
" {\"category\": \"finance\", \"published\": 2016},\n",
365358
" {\"category\": \"sport\", \"published\": 2017},\n",
366-
" {\"category\": \"sport\", \"published\": 2018}]\n",
359+
" {\"category\": \"sport\", \"published\": 2018},\n",
360+
"]\n",
367361
"df"
368362
]
369363
},
@@ -449,11 +443,13 @@
449443
"source": [
450444
"import time\n",
451445
"\n",
446+
"\n",
452447
"def is_fresh(index):\n",
453448
" stats = index.describe_index_stats()\n",
454449
" vector_count = stats.total_vector_count\n",
455450
" return vector_count > 0\n",
456451
"\n",
452+
"\n",
457453
"while not is_fresh(index):\n",
458454
" # It takes a few moments for vectors we just upserted\n",
459455
" # to become available for querying\n",
@@ -578,9 +574,7 @@
578574
],
579575
"source": [
580576
"query_results = index.query(\n",
581-
" vector=df[df.id == \"F-1\"].vector[0], \n",
582-
" top_k=3,\n",
583-
" include_metadata=True\n",
577+
" vector=df[df.id == \"F-1\"].vector[0], top_k=3, include_metadata=True\n",
584578
")\n",
585579
"query_results"
586580
]
@@ -646,13 +640,10 @@
646640
],
647641
"source": [
648642
"query_results = index.query(\n",
649-
" vector=df[df.id == \"F-1\"].vector[0], \n",
650-
" top_k=3, \n",
651-
" filter={\n",
652-
" \"category\" : {\"$eq\": \"finance\"},\n",
653-
" \"published\": {\"$gt\": 2015 }\n",
654-
" },\n",
655-
" include_metadata=True\n",
643+
" vector=df[df.id == \"F-1\"].vector[0],\n",
644+
" top_k=3,\n",
645+
" filter={\"category\": {\"$eq\": \"finance\"}, \"published\": {\"$gt\": 2015}},\n",
646+
" include_metadata=True,\n",
656647
")\n",
657648
"query_results"
658649
]

0 commit comments

Comments
 (0)