Update links in docs/quick-tour notebooks (#428)

jhamon · web-flow · commit b0c50c292a43 · 2025-03-26T10:41:56.000-04:00
## Problem

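Some of the `docs/quick-tour` notebooks contain a broken link: the "available providers and regions" reference points to `https://docs.pinecone.io/docs/projects`.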

## Solution

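Update the broken links to point to `https://docs.pinecone.io/troubleshooting/available-cloud-regions`. This commit also reformats the notebook code cells, changes the install cells to `pandas==2.2.3 pinecone==6.0.2`, makes the notebook-scanning scripts skip any path containing `.git`, and switches the client-versions workflow from uv to Poetry.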
diff --git a/.github/actions/validate-json/validate-notebook-formats.py b/.github/actions/validate-json/validate-notebook-formats.py
@@ -18,7 +18,9 @@ def main():
     has_error = False
     # Walk through the repository to find all .ipynb files
     failing_notebooks = []
-    for root, _, files in os.walk("."):
+    for root, _, files in os.walk(".", topdown=True):
+        if '.git' in root:
+            continue
         for file in files:
             if file.endswith(".ipynb"):
                 notebook_path = os.path.join(root, file)
diff --git a/.github/scripts/version-census.py b/.github/scripts/version-census.py
@@ -33,7 +33,9 @@ def main():
     plugins_used = {}
     malformed_notebooks = []
 
-    for root, _, files in os.walk("."):
+    for root, _, files in os.walk(".", topdown=True):
+        if '.git' in root:
+            continue
         for file in files:
             if file.endswith(".ipynb"):
                 notebook_path = os.path.join(root, file)
diff --git a/.github/workflows/client-versions.yaml b/.github/workflows/client-versions.yaml
@@ -15,13 +15,13 @@ jobs:
         with:
           python-version: '3.11'
 
-      - name: Install uv
-        uses: astral-sh/setup-uv@v5
+      - name: Install Poetry
+        uses: snok/install-poetry@v1
 
       - name: Install dependencies
         run: |
-          uv sync
+          poetry install --with dev
 
       - name: Run version census
         run: |
-          uv run .github/scripts/version-census.py
+          poetry run python3 .github/scripts/version-census.py
diff --git a/docs/quick-tour/hello-pinecone.ipynb b/docs/quick-tour/hello-pinecone.ipynb
@@ -95,9 +95,7 @@
    },
    "outputs": [],
    "source": [
-    "!pip install -qU \\\n",
-    "  pinecone==6.0.1 \\\n",
-    "  pandas"
+    "!pip install -qU pandas==2.2.3 pinecone==6.0.2"
    ]
   },
   {
@@ -130,7 +128,7 @@
     "from pinecone import Pinecone\n",
     "\n",
     "# Get your API key at app.pinecone.io\n",
-    "api_key = os.environ.get('PINECONE_API_KEY') or 'PINECONE_API_KEY'\n",
+    "api_key = os.environ.get(\"PINECONE_API_KEY\") or \"PINECONE_API_KEY\"\n",
     "\n",
     "# Instantiate the Pinecone client\n",
     "pc = Pinecone(api_key=api_key)"
@@ -222,7 +220,7 @@
     "- `name` can be anything we like. The name is used as an identifier for the index when performing other operations such as `describe_index`, `delete_index`, and so on. \n",
     "- `metric` specifies the similarity metric that will be used later when you make queries to the index.\n",
     "- `dimension` should correspond to the dimension of the dense vectors produced by your embedding model. In this quick start, we are using made-up data so a small value is simplest.\n",
-    "- `spec` holds a specification which tells Pinecone how you would like to deploy our index. You can find a list of all [available providers and regions here](https://docs.pinecone.io/docs/projects).\n",
+    "- `spec` holds a specification which tells Pinecone how you would like to deploy our index. You can find a list of all [available providers and regions here](https://docs.pinecone.io/troubleshooting/available-cloud-regions).\n",
     "\n",
     "There are more configurations available, but this minimal set will get us started."
    ]
@@ -279,10 +277,7 @@
     "    name=index_name,\n",
     "    metric=Metric.COSINE,\n",
     "    dimension=3,\n",
-    "    spec=ServerlessSpec(\n",
-    "        cloud=CloudProvider.AWS, \n",
-    "        region=AwsRegion.US_EAST_1\n",
-    "    )\n",
+    "    spec=ServerlessSpec(cloud=CloudProvider.AWS, region=AwsRegion.US_EAST_1),\n",
     ")"
    ]
   },
@@ -478,10 +473,12 @@
     "            \"vector\": [\n",
     "                [random.random() for i in range(description.dimension)]\n",
     "                for _ in range(num_vectors)\n",
-    "            ]\n",
-    "        })\n",
+    "            ],\n",
+    "        }\n",
+    "    )\n",
     "    return df\n",
     "\n",
+    "\n",
     "df = create_simulated_data_in_df(10)\n",
     "\n",
     "df.head()"
@@ -581,12 +578,14 @@
    "source": [
     "import time\n",
     "\n",
+    "\n",
     "def is_fresh(index):\n",
     "    stats = index.describe_index_stats()\n",
     "    vector_count = stats.total_vector_count\n",
     "    print(f\"Vector count: \", vector_count)\n",
     "    return vector_count > 0\n",
     "\n",
+    "\n",
     "while not is_fresh(index):\n",
     "    # It takes a few moments for vectors we just upserted\n",
     "    # to become available for querying\n",
@@ -699,13 +698,9 @@
     "# In a more realistic scenario, this would be an embedding vector\n",
     "# that encodes something meaningful. For this simple demo, we will\n",
     "# make up a vector that matches the dimension of our index.\n",
-    "query_embedding = [2., 2., 2.]\n",
+    "query_embedding = [2.0, 2.0, 2.0]\n",
     "\n",
-    "index.query(\n",
-    "    vector=query_embedding,\n",
-    "    top_k=5,\n",
-    "    include_values=True\n",
-    ")"
+    "index.query(vector=query_embedding, top_k=5, include_values=True)"
    ]
   },
   {
diff --git a/docs/quick-tour/interacting-with-the-index.ipynb b/docs/quick-tour/interacting-with-the-index.ipynb
@@ -108,9 +108,7 @@
    },
    "outputs": [],
    "source": [
-    "!pip install -qU \\\n",
-    "  pinecone==6.0.1 \\\n",
-    "  pandas"
+    "!pip install -qU pandas==2.2.3 pinecone==6.0.2"
    ]
   },
   {
@@ -155,7 +153,7 @@
     "from pinecone import Pinecone\n",
     "\n",
     "# Get API key at app.pinecone.io\n",
-    "api_key = os.environ.get('PINECONE_API_KEY') or 'PINECONE_API_KEY'\n",
+    "api_key = os.environ.get(\"PINECONE_API_KEY\") or \"PINECONE_API_KEY\"\n",
     "\n",
     "# Instantiate the client\n",
     "pc = Pinecone(api_key=api_key)"
@@ -179,7 +177,7 @@
     "- `name` can be anything we like. The name is used as an identifier for the index when performing other operations such as `describe_index`, `delete_index`, and so on. \n",
     "- `metric` specifies the similarity metric that will be used later when you make queries to the index.\n",
     "- `dimension` should correspond to the dimension of the dense vectors produced by your embedding model. In this quick start, we are using made-up data so a small value is simplest.\n",
-    "- `spec` holds a specification which tells Pinecone how you would like to deploy our index. You can find a list of all [available providers and regions here](https://docs.pinecone.io/docs/projects).\n",
+    "- `spec` holds a specification which tells Pinecone how you would like to deploy our index. You can find a list of all [available providers and regions here](https://docs.pinecone.io/troubleshooting/available-cloud-regions).\n",
     "\n",
     "There are more configurations available, but this minimal set will get us started."
    ]
@@ -274,13 +272,10 @@
     "from pinecone import ServerlessSpec, Metric, CloudProvider, AwsRegion\n",
     "\n",
     "pc.create_index(\n",
-    "    name=index_name, \n",
-    "    dimension=2, \n",
+    "    name=index_name,\n",
+    "    dimension=2,\n",
     "    metric=Metric.EUCLIDEAN,\n",
-    "    spec=ServerlessSpec(\n",
-    "        cloud=CloudProvider.AWS, \n",
-    "        region=AwsRegion.US_EAST_1\n",
-    "    )\n",
+    "    spec=ServerlessSpec(cloud=CloudProvider.AWS, region=AwsRegion.US_EAST_1),\n",
     ")"
    ]
   },
@@ -450,7 +445,7 @@
     "\n",
     "df = pd.DataFrame()\n",
     "df[\"id\"] = [\"A\", \"B\", \"C\", \"D\", \"E\"]\n",
-    "df[\"vector\"] = [[1., 1.], [2., 2.], [3., 3.], [4., 4.], [5., 5.]]\n",
+    "df[\"vector\"] = [[1.0, 1.0], [2.0, 2.0], [3.0, 3.0], [4.0, 4.0], [5.0, 5.0]]\n",
     "df"
    ]
   },
@@ -710,7 +705,7 @@
    ],
    "source": [
     "# Update vectors by ID\n",
-    "index.upsert(vectors=[(\"A\",[0.1, 0.1])])"
+    "index.upsert(vectors=[(\"A\", [0.1, 0.1])])"
    ]
   },
   {
diff --git a/docs/quick-tour/metadata-filtering.ipynb b/docs/quick-tour/metadata-filtering.ipynb
@@ -95,9 +95,7 @@
    },
    "outputs": [],
    "source": [
-    "!pip install -qU \\\n",
-    "  pinecone==6.0.1 \\\n",
-    "  pandas"
+    "!pip install -qU pandas==2.2.3 pinecone==6.0.2"
    ]
   },
   {
@@ -131,10 +129,8 @@
     "import os\n",
     "from pinecone import Pinecone\n",
     "\n",
-    "# initialize connection to pinecone (get API key at app.pinecone.io)\n",
-    "api_key = os.environ.get('PINECONE_API_KEY') or 'PINECONE_API_KEY'\n",
-    "\n",
-    "# configure client\n",
+    "# Initialize client\n",
+    "api_key = os.environ.get(\"PINECONE_API_KEY\") or \"PINECONE_API_KEY\"\n",
     "pc = Pinecone(api_key=api_key)"
    ]
   },
@@ -150,7 +146,7 @@
     "- `name` can be anything we like. The name is used as an identifier for the index when performing other operations such as `describe_index`, `delete_index`, and so on. \n",
     "- `metric` specifies the similarity metric that will be used later when you make queries to the index.\n",
     "- `dimension` should correspond to the dimension of the dense vectors produced by your embedding model. In this quick start, we are using made-up data so a small value is simplest.\n",
-    "- `spec` holds a specification which tells Pinecone how you would like to deploy our index. You can find a list of all [available providers and regions here](https://docs.pinecone.io/docs/projects).\n",
+    "- `spec` holds a specification which tells Pinecone how you would like to deploy our index. You can find a list of all [available providers and regions here](https://docs.pinecone.io/troubleshooting/available-cloud-regions).\n",
     "\n",
     "There are more configurations available, but this minimal set will get us started."
    ]
@@ -203,13 +199,10 @@
     "\n",
     "# Create an index\n",
     "index_config = pc.create_index(\n",
-    "    name=index_name, \n",
-    "    dimension=2, \n",
+    "    name=index_name,\n",
+    "    dimension=2,\n",
     "    metric=Metric.EUCLIDEAN,\n",
-    "    spec=ServerlessSpec(\n",
-    "        cloud=CloudProvider.AWS, \n",
-    "        region=AwsRegion.US_EAST_1\n",
-    "    )\n",
+    "    spec=ServerlessSpec(cloud=CloudProvider.AWS, region=AwsRegion.US_EAST_1),\n",
     ")"
    ]
   },
@@ -358,12 +351,13 @@
     "\n",
     "df = pd.DataFrame()\n",
     "df[\"id\"] = [\"F-1\", \"F-2\", \"S-1\", \"S-2\"]\n",
-    "df[\"vector\"] = [[1., 1.], [2., 2.], [3., 3.], [4., 4.]]\n",
+    "df[\"vector\"] = [[1.0, 1.0], [2.0, 2.0], [3.0, 3.0], [4.0, 4.0]]\n",
     "df[\"metadata\"] = [\n",
     "    {\"category\": \"finance\", \"published\": 2015},\n",
     "    {\"category\": \"finance\", \"published\": 2016},\n",
     "    {\"category\": \"sport\", \"published\": 2017},\n",
-    "    {\"category\": \"sport\", \"published\": 2018}]\n",
+    "    {\"category\": \"sport\", \"published\": 2018},\n",
+    "]\n",
     "df"
    ]
   },
@@ -449,11 +443,13 @@
    "source": [
     "import time\n",
     "\n",
+    "\n",
     "def is_fresh(index):\n",
     "    stats = index.describe_index_stats()\n",
     "    vector_count = stats.total_vector_count\n",
     "    return vector_count > 0\n",
     "\n",
+    "\n",
     "while not is_fresh(index):\n",
     "    # It takes a few moments for vectors we just upserted\n",
     "    # to become available for querying\n",
@@ -578,9 +574,7 @@
    ],
    "source": [
     "query_results = index.query(\n",
-    "    vector=df[df.id == \"F-1\"].vector[0], \n",
-    "    top_k=3,\n",
-    "    include_metadata=True\n",
+    "    vector=df[df.id == \"F-1\"].vector[0], top_k=3, include_metadata=True\n",
     ")\n",
     "query_results"
    ]
@@ -646,13 +640,10 @@
    ],
    "source": [
     "query_results = index.query(\n",
-    "    vector=df[df.id == \"F-1\"].vector[0], \n",
-    "    top_k=3, \n",
-    "    filter={\n",
-    "        \"category\" : {\"$eq\": \"finance\"},\n",
-    "        \"published\": {\"$gt\": 2015 }\n",
-    "    },\n",
-    "    include_metadata=True\n",
+    "    vector=df[df.id == \"F-1\"].vector[0],\n",
+    "    top_k=3,\n",
+    "    filter={\"category\": {\"$eq\": \"finance\"}, \"published\": {\"$gt\": 2015}},\n",
+    "    include_metadata=True,\n",
     ")\n",
     "query_results"
    ]
diff --git a/docs/quick-tour/namespacing.ipynb b/docs/quick-tour/namespacing.ipynb
diff --git a/pyproject.toml b/pyproject.toml