Clarify wikipedia note in reranking guide (google#288)

markmcd · web-flow · commit 45928ffa26c9 · 2024-03-06T09:12:12.000-08:00
* Clarify the note on the search re-ranking tutorial.

When published, the note reads as though it describes our own package,
so I've reworded it to make clear that it refers to the `wikipedia`
package, while keeping the references intact.

* Minor tweaks: tidy blank lines in code cells, refresh a cell output, and fix the embeddings guide link text
diff --git a/site/en/docs/search_reranking_using_embeddings.ipynb b/site/en/docs/search_reranking_using_embeddings.ipynb
@@ -160,7 +160,7 @@
         "id": "739f0bb73f05"
       },
       "source": [
-        "> Note: This library was designed for ease of use and simplicity, not for advanced use. If you plan on doing serious scraping or automated requests, please use [Pywikipediabot](http://www.mediawiki.org/wiki/Manual:Pywikipediabot) or one of the other more advanced [Python MediaWiki API wrappers](http://en.wikipedia.org/wiki/Wikipedia:Creating_a_bot#Python), which has a larger API, rate limiting, and other features so you can be considerate of the MediaWiki infrastructure."
+        "Note: The [`wikipedia` package](https://pypi.org/project/wikipedia/) notes that it was \"designed for ease of use and simplicity, not for advanced use\", and that production or heavy use should instead \"use [Pywikipediabot](http://www.mediawiki.org/wiki/Manual:Pywikipediabot) or one of the other more advanced [Python MediaWiki API wrappers](http://en.wikipedia.org/wiki/Wikipedia:Creating_a_bot#Python)\"."
       ]
     },
     {
@@ -189,8 +189,6 @@
         "import wikipedia\n",
         "from wikipedia.exceptions import DisambiguationError, PageError\n",
         "\n",
-        "\n",
-        "\n",
         "import numpy as np\n",
         "\n",
         "from IPython.display import Markdown\n",
@@ -323,12 +321,15 @@
         "  for query in search_queries:\n",
         "    print(f'Searching for \"{query}\"')\n",
         "    search_terms = wikipedia.search(query)\n",
+        "\n",
         "    print(f\"Related search terms: {search_terms[:n_topics]}\")\n",
         "    for search_term in search_terms[:n_topics]: # select first `n_topics` candidates\n",
         "      if search_term in search_history: # check if the topic is already covered\n",
         "        continue\n",
+        "\n",
         "      print(f'Fetching page: \"{search_term}\"')\n",
         "      search_history.add(search_term) # add to search history\n",
+        "\n",
         "      try:\n",
         "        # extract the relevant data by using `gemini-pro` model\n",
         "        page = wikipedia.page(search_term, auto_suggest=False)\n",
@@ -345,6 +346,7 @@
         "\n",
         "            Note: Do not summarize. Only Extract and return the relevant information\n",
         "        \"\"\"))\n",
+        "\n",
         "        urls = [url]\n",
         "        if response.candidates[0].citation_metadata:\n",
         "          extra_citations = response.candidates[0].citation_metadata.citation_sources\n",
@@ -724,7 +726,9 @@
       "outputs": [
         {
           "data": {
-            "text/plain": []
+            "text/plain": [
+              "'No citations found'"
+            ]
           },
           "execution_count": 49,
           "metadata": {},
@@ -1092,7 +1096,7 @@
         "id": "tip8ArqJf_ep"
       },
       "source": [
-        "Please refer to the [embeddings_guide](https://ai.google.dev/docs/embeddings_guide) for more information on embeddings."
+        "Please refer to the [embeddings guide](https://ai.google.dev/docs/embeddings_guide) for more information on embeddings."
       ]
     },
     {