|
160 | 160 | "id": "739f0bb73f05" |
161 | 161 | }, |
162 | 162 | "source": [ |
163 | | - "> Note: This library was designed for ease of use and simplicity, not for advanced use. If you plan on doing serious scraping or automated requests, please use [Pywikipediabot](http://www.mediawiki.org/wiki/Manual:Pywikipediabot) or one of the other more advanced [Python MediaWiki API wrappers](http://en.wikipedia.org/wiki/Wikipedia:Creating_a_bot#Python), which has a larger API, rate limiting, and other features so you can be considerate of the MediaWiki infrastructure." |
| 163 | + "Note: The [`wikipedia` package](https://pypi.org/project/wikipedia/) notes that it was \"designed for ease of use and simplicity, not for advanced use\", and that for production or heavy use you should instead \"use [Pywikipediabot](http://www.mediawiki.org/wiki/Manual:Pywikipediabot) or one of the other more advanced [Python MediaWiki API wrappers](http://en.wikipedia.org/wiki/Wikipedia:Creating_a_bot#Python)\"." |
164 | 164 | ] |
165 | 165 | }, |
166 | 166 | { |
|
189 | 189 | "import wikipedia\n", |
190 | 190 | "from wikipedia.exceptions import DisambiguationError, PageError\n", |
191 | 191 | "\n", |
192 | | - "\n", |
193 | | - "\n", |
194 | 192 | "import numpy as np\n", |
195 | 193 | "\n", |
196 | 194 | "from IPython.display import Markdown\n", |
|
323 | 321 | " for query in search_queries:\n", |
324 | 322 | " print(f'Searching for \"{query}\"')\n", |
325 | 323 | " search_terms = wikipedia.search(query)\n", |
| 324 | + "\n", |
326 | 325 | " print(f\"Related search terms: {search_terms[:n_topics]}\")\n", |
327 | 326 | " for search_term in search_terms[:n_topics]: # select first `n_topics` candidates\n", |
328 | 327 | " if search_term in search_history: # check if the topic is already covered\n", |
329 | 328 | " continue\n", |
| 329 | + "\n", |
330 | 330 | " print(f'Fetching page: \"{search_term}\"')\n", |
331 | 331 | " search_history.add(search_term) # add to search history\n", |
| 332 | + "\n", |
332 | 333 | " try:\n", |
333 | 334 | " # extract the relevant data by using `gemini-pro` model\n", |
334 | 335 | " page = wikipedia.page(search_term, auto_suggest=False)\n", |
|
345 | 346 | "\n", |
346 | 347 | " Note: Do not summarize. Only Extract and return the relevant information\n", |
347 | 348 | " \"\"\"))\n", |
| 349 | + "\n", |
348 | 350 | " urls = [url]\n", |
349 | 351 | " if response.candidates[0].citation_metadata:\n", |
350 | 352 | " extra_citations = response.candidates[0].citation_metadata.citation_sources\n", |
|
724 | 726 | "outputs": [ |
725 | 727 | { |
726 | 728 | "data": { |
727 | | - "text/plain": [] |
| 729 | + "text/plain": [ |
| 730 | + "'No citations found'" |
| 731 | + ] |
728 | 732 | }, |
729 | 733 | "execution_count": 49, |
730 | 734 | "metadata": {}, |
|
1092 | 1096 | "id": "tip8ArqJf_ep" |
1093 | 1097 | }, |
1094 | 1098 | "source": [ |
1095 | | - "Please refer to the [embeddings_guide](https://ai.google.dev/docs/embeddings_guide) for more information on embeddings." |
| 1099 | + "Please refer to the [embeddings guide](https://ai.google.dev/docs/embeddings_guide) for more information on embeddings." |
1096 | 1100 | ] |
1097 | 1101 | }, |
1098 | 1102 | { |
|
0 commit comments