diff --git a/exercises/exercise02-Task_Dependencies.ipynb b/exercises/exercise02-Task_Dependencies.ipynb index e53d488..cab62d0 100644 --- a/exercises/exercise02-Task_Dependencies.ipynb +++ b/exercises/exercise02-Task_Dependencies.ipynb @@ -227,7 +227,7 @@ "from bs4 import BeautifulSoup\n", "import requests\n", "\n", - "import pandas as pd" + "import modin.pandas as pd" ] }, { @@ -250,21 +250,19 @@ "\n", "def parse_google_response(keyword, response):\n", " soup = BeautifulSoup(response.text, 'lxml')\n", - " df = pd.DataFrame(columns=['title', 'link', 'description'])\n", + " data = {'title': [], 'link': [], 'description': [], 'library': []}\n", " for g in soup.find_all(class_='g'):\n", " entry = {}\n", " headers = g.find_all(class_='r')\n", " # Sometimes results have no headers\n", " if len(headers) != 1:\n", " continue\n", - " entry['title'] = headers[0].text\n", - " entry['link'] = headers[0].find('a').get('href')[7:]\n", + " data['title'].append(headers[0].text)\n", + " data['link'].append(headers[0].find('a').get('href')[7:])\n", " description = g.find_all(class_='st')\n", - " entry['description'] = description[0].text if len(description) > 0 else \"\"\n", - " df = df.append(pd.DataFrame(entry, index=[0]))\n", - " \n", - " df['library'] = keyword\n", - " return df\n", + " data['description'].append(description[0].text if len(description) > 0 else \"\")\n", + " data['library'].append(keyword)\n", + " return data\n", "\n", "def get_results(keyword):\n", " response = query_google(\"learn {}\".format(keyword))\n", @@ -285,13 +283,13 @@ "outputs": [], "source": [ "start = time.time()\n", - "keywords = [\"ray\", \"rllib\", \"tune\", \"modin\", \"plasma\", \"arrow\"]\n", + "keywords = [\"ray\", \"rllib\", \"tune\", \"modin.pandas\", \"plasma\", \"arrow\"]\n", "results = []\n", "for keyword in keywords:\n", - " df = get_results(keyword)\n", - " results.append(df)\n", - " \n", - "df = pd.concat(results)\n", + " data = get_results(keyword)\n", + " results.append(data)\n", + "\n", + "df = pd.concat([pd.DataFrame(r) for r in results])\n", "duration = time.time() - start\n", "print(\"Constructing the dataframe took {} seconds.\".format(duration))" ] @@ -311,6 +309,14 @@ "source": [ "df" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**