|
69 | 69 | "metadata": {}, |
70 | 70 | "outputs": [], |
71 | 71 | "source": [ |
72 | | - "page = requests.get('https://www.indeed.com/jobs?q=python&l=new+york')" |
| 72 | + "page = requests.get(\"https://www.indeed.com/jobs?q=python&l=new+york\")" |
73 | 73 | ] |
74 | 74 | }, |
75 | 75 | { |
|
101 | 101 | "metadata": {}, |
102 | 102 | "outputs": [], |
103 | 103 | "source": [ |
104 | | - "page_2 = requests.get('https://www.indeed.com/jobs?q=python&l=new+york&start=20')" |
| 104 | + "page_2 = requests.get(\n", |
| 105 | + " \"https://www.indeed.com/jobs?q=python&l=new+york&start=20\"\n", |
| 106 | + ")" |
105 | 107 | ] |
106 | 108 | }, |
107 | 109 | { |
|
119 | 121 | "source": [ |
120 | 122 | "def get_jobs(page=1):\n", |
121 | 123 | " \"\"\"Fetches the HTML from a search for Python jobs in New York on Indeed.com from a specified page.\"\"\"\n", |
122 | | - " base_url_indeed = 'https://www.indeed.com/jobs?q=python&l=new+york&start='\n", |
123 | | - " results_start_num = page*10\n", |
124 | | - " url = f'{base_url_indeed}{results_start_num}'\n", |
| 124 | + " base_url_indeed = \"https://www.indeed.com/jobs?q=python&l=new+york&start=\"\n", |
| 125 | + " results_start_num = page * 10\n", |
| 126 | + " url = f\"{base_url_indeed}{results_start_num}\"\n", |
125 | 127 | " page = requests.get(url)\n", |
126 | 128 | " return page" |
127 | 129 | ] |
|
159 | 161 | "source": [ |
160 | 162 | "def get_jobs(title, location, page=1):\n", |
161 | 163 | " \"\"\"Fetches the HTML from a search on Indeed.com for jobs matching the given title and location, from a specified page.\"\"\"\n", |
162 | | - " loc = location.replace(' ', '+') # for multi-part locations\n", |
163 | | - " base_url_indeed = f'https://www.indeed.com/jobs?q={title}&l={loc}&start='\n", |
164 | | - " results_start_num = page*10\n", |
165 | | - " url = f'{base_url_indeed}{results_start_num}'\n", |
| 164 | + " loc = location.replace(\" \", \"+\") # for multi-part locations\n", |
| 165 | + " base_url_indeed = f\"https://www.indeed.com/jobs?q={title}&l={loc}&start=\"\n", |
| 166 | + " results_start_num = page * 10\n", |
| 167 | + " url = f\"{base_url_indeed}{results_start_num}\"\n", |
166 | 168 | " page = requests.get(url)\n", |
167 | 169 | " return page" |
168 | 170 | ] |
|
173 | 175 | "metadata": {}, |
174 | 176 | "outputs": [], |
175 | 177 | "source": [ |
176 | | - "get_jobs('python', 'new york', 3)" |
| 178 | + "get_jobs(\"python\", \"new york\", 3)" |
177 | 179 | ] |
178 | 180 | }, |
179 | 181 | { |
|
209 | 211 | "metadata": {}, |
210 | 212 | "outputs": [], |
211 | 213 | "source": [ |
212 | | - "site = get_jobs('python', 'new york')" |
| 214 | + "site = get_jobs(\"python\", \"new york\")" |
213 | 215 | ] |
214 | 216 | }, |
215 | 217 | { |
|
227 | 229 | "metadata": {}, |
228 | 230 | "outputs": [], |
229 | 231 | "source": [ |
230 | | - "results = soup.find(id='resultsCol')" |
| 232 | + "results = soup.find(id=\"resultsCol\")" |
231 | 233 | ] |
232 | 234 | }, |
233 | 235 | { |
|
236 | 238 | "metadata": {}, |
237 | 239 | "outputs": [], |
238 | 240 | "source": [ |
239 | | - "jobs = results.find_all('div', class_='result')" |
| 241 | + "jobs = results.find_all(\"div\", class_=\"result\")" |
240 | 242 | ] |
241 | 243 | }, |
242 | 244 | { |
|
252 | 254 | "metadata": {}, |
253 | 255 | "outputs": [], |
254 | 256 | "source": [ |
255 | | - "job_titles = [job.find('h2').find('a').text.strip() for job in jobs]" |
| 257 | + "job_titles = [job.find(\"h2\").find(\"a\").text.strip() for job in jobs]" |
256 | 258 | ] |
257 | 259 | }, |
258 | 260 | { |
|
277 | 279 | "metadata": {}, |
278 | 280 | "outputs": [], |
279 | 281 | "source": [ |
280 | | - "base_url = 'https://www.indeed.com'" |
| 282 | + "base_url = \"https://www.indeed.com\"" |
281 | 283 | ] |
282 | 284 | }, |
283 | 285 | { |
|
286 | 288 | "metadata": {}, |
287 | 289 | "outputs": [], |
288 | 290 | "source": [ |
289 | | - "job_links = [base_url + job.find('h2').find('a')['href'] for job in jobs]" |
| 291 | + "job_links = [base_url + job.find(\"h2\").find(\"a\")[\"href\"] for job in jobs]" |
290 | 292 | ] |
291 | 293 | }, |
292 | 294 | { |
|
311 | 313 | "metadata": {}, |
312 | 314 | "outputs": [], |
313 | 315 | "source": [ |
314 | | - "job_locations = [job.find(class_='location').text for job in jobs]" |
| 316 | + "job_locations = [job.find(class_=\"location\").text for job in jobs]" |
315 | 317 | ] |
316 | 318 | }, |
317 | 319 | { |
|
339 | 341 | "def parse_info(soup):\n", |
340 | 342 | " \"\"\"\n", |
341 | 343 | " Parses HTML containing job postings and picks out job title, location, and link.\n", |
342 | | - " \n", |
| 344 | + "\n", |
343 | 345 | " args:\n", |
344 | 346 | " soup (BeautifulSoup object): A parsed bs4.BeautifulSoup object of a search results page on indeed.com\n", |
345 | | - " \n", |
| 347 | + "\n", |
346 | 348 | " returns:\n", |
347 | 349 | " job_list (list): A list of dictionaries containing the title, link, and location of each job posting\n", |
348 | 350 | " \"\"\"\n", |
349 | | - " results = soup.find(id='resultsCol')\n", |
350 | | - " jobs = results.find_all('div', class_='result')\n", |
351 | | - " base_url = 'https://www.indeed.com'\n", |
| 351 | + " results = soup.find(id=\"resultsCol\")\n", |
| 352 | + " jobs = results.find_all(\"div\", class_=\"result\")\n", |
| 353 | + " base_url = \"https://www.indeed.com\"\n", |
352 | 354 | "\n", |
353 | 355 | " job_list = list()\n", |
354 | 356 | " for job in jobs:\n", |
355 | | - " title = job.find('h2').find('a').text.strip()\n", |
356 | | - " link = base_url + job.find('h2').find('a')['href']\n", |
357 | | - " location = job.find(class_='location').text\n", |
358 | | - " job_list.append({'title': title, 'link': link, 'location': location})\n", |
| 357 | + " title = job.find(\"h2\").find(\"a\").text.strip()\n", |
| 358 | + " link = base_url + job.find(\"h2\").find(\"a\")[\"href\"]\n", |
| 359 | + " location = job.find(class_=\"location\").text\n", |
| 360 | + " job_list.append({\"title\": title, \"link\": link, \"location\": location})\n", |
359 | 361 | "\n", |
360 | 362 | " return job_list" |
361 | 363 | ] |
|
373 | 375 | "metadata": {}, |
374 | 376 | "outputs": [], |
375 | 377 | "source": [ |
376 | | - "page = get_jobs('python', 'new_york')" |
| 378 | + "page = get_jobs(\"python\", \"new_york\")" |
377 | 379 | ] |
378 | 380 | }, |
379 | 381 | { |
|
418 | 420 | "source": [ |
419 | 421 | "def get_job_listings(title, location, amount=100):\n", |
420 | 422 | " results = list()\n", |
421 | | - " for page in range(amount//10):\n", |
| 423 | + " for page in range(amount // 10):\n", |
422 | 424 | " site = get_jobs(title, location, page=page)\n", |
423 | 425 | " soup = BeautifulSoup(site.content)\n", |
424 | 426 | " page_results = parse_info(soup)\n", |
|
432 | 434 | "metadata": {}, |
433 | 435 | "outputs": [], |
434 | 436 | "source": [ |
435 | | - "r = get_job_listings('python', 'new york', 100)" |
| 437 | + "r = get_job_listings(\"python\", \"new york\", 100)" |
436 | 438 | ] |
437 | 439 | }, |
438 | 440 | { |
|
0 commit comments