Commit 712c20e

Merge pull request #195 from realpython/fix_bs4
Fix bs4
2 parents 7010df1 + b2656a6 commit 712c20e

File tree

4 files changed: +25 −118 lines changed


web-scraping-bs4/README.md

Lines changed: 1 addition & 6 deletions

```diff
@@ -1,8 +1,3 @@
 # Build a Web Scraper With Requests and Beautiful Soup
 
-This repository contains code relating to the Real Python tutorial on how to [Build a Web Scraper With Requests and Beautiful Soup](https://realpython.com/beautiful-soup-web-scraper-python/).
-
-There are two available scripts:
-
-1. **[`scrape_jobs.py`](https://github.com/realpython/materials/blob/master/web-scraping-bs4/scrape_jobs.py):** The sample script that you build throughout the tutorial
-2. **[`job_search.py`](https://github.com/realpython/materials/blob/master/web-scraping-bs4/job_search.py):** The final code expanded as a command-line-interface app
+This repository contains [`scrape_jobs.py`](https://github.com/realpython/materials/blob/master/web-scraping-bs4/scrape_jobs.py), which is the sample script built in the Real Python tutorial on how to [Build a Web Scraper With Requests and Beautiful Soup](https://realpython.com/beautiful-soup-web-scraper-python/).
```

web-scraping-bs4/job_search.py

Lines changed: 0 additions & 94 deletions
This file was deleted.

web-scraping-bs4/requirements.txt

Lines changed: 7 additions & 0 deletions
```diff
@@ -0,0 +1,7 @@
+beautifulsoup4==4.9.3
+certifi==2020.12.5
+chardet==4.0.0
+idna==2.10
+requests==2.25.1
+soupsieve==2.2.1
+urllib3==1.26.4
```

web-scraping-bs4/scrape_jobs.py

Lines changed: 17 additions & 18 deletions
```diff
@@ -2,29 +2,28 @@
 from bs4 import BeautifulSoup
 
 
-URL = "https://www.monster.com/jobs/search/?q=Software-Developer\
-&where=Australia"
+URL = "https://realpython.github.io/fake-jobs/"
 page = requests.get(URL)
 
 soup = BeautifulSoup(page.content, "html.parser")
 results = soup.find(id="ResultsContainer")
 
 # Look for Python jobs
-python_jobs = results.find_all("h2", string=lambda t: "python" in t.lower())
-for p_job in python_jobs:
-    link = p_job.find("a")["href"]
-    print(p_job.text.strip())
-    print(f"Apply here: {link}\n")
+print("PYTHON JOBS\n==============================\n")
+python_jobs = results.find_all(
+    "h2", string=lambda text: "python" in text.lower()
+)
+python_job_elements = [
+    h2_element.parent.parent.parent for h2_element in python_jobs
+]
 
-# Print out all available jobs from the scraped webpage
-job_elems = results.find_all("section", class_="card-content")
-for job_elem in job_elems:
-    title_elem = job_elem.find("h2", class_="title")
-    company_elem = job_elem.find("div", class_="company")
-    location_elem = job_elem.find("div", class_="location")
-    if None in (title_elem, company_elem, location_elem):
-        continue
-    print(title_elem.text.strip())
-    print(company_elem.text.strip())
-    print(location_elem.text.strip())
+for job_element in python_job_elements:
+    title_element = job_element.find("h2", class_="title")
+    company_element = job_element.find("h3", class_="company")
+    location_element = job_element.find("p", class_="location")
+    print(title_element.text.strip())
+    print(company_element.text.strip())
+    print(location_element.text.strip())
+    link_url = job_element.find_all("a")[1]["href"]
+    print(f"Apply here: {link_url}\n")
     print()
```
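The new selection logic in `scrape_jobs.py` can be exercised offline against a small HTML fragment that mimics the structure of the fake-jobs page. The markup below is an illustrative assumption, not the real page source; the `.parent.parent.parent` chain and the second-link lookup match the nesting shown here:

```python
from bs4 import BeautifulSoup

# Hypothetical fragment modeled on the fake-jobs card layout:
# the <h2> title sits three levels below the card element that
# also holds the company, location, and the "Apply" link.
HTML = """
<div id="ResultsContainer">
  <div class="card-content">
    <div class="media">
      <div class="media-content">
        <h2 class="title">Senior Python Developer</h2>
        <h3 class="company">Payne, Roberts and Davis</h3>
      </div>
    </div>
    <p class="location">Stewartbury, AA</p>
    <footer>
      <a href="https://example.com/learn">Learn</a>
      <a href="https://example.com/apply">Apply</a>
    </footer>
  </div>
</div>
"""

soup = BeautifulSoup(HTML, "html.parser")
results = soup.find(id="ResultsContainer")

# Match <h2> titles containing "python", case-insensitively.
python_jobs = results.find_all(
    "h2", string=lambda text: "python" in text.lower()
)

# Walk up from each matching <h2> to the surrounding card element.
python_job_elements = [
    h2_element.parent.parent.parent for h2_element in python_jobs
]

for job_element in python_job_elements:
    title = job_element.find("h2", class_="title").text.strip()
    company = job_element.find("h3", class_="company").text.strip()
    location = job_element.find("p", class_="location").text.strip()
    # The card's second <a> element is the "Apply" link.
    link_url = job_element.find_all("a")[1]["href"]
    print(title, company, location, link_url)
```

This is why the commit replaces the per-`<h2>` `find("a")["href"]` lookup: the apply link lives on the enclosing card, not inside the title element, so the code first climbs to the card and then picks the second link.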
