|
| 1 | +import requests |
| 2 | +from bs4 import BeautifulSoup |
| 3 | +import argparse |
| 4 | + |
| 5 | + |
def scrape_jobs(location=None):
    """Scrapes Developer job postings from Monster, optionally by location.

    :param location: Where the job is located (optional; any string is
        safely percent-encoded into the query).
    :type location: str
    :return: all job postings from first page that match the search results
        (the ``ResultsContainer`` element), or None if Monster changed
        its markup and the container is absent.
    :rtype: BeautifulSoup object
    """
    # Let requests build and percent-encode the query string.  The old
    # hand-built f-string had two bugs: a backslash line-continuation
    # leaked the next line's leading spaces into the URL, and `location`
    # was never URL-encoded (spaces in "New York" broke the query).
    params = {"q": "Software-Developer"}
    if location:
        params["where"] = location
    page = requests.get("https://www.monster.com/jobs/search/", params=params)
    page.raise_for_status()  # fail loudly on HTTP errors instead of parsing an error page

    soup = BeautifulSoup(page.content, "html.parser")
    results = soup.find(id="ResultsContainer")
    return results
| 24 | + |
| 25 | + |
def filter_jobs_by_keyword(results, word):
    """Filters job postings by word and prints matching job title plus link.

    :param results: Parsed HTML container with all job listings
    :type results: BeautifulSoup object
    :param word: keyword to filter by (compared case-insensitively)
    :type word: str
    :return: None - just meant to print results
    :rtype: None
    """
    filtered_jobs = results.find_all(
        "h2",
        # Guard against h2 tags whose .string is None (tags with more
        # than one child) — calling .lower() on None raised AttributeError.
        string=lambda text: text is not None and word in text.lower(),
    )
    for f_job in filtered_jobs:
        anchor = f_job.find("a")
        print(f_job.text.strip())
        # A matching heading without an <a href> used to crash with
        # TypeError; now we just skip printing the missing link.
        if anchor is not None and anchor.has_attr("href"):
            link = anchor["href"]
            print(f"Apply here: {link}\n")
| 43 | + |
| 44 | + |
def print_all_jobs(results):
    """Print details of all jobs returned by the search.

    The printed details are title, link, company name and location of the job.

    :param results: Parsed HTML container with all job listings
    :type results: BeautifulSoup object
    :return: None - just meant to print results
    :rtype: None
    """
    for card in results.find_all("section", class_="card-content"):
        # Each card is itself a BeautifulSoup tag we can search within.
        title = card.find("h2", class_="title")
        company = card.find("div", class_="company")
        place = card.find("div", class_="location")
        if title is None or company is None or place is None:
            # Skip ad/placeholder cards that lack the usual markup.
            continue
        print(title.text.strip())
        print(title.find("a")["href"])
        print(company.text.strip())
        print(place.text.strip())
        print()
| 71 | + |
| 72 | + |
| 73 | +# USE THE SCRIPT AS A COMMAND-LINE INTERFACE |
| 74 | +# ---------------------------------------------------------------------------- |
| 75 | +my_parser = argparse.ArgumentParser( |
| 76 | + prog="jobs", description="Find Developer Jobs" |
| 77 | +) |
| 78 | +my_parser.add_argument( |
| 79 | + "-location", metavar="location", type=str, help="The location of the job" |
| 80 | +) |
| 81 | +my_parser.add_argument( |
| 82 | + "-word", metavar="word", type=str, help="What keyword to filter by" |
| 83 | +) |
| 84 | + |
| 85 | +args = my_parser.parse_args() |
| 86 | +location, keyword = args.location, args.word |
| 87 | + |
| 88 | +results = scrape_jobs(location) |
| 89 | +if keyword: |
| 90 | + filter_jobs_by_keyword(results, keyword.lower()) |
| 91 | +else: |
| 92 | + print_all_jobs(results) |
0 commit comments