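"""Scrape Software Developer job postings from Monster from the command line.

Fetches the first page of search results (optionally filtered by location),
then either prints every listing or only those whose title contains a keyword.
"""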
import requests
from bs4 import BeautifulSoup
import argparse


def scrape_jobs(location=None):
    """Scrape Software Developer job postings from Monster, optionally by location.

    :param location: Where the job is located
    :type location: str
    :return: the container holding all job postings on the first page of search
        results, or None if the container is not found
    :rtype: bs4.element.Tag or None
    """
    if location:
        URL = f"https://www.monster.com/jobs/search/?q=Software-Developer&where={location}"
    else:
        URL = "https://www.monster.com/jobs/search/?q=Software-Developer"
    page = requests.get(URL, timeout=10)
    page.raise_for_status()  # fail early on HTTP errors instead of parsing an error page

    soup = BeautifulSoup(page.content, 'html.parser')
    results = soup.find(id='ResultsContainer')
    return results


def filter_jobs_by_keyword(results, word):
    """Filter job postings by keyword and print each matching job title plus its link.

    :param results: Parsed HTML container with all job listings
    :type results: bs4.element.Tag
    :param word: keyword to filter job titles by
    :type word: str
    :return: None; matches are printed directly
    :rtype: None
    """
    # guard against elements whose string is None to avoid a TypeError in the lambda
    filtered_jobs = results.find_all(
        'h2', string=lambda text: text and word in text.lower())
    for f_job in filtered_jobs:
        link = f_job.find('a')['href']
        print(f_job.text.strip())
        print(f"Apply here: {link}\n")


def print_all_jobs(results):
    """Print the details (title, link, company name, and location) of every job found.

    :param results: Parsed HTML container with all job listings
    :type results: bs4.element.Tag
    :return: None; listings are printed directly
    :rtype: None
    """
    job_elems = results.find_all('section', class_='card-content')

    for job_elem in job_elems:
        # each job_elem is itself a Tag, so it can be searched with find()
        title_elem = job_elem.find('h2', class_='title')
        company_elem = job_elem.find('div', class_='company')
        location_elem = job_elem.find('div', class_='location')
        if None in (title_elem, company_elem, location_elem):
            continue
        # print(job_elem.prettify())  # to inspect an incomplete listing further
        print(title_elem.text.strip())
        link_elem = title_elem.find('a')
        print(link_elem['href'])
        print(company_elem.text.strip())
        print(location_elem.text.strip())
        print()


# USE THE SCRIPT AS A COMMAND-LINE INTERFACE
# --------------------------------------------------------------------------------
if __name__ == '__main__':
    my_parser = argparse.ArgumentParser(prog='jobs', description='Find Developer Jobs')
    my_parser.add_argument('--location',
                           metavar='location',
                           type=str,
                           help='The location of the job')
    my_parser.add_argument('--word',
                           metavar='word',
                           type=str,
                           help='The keyword to filter job titles by')

    args = my_parser.parse_args()
    location, keyword = args.location, args.word

    results = scrape_jobs(location)
    if keyword:
        filter_jobs_by_keyword(results, keyword.lower())
    else:
        print_all_jobs(results)
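
# Example invocation (assuming the script is saved as jobs.py; the location and
# keyword values below are placeholders, not required values):
#
#     python jobs.py --location new-york --word python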