Skip to content

Commit 4d9c08c

Browse files
committed
Add CLI job search tool
1 parent 1fd9c01 commit 4d9c08c

File tree

1 file changed

+87
-0
lines changed

1 file changed

+87
-0
lines changed

web-scraping-bs4/job_search.py

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
import argparse
from urllib.parse import quote_plus

import requests
from bs4 import BeautifulSoup
4+
5+
6+
def scrape_jobs(location=None):
    """Scrape Software Developer job postings from Monster, optionally by location.

    :param location: Where the job is located (added to the search URL)
    :type location: str
    :return: parsed container with all job postings from the first page of
        results, or None if the results container is not found in the page
    :rtype: bs4.element.Tag or None
    """
    url = "https://www.monster.com/jobs/search/?q=Software-Developer"
    if location:
        # quote_plus escapes spaces and special characters so the query
        # string stays valid (the original interpolated `location` raw).
        url = f"{url}&where={quote_plus(location)}"

    # timeout prevents the CLI from hanging forever on a stalled connection;
    # raise_for_status fails loudly instead of parsing an HTTP error page.
    page = requests.get(url, timeout=30)
    page.raise_for_status()

    soup = BeautifulSoup(page.content, 'html.parser')
    return soup.find(id='ResultsContainer')
23+
24+
25+
def filter_jobs_by_keyword(results, word):
    """Filter job postings by word and print the matching job title plus link.

    :param results: Parsed HTML container with all job listings
    :type results: BeautifulSoup object
    :param word: keyword to filter by (expected lowercase)
    :type word: str
    :return: None - just meant to print results
    :rtype: None
    """
    # BeautifulSoup calls the `string=` function with tag.string, which is
    # None for <h2> tags containing more than one child node — guard before
    # calling .lower() or the whole search raises AttributeError.
    filtered_jobs = results.find_all(
        'h2',
        string=lambda text: text is not None and word in text.lower(),
    )
    for f_job in filtered_jobs:
        print(f_job.text.strip())
        anchor = f_job.find('a')
        # a matching title without an <a href> would otherwise raise TypeError
        if anchor is not None and anchor.has_attr('href'):
            print(f"Apply here: {anchor['href']}\n")
40+
41+
42+
def print_all_jobs(results):
    """Print details (title, link, company name and location) of all jobs returned by the search.

    :param results: Parsed HTML container with all job listings
    :type results: BeautifulSoup object
    :return: None - just meant to print results
    :rtype: None
    """
    for card in results.find_all('section', class_='card-content'):
        # each card is itself a BeautifulSoup tag we can search within
        title = card.find('h2', class_='title')
        company = card.find('div', class_='company')
        place = card.find('div', class_='location')
        if title is None or company is None or place is None:
            # incomplete listing (e.g. an ad slot) — nothing useful to print
            continue
        print(title.text.strip())
        print(title.find('a')['href'])
        print(company.text.strip())
        print(place.text.strip())
        print()
66+
67+
68+
# USE THE SCRIPT AS A COMMAND-LINE INTERFACE
# --------------------------------------------------------------------------------
def main():
    """Parse CLI arguments, run the job search, and print the results."""
    my_parser = argparse.ArgumentParser(prog='jobs', description='Find Developer Jobs')
    my_parser.add_argument('-location',
                           metavar='location',
                           type=str,
                           help='The location of the job')
    my_parser.add_argument('-word',
                           metavar='word',
                           type=str,
                           help='What keyword to filter by')
    args = my_parser.parse_args()

    results = scrape_jobs(args.location)
    if args.word:
        filter_jobs_by_keyword(results, args.word.lower())
    else:
        print_all_jobs(results)


# Guard the entry point so importing this module (e.g. from tests) does not
# parse sys.argv and fire a network request as a side effect.
if __name__ == '__main__':
    main()

0 commit comments

Comments
 (0)