Commit 5ff6a44

[main] Updated parser
1 parent b29626d commit 5ff6a44

1 file changed: +18, -11 lines

webscraperudel/webscraper.py

Lines changed: 18 additions & 11 deletions
@@ -1,10 +1,14 @@
 import time
 from pprint import pprint
+from typing import is_typeddict
+import requests
 from requests.auth import HTTPBasicAuth
 from bs4 import BeautifulSoup, ResultSet
 from webdriver_manager.firefox import GeckoDriverManager
 from selenium import webdriver
 
+ALL_COURSE_URL = 'https://udapps.nss.udel.edu/CoursesSearch/search-results?term=2228&search_type=A&course_sec=&session=All&course_title=&instr_name=&text_info=All&campus=&instrtn_mode=All&time_start_hh=&time_start_ampm=&credit=Any&keyword=&geneduc=&subj_area_code=&college='
+
 campus_mapping = {
     'NEWRK': 'Newark',
     'DOVER': 'Dover',
@@ -50,7 +54,7 @@ def parse_course_name(name: str):
     section_ = ''
     is_number = False
     for i in range(len(name)):
-        if not is_number and name.isdigit():
+        if is_number and name[i].isdigit():
             if len(number_) == 3:
                 section_ += name[i]
             else:
@@ -118,18 +122,21 @@ def parse_course_location(locationstr: str):
 
 def main():
     base_url = 'https://udapps.nss.udel.edu/CoursesSearch/search-results'
-    br = webdriver.Firefox(executable_path=GeckoDriverManager().install())
-    br.get('{}?{}'.format(base_url, generate_search_endpoint('A')))
-    time.sleep(2)
-    soup = BeautifulSoup(br.page_source, "html.parser")
+    page = requests.get('{}?{}'.format(
+        ALL_COURSE_URL, generate_search_endpoint('A')))
+    while (not page):
+        pass
+    soup = BeautifulSoup(page.content, "html.parser")
     course_information = {}
-    odd_rows: ResultSet = soup.find_all('tr', class_='odd')
-    even_rows = soup.find_all('tr', class_='even')
-    for eachrow in odd_rows:
-        [name, number, section] = parse_course_name(eachrow.find(
-            'a', class_='coursenum').string)
-        print(eachrow.contents[3].string)
+    rows: ResultSet = soup.tbody.find_all('tr')
+    for eachrow in rows[:1]:
+        # print(eachrow.contents)
+        [name, number, section] = parse_course_name(
+            eachrow.find('td', class_='course').a.text)
         print([name, number, section])
+        # print([name, number, section])
+        # print(eachrow.contents[3].string)
+        # print([name, number, section])
         # course_title = eachrow.children[1].text()
         # course_campus = eachrow.find('td', class_='campus').text().strip()
         # if course_campus in campus_mapping:
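
Editor's note on the parse_course_name change: the old condition called .isdigit() on the whole name string, which is False as soon as any character is a letter, so that branch could never fire for a mixed course code; the new condition tests the single character name[i] (and also flips the is_number guard). A minimal sketch of that difference follows; the example string is hypothetical, not taken from the repo:

    # Hypothetical course code, used only to illustrate the condition change.
    name = 'CISC220010'

    # Old check: str.isdigit() over the whole string is False whenever any
    # character is non-numeric, so it never matched codes like this one.
    print(name.isdigit())       # False

    # New check: test one character at a time, so the numeric part of the
    # code is detected as soon as it starts.
    print(name[4].isdigit())    # True, '2' is a digit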
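
The main() change swaps the Selenium/GeckoDriver page load for a plain requests.get. Since requests.get only returns once the response has arrived, the "while (not page): pass" loop either does nothing (2xx status) or spins forever (error status); an explicit status check is the more usual pattern. Below is a self-contained sketch along those lines, assuming the fully parameterised ALL_COURSE_URL alone reproduces the search-results page (the committed main() additionally appends generate_search_endpoint('A'), which is not reproduced here); the timeout and raise_for_status call are this sketch's additions, not part of the commit:

    import requests
    from bs4 import BeautifulSoup

    # Copied verbatim from the ALL_COURSE_URL constant added in this commit.
    ALL_COURSE_URL = ('https://udapps.nss.udel.edu/CoursesSearch/search-results'
                      '?term=2228&search_type=A&course_sec=&session=All'
                      '&course_title=&instr_name=&text_info=All&campus='
                      '&instrtn_mode=All&time_start_hh=&time_start_ampm='
                      '&credit=Any&keyword=&geneduc=&subj_area_code=&college=')

    # requests.get blocks until the response arrives, so a status check
    # replaces the busy-wait used in the committed main().
    page = requests.get(ALL_COURSE_URL, timeout=30)
    page.raise_for_status()  # raise on a non-2xx response

    soup = BeautifulSoup(page.content, 'html.parser')
    if soup.tbody is not None:
        # Same row selection as the new main(): every <tr> in the first <tbody>.
        rows = soup.tbody.find_all('tr')
        print(len(rows), 'rows found')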
