66from bs4 import BeautifulSoup , ResultSet
77from webdriver_manager .firefox import GeckoDriverManager
88from selenium import webdriver
9+ import re
910
# Search-results URL requested by main() to obtain the course table.
# The literal is split across adjacent string pieces purely for
# readability; the resulting value is one unbroken URL.
ALL_COURSE_URL = (
    'https://udapps.nss.udel.edu/CoursesSearch/search-results'
    '?term=2228&search_type=A&course_sec=&session=All'
    '&course_title=&instr_name=&text_info=All&campus='
    '&instrtn_mode=All&time_start_hh=&time_start_ampm='
    '&credit=Any&keyword=&geneduc=&subj_area_code=&college='
)

# Placeholder stored for any field that could not be read from a row.
not_available = 'N/A'

# Module-level result store that main() fills with one entry per table row.
course_information = {}
1114
1215campus_mapping = {
1316 'NEWRK' : 'Newark' ,
2225 'T' : 'Tuesday' ,
2326 'W' : 'Wednesday' ,
2427 'TR' : 'Thursday' ,
25- 'R ' : 'Friday'
28+ 'F ' : 'Friday' ,
2629}
2730
2831location_mapping = {
@@ -77,16 +80,12 @@ def parse_course_days(daystr: str):
7780 course_days = []
7881 if 'M' in daystr :
7982 course_days .append (course_mapping ['M' ])
80- elif 'W' in daystr :
83+ if 'W' in daystr :
8184 course_days .append (course_mapping ['W' ])
82- elif 'F' in daystr :
85+ if 'F' in daystr :
8386 course_days .append (course_mapping ['F' ])
84- elif 'TR' in daystr :
87+ if 'TR' in daystr :
8588 course_days .append (course_mapping ['TR' ])
86- elif 'R' in daystr and 'TR' not in daystr :
87- course_days .append (course_mapping ['R' ])
88- elif 'R' in daystr and daystr .count ('R' ) == 2 :
89- course_days .append (course_mapping ['R' ])
9089 return course_days
9190
9291
@@ -97,10 +96,16 @@ def parse_course_time(timestr: str):
9796 Returns:
9897 The parsed time of the course, [lower, upper] bounds
9998 """
100- split_timestr = timestr .replace ( 'PM' , '' ). split (' - ' )
99+ split_timestr = timestr .split (' - ' )
101100 left_bound = split_timestr [0 ]
102101 right_bound = split_timestr [1 ]
103- return [left_bound , right_bound ]
102+ if '\n ' in right_bound :
103+ _ind = right_bound .index ('\n ' )
104+ right_bound = right_bound [0 :_ind ].strip ()
105+ right_bound = right_bound .replace (
106+ 'AM' if 'AM' in right_bound else 'PM' , '' )
107+ is_am = 'AM' in right_bound
108+ return [left_bound , right_bound , is_am ]
104109
105110
106111def parse_course_location (locationstr : str ):
@@ -121,44 +126,212 @@ def parse_course_location(locationstr: str):
121126
122127
# Patterns used to pick out the relevant cells/paragraphs; compiled once
# instead of on every table row.
_CREDIT_PATTERN = re.compile('Hrs')
_PREREQ_PATTERN = re.compile('PREREQ|Prerequisites')
_COREQ_PATTERN = re.compile('COREQ|Corequisites')


def _value_or_na(extract):
    """Return ``extract()``, or ``not_available`` when the extraction fails.

    Scraping is best-effort: a missing cell or unexpected layout must not
    abort the run.  ``Exception`` (rather than a bare ``except``) is caught
    so that Ctrl-C / SystemExit still stop a long scrape.
    """
    try:
        return extract()
    except Exception:
        return not_available


def _parse_requisites(text, subject, markers):
    """Extract requisite course codes from one paragraph of text.

    Args:
        text: Paragraph text to search.
        subject: Subject prefix of the current course; the list of codes is
            taken to start at its first occurrence after the marker.
        markers: Marker words to look for, in priority order.

    Returns:
        ``(codes, alternatives)`` where ``codes`` is a de-duplicated list of
        stripped entries and ``alternatives`` is True when the entries were
        joined by " or ".
    """
    if not isinstance(subject, str) or not subject:
        return [], False
    for marker in markers:
        marker_at = text.find(marker)
        if marker_at != -1:
            break
    else:
        return [], False
    # Skip the marker by its real length instead of a hard-coded offset.
    tail = text[marker_at + len(marker):]
    subject_at = tail.find(subject)
    if subject_at == -1:
        return [], False
    # The list runs up to the first period (or the end of the paragraph
    # when no period follows).
    sentence = tail[subject_at:].partition('.')[0]
    # Match whole connective words only; a plain 'or' substring test would
    # also fire on words such as "for" or "major".
    if ' or ' in sentence:
        parts, alternatives = sentence.split(' or '), True
    elif ' and ' in sentence:
        parts, alternatives = sentence.split(' and '), False
    else:
        parts, alternatives = [sentence], False
    codes = []
    for part in parts:
        code = part.strip()  # strip before the duplicate check
        if code and code not in codes:
            codes.append(code)
    return codes, alternatives


def _collect_requisites(detail_soup, pattern, markers, subject):
    """Gather requisites from every matching ``<p>`` of a detail page.

    Returns:
        The list of requisite entries followed by a trailing boolean that is
        True when any paragraph listed alternatives ("or"); an empty list
        when nothing was found or no detail page is available.
    """
    if detail_soup is None:
        return []
    codes = []
    any_alternatives = False
    for paragraph in detail_soup.find_all('p', string=pattern):
        found, alternatives = _parse_requisites(
            paragraph.text, subject, markers)
        for code in found:
            if code not in codes:
                codes.append(code)
        any_alternatives = any_alternatives or alternatives
    if codes:
        codes.append(any_alternatives)
    return codes


def _fetch_course_detail(row, base_url):
    """Download and parse the detail page linked from ``row``.

    Returns:
        The parsed page, or None when the row has no detail link or the
        request/parse fails.
    """
    try:
        detail_href = row.find_all('a', class_='coursenum')[0]['href']
        # NOTE(review): the parsed href appears to contain a section sign
        # where the query string has "&section" ("&sect" read as an HTML
        # entity); it is restored here -- confirm against a live page.
        detail_url = f'{base_url}{detail_href}'.replace(
            '\u00a7ion', '&section')
        response = requests.get(detail_url)
        return BeautifulSoup(response.content, 'html.parser')
    except Exception:
        return None


def main():
    """Scrape the course search results and print the collected data.

    Requests the search-results page, reads one record per table row
    (falling back to ``not_available`` for any field that cannot be read),
    follows each row's detail link to collect pre-/co-requisites, stores
    the record in the module-level ``course_information`` dict and finally
    pretty-prints that dict.

    Raises:
        requests.HTTPError: if the search-results request returns an error
            status (previously this case spun forever in a busy loop).
    """
    base_url = 'https://udapps.nss.udel.edu/CoursesSearch/'
    # NOTE(review): ALL_COURSE_URL already carries a query string, so this
    # appends a second '?'; kept as-is because generate_search_endpoint is
    # not visible here.
    page = requests.get('{}?{}'.format(
        ALL_COURSE_URL, generate_search_endpoint('A')))
    # A Response is falsy for 4xx/5xx statuses; fail loudly instead of
    # looping on it.
    page.raise_for_status()
    soup = BeautifulSoup(page.content, "html.parser")
    table_body = soup.tbody
    rows = table_body.find_all('tr') if table_body is not None else []

    for eachrow in rows:
        try:
            name, number, section = parse_course_name(
                eachrow.find('td', class_='course').a.text)
        except Exception:
            name = number = section = not_available

        stored_result = {
            'name': name,
            'number': number,
            'section': section,
            # NOTE(review): the split separator is reproduced as captured.
            'title': _value_or_na(
                lambda: eachrow.contents[3].text.strip().split(' ')[
                    0].strip()),
            'campus': _value_or_na(
                lambda: campus_mapping[eachrow.find(
                    'td', class_='campus').text.strip()]),
            'total_seats': _value_or_na(
                lambda: eachrow.find('td', class_='openseats').text.strip()
                .replace('CURRENTLY FULL', '').split(' OF ')[1].strip()),
            'credits': _value_or_na(
                lambda: eachrow.find('td', string=_CREDIT_PATTERN)
                .text.strip().split(' Hrs')[0]),
            'day': _value_or_na(
                lambda: parse_course_days(
                    eachrow.find('td', class_='day').text.strip())),
            'course_time': _value_or_na(
                lambda: parse_course_time(
                    eachrow.find('td', class_='time').text.strip())),
            'location': _value_or_na(
                lambda: parse_course_location(
                    eachrow.find('td', class_='location').a.text.strip())),
            'teacher': _value_or_na(
                lambda: eachrow.contents[len(
                    eachrow.contents) - 4].text.strip()),
        }

        # Fetched fresh for every row so a failed request can never reuse
        # the previous course's detail page.
        detail_soup = _fetch_course_detail(eachrow, base_url)
        stored_result['prereqs'] = _collect_requisites(
            detail_soup, _PREREQ_PATTERN, ('PREREQ', 'Prerequisites'), name)
        stored_result['coreqs'] = _collect_requisites(
            detail_soup, _COREQ_PATTERN, ('COREQ', 'Corequisites'), name)

        course_information[f'{name}{number}{section}'] = stored_result
    pprint(course_information)
163336
164337
0 commit comments