Skip to content

Commit 9c3fc98

Browse files
committed
made system more robust
1 parent a810e37 commit 9c3fc98

File tree

3 files changed

+27
-8
lines changed

3 files changed

+27
-8
lines changed

docker-compose.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
version: "3"
21
services:
32
scraper:
43
build: .

src/login.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
def login_with_drexel_connect(session: Session) -> Session:
1212
response = send_request(session, config.drexel_connect_base_url, method="GET")
13+
assert response.status_code == 200, "Failed to get Drexel Connect login page"
1314
soup = BeautifulSoup(response.text, "html.parser")
1415

1516
csrf_token = extract_csrf_token(soup)
@@ -24,11 +25,13 @@ def login_with_drexel_connect(session: Session) -> Session:
2425

2526
# this should send the credentials and send the MFA request
2627
response = send_request(session, config.drexel_connect_base_url + form_action_path, data=login_payload, method="POST")
28+
assert response.status_code == 200, "Failed to send request to Drexel Connect with username and password"
2729

2830
soup = BeautifulSoup(response.text, "html.parser")
2931
data = parse_initial_mfa_page(soup)
3032

3133
response = send_request(session, config.drexel_connect_base_url + data["url"], data=data["form-data"], method="POST")
34+
assert response.status_code == 200, "Failed to request MFA code page from Drexel Connect"
3235
json_response = response.json()
3336

3437
data = {
@@ -37,6 +40,7 @@ def login_with_drexel_connect(session: Session) -> Session:
3740
}
3841

3942
response = send_request(session, config.drexel_connect_base_url + json_response["flowExURL"], data=data, method="POST")
43+
assert response.status_code == 200, "Failed to receive MFA code page from Drexel Connect"
4044
soup = BeautifulSoup(response.text, "html.parser")
4145

4246
parsed_data = parse_final_mfa_page(soup)
@@ -49,10 +53,9 @@ def login_with_drexel_connect(session: Session) -> Session:
4953
"j_mfaToken": totp_code,
5054
}
5155

52-
# session.post(
53-
# config.drexel_connect_base_url + parsed_data["url"], data=data
54-
# )
55-
send_request(session, config.drexel_connect_base_url + parsed_data["url"], data=data, method="POST")
56+
response = send_request(session, config.drexel_connect_base_url + parsed_data["url"], data=data, method="POST")
57+
assert response.status_code == 200, "Failed to send MFA code to Drexel Connect (final step)"
58+
5659
return session
5760

5861

src/scrape.py

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
import json
44
import os
55
from typing import Any
6+
import traceback
7+
import time
68

79
from helpers import send_request
810
from parse import parse_subject_page, parse_crn_page
@@ -15,9 +17,24 @@ def scrape(
1517
) -> dict[str, dict[str, Any]]:
1618
session = Session()
1719

18-
session = login.login_with_drexel_connect(session)
19-
assert "shib_idp_session" in session.cookies, "Failed to log in to Drexel Connect"
20-
20+
is_logged_into_drexel_connect = False
21+
failiure_count = 0
22+
while not is_logged_into_drexel_connect:
23+
try:
24+
session = login.login_with_drexel_connect(session)
25+
if "shib_idp_session" in session.cookies:
26+
is_logged_into_drexel_connect = True
27+
break
28+
except Exception:
29+
print("Error logging in to Drexel Connect: ")
30+
print(traceback.format_exc())
31+
print("Trying again...")
32+
33+
failiure_count += 1
34+
if failiure_count > 5:
35+
raise Exception("Failed to log in to Drexel Connect after 5 attempts")
36+
time.sleep(1)
37+
2138
data: dict[str, dict[str, Any]] = {}
2239

2340
if not all_colleges:

0 commit comments

Comments
 (0)