Skip to content

Commit 3840095

Browse files
authored
Merge pull request #97 from Zohair-coder/dev
Dev
2 parents (1898375 + 3cdf085), commit 3840095

File tree

4 files changed: 61 additions and 15 deletions

4 files changed: 61 additions and 15 deletions

docker-compose.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
version: "3"
21
services:
32
scraper:
43
build: .

src/helpers.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,15 +10,16 @@ def send_request(
1010
method: str = "GET",
1111
json: dict[str, Any] = {},
1212
headers: dict[str, str] = {},
13+
data: dict[str, Any] = {},
1314
) -> Response:
1415
try:
15-
resp = send_request_helper(session, url, method, json, headers)
16+
resp = send_request_helper(session, url, method, json, headers, data)
1617
resp.raise_for_status()
1718
except Exception as ex:
1819
retries = 10
1920
for _ in range(retries):
2021
try:
21-
resp = send_request_helper(session, url, method, json, headers)
22+
resp = send_request_helper(session, url, method, json, headers, data)
2223
resp.raise_for_status()
2324
except Exception as inner_ex:
2425
if (
@@ -51,12 +52,13 @@ def send_request_helper(
5152
method: str,
5253
json: dict[str, Any],
5354
headers: dict[str, str],
55+
data: dict[str, Any],
5456
) -> Response:
5557
timeout = 2
5658
if method == "GET":
5759
resp = session.get(url, headers=headers, timeout=timeout)
5860
elif method == "POST":
59-
resp = session.post(url, json=json, headers=headers, timeout=timeout)
61+
resp = session.post(url, json=json, data=data, headers=headers, timeout=timeout)
6062
else:
6163
raise Exception("Invalid method: {}".format(method))
6264
return resp

src/login.py

Lines changed: 37 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,16 @@
11
from requests import Session
22
from bs4 import BeautifulSoup, Tag
33
import re
4+
from typing import Any
45

56
import config
67
import totp
8+
from helpers import send_request
79

810

911
def login_with_drexel_connect(session: Session) -> Session:
10-
response = session.get(config.drexel_connect_base_url)
12+
response = send_request(session, config.drexel_connect_base_url, method="GET")
13+
assert response.status_code == 200, "Failed to get Drexel Connect login page"
1114
soup = BeautifulSoup(response.text, "html.parser")
1215

1316
csrf_token = extract_csrf_token(soup)
@@ -21,26 +24,44 @@ def login_with_drexel_connect(session: Session) -> Session:
2124
}
2225

2326
# this should send the credentials and send the MFA request
24-
response = session.post(
25-
config.drexel_connect_base_url + form_action_path, data=login_payload
27+
response = send_request(
28+
session,
29+
config.drexel_connect_base_url + form_action_path,
30+
data=login_payload,
31+
method="POST",
2632
)
33+
assert (
34+
response.status_code == 200
35+
), "Failed to send request to Drexel Connect with username and password"
2736

2837
soup = BeautifulSoup(response.text, "html.parser")
2938
data = parse_initial_mfa_page(soup)
3039

31-
response = session.post(
32-
config.drexel_connect_base_url + data["url"], data=data["form-data"]
40+
response = send_request(
41+
session,
42+
config.drexel_connect_base_url + data["url"],
43+
data=data["form-data"],
44+
method="POST",
3345
)
46+
assert (
47+
response.status_code == 200
48+
), "Failed to request MFA code page from Drexel Connect"
3449
json_response = response.json()
3550

3651
data = {
3752
json_response["csrfN"]: json_response["csrfV"],
3853
"_eventId": json_response["actValue"],
3954
}
4055

41-
response = session.post(
42-
config.drexel_connect_base_url + json_response["flowExURL"], data=data
56+
response = send_request(
57+
session,
58+
config.drexel_connect_base_url + json_response["flowExURL"],
59+
data=data,
60+
method="POST",
4361
)
62+
assert (
63+
response.status_code == 200
64+
), "Failed to receive MFA code page from Drexel Connect"
4465
soup = BeautifulSoup(response.text, "html.parser")
4566

4667
parsed_data = parse_final_mfa_page(soup)
@@ -53,9 +74,15 @@ def login_with_drexel_connect(session: Session) -> Session:
5374
"j_mfaToken": totp_code,
5475
}
5576

56-
response = session.post(
57-
config.drexel_connect_base_url + parsed_data["url"], data=data
77+
response = send_request(
78+
session,
79+
config.drexel_connect_base_url + parsed_data["url"],
80+
data=data,
81+
method="POST",
5882
)
83+
assert (
84+
response.status_code == 200
85+
), "Failed to send MFA code to Drexel Connect (final step)"
5986

6087
return session
6188

@@ -98,7 +125,7 @@ def extract_form_action_path(soup: BeautifulSoup) -> str:
98125
return form_action_path
99126

100127

101-
def parse_initial_mfa_page(soup: BeautifulSoup) -> dict[str, str]:
128+
def parse_initial_mfa_page(soup: BeautifulSoup) -> dict[str, Any]:
102129
data = {}
103130

104131
# get the first script tag that isn't empty

src/scrape.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
import json
44
import os
55
from typing import Any
6+
import traceback
7+
import time
68

79
from helpers import send_request
810
from parse import parse_subject_page, parse_crn_page
@@ -15,7 +17,23 @@ def scrape(
1517
) -> dict[str, dict[str, Any]]:
1618
session = Session()
1719

18-
session = login.login_with_drexel_connect(session)
20+
is_logged_into_drexel_connect = False
21+
failiure_count = 0
22+
while not is_logged_into_drexel_connect:
23+
try:
24+
session = login.login_with_drexel_connect(session)
25+
if "shib_idp_session" in session.cookies:
26+
is_logged_into_drexel_connect = True
27+
break
28+
except Exception:
29+
print("Error logging in to Drexel Connect: ")
30+
print(traceback.format_exc())
31+
print("Trying again...")
32+
33+
failiure_count += 1
34+
if failiure_count > 5:
35+
raise Exception("Failed to log in to Drexel Connect after 5 attempts")
36+
time.sleep(1)
1937

2038
data: dict[str, dict[str, Any]] = {}
2139

0 commit comments

Comments
 (0)