Skip to content

Commit 7c32a9d

Browse files
committed
implemented 2FA
1 parent b4c142b commit 7c32a9d

File tree

9 files changed

+263
-81
lines changed

9 files changed

+263
-81
lines changed
Lines changed: 74 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -1,91 +1,91 @@
1-
# name: Build and Deploy to Kubernetes
1+
name: Build and Deploy to Kubernetes
22

3-
# on:
4-
# push:
5-
# branches:
6-
# - main
7-
# - dev
3+
on:
4+
push:
5+
branches:
6+
- main
7+
- dev
88

9-
# env:
10-
# DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }}
11-
# DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }}
12-
# KUBECONFIG: ${{ secrets.KUBECONFIG }}
13-
# IMAGE_NAME: zohairr/drexelscheduler-scraper
9+
env:
10+
DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }}
11+
DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }}
12+
KUBECONFIG: ${{ secrets.KUBECONFIG }}
13+
IMAGE_NAME: zohairr/drexelscheduler-scraper
1414

15-
# jobs:
16-
# build-and-push:
17-
# runs-on: ubuntu-latest
18-
# steps:
19-
# - name: Checkout code
20-
# uses: actions/checkout@v4
15+
jobs:
16+
build-and-push:
17+
runs-on: ubuntu-latest
18+
steps:
19+
- name: Checkout code
20+
uses: actions/checkout@v4
2121

22-
# - name: Set up QEMU
23-
# uses: docker/setup-qemu-action@v3
22+
- name: Set up QEMU
23+
uses: docker/setup-qemu-action@v3
2424

25-
# - name: Set up Docker Buildx
26-
# uses: docker/setup-buildx-action@v3
25+
- name: Set up Docker Buildx
26+
uses: docker/setup-buildx-action@v3
2727

28-
# - name: Login to DockerHub
29-
# run: echo "${{ secrets.DOCKER_PASSWORD }}" | docker login -u "${{ secrets.DOCKER_USERNAME }}" --password-stdin
28+
- name: Login to DockerHub
29+
run: echo "${{ secrets.DOCKER_PASSWORD }}" | docker login -u "${{ secrets.DOCKER_USERNAME }}" --password-stdin
3030

31-
# - name: Build and push Docker image
32-
# run: |
33-
# COMMIT_SHA=$(echo $GITHUB_SHA | cut -c1-7)
34-
# docker buildx create --use
35-
# docker buildx build --platform linux/arm64 -t $IMAGE_NAME:$COMMIT_SHA . --push
31+
- name: Build and push Docker image
32+
run: |
33+
COMMIT_SHA=$(echo $GITHUB_SHA | cut -c1-7)
34+
docker buildx create --use
35+
docker buildx build --platform linux/arm64 -t $IMAGE_NAME:$COMMIT_SHA . --push
3636
37-
# deploy:
38-
# needs: build-and-push
39-
# runs-on: ubuntu-latest
40-
# steps:
41-
# - name: Checkout code
42-
# uses: actions/checkout@v4
37+
deploy:
38+
needs: build-and-push
39+
runs-on: ubuntu-latest
40+
steps:
41+
- name: Checkout code
42+
uses: actions/checkout@v4
4343

44-
# - name: Install and configure kubectl
45-
# run: |
46-
# sudo snap install kubectl --classic
47-
# echo "$KUBECONFIG" | base64 --decode > kubeconfig.yaml
44+
- name: Install and configure kubectl
45+
run: |
46+
sudo snap install kubectl --classic
47+
echo "$KUBECONFIG" | base64 --decode > kubeconfig.yaml
4848
49-
# - name: Set Namespace based on Branch
50-
# run: echo "NAMESPACE=$(if [ "${{ github.ref }}" = "refs/heads/main" ]; then echo 'default'; else echo 'dev'; fi)" >> $GITHUB_ENV
49+
- name: Set Namespace based on Branch
50+
run: echo "NAMESPACE=$(if [ "${{ github.ref }}" = "refs/heads/main" ]; then echo 'default'; else echo 'dev'; fi)" >> $GITHUB_ENV
5151

52-
# - name: Set up Helm
53-
# run: |
54-
# curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/master/scripts/get-helm-3
55-
# chmod 700 get_helm.sh
56-
# ./get_helm.sh
52+
- name: Set up Helm
53+
run: |
54+
curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/master/scripts/get-helm-3
55+
chmod 700 get_helm.sh
56+
./get_helm.sh
5757
58-
# - name: Check for running Kubernetes jobs
59-
# run: |
60-
# KUBECONFIG=kubeconfig.yaml
61-
# while kubectl get jobs -n ${{ env.NAMESPACE }} -o jsonpath='{.items[?(@.status.active)].metadata.name}' | grep "drexel-scheduler-cronjob"; do
62-
# echo "Waiting for jobs to complete..."
63-
# sleep 30
64-
# done
58+
- name: Check for running Kubernetes jobs
59+
run: |
60+
KUBECONFIG=kubeconfig.yaml
61+
while kubectl get jobs -n ${{ env.NAMESPACE }} -o jsonpath='{.items[?(@.status.active)].metadata.name}' | grep "drexel-scheduler-cronjob"; do
62+
echo "Waiting for jobs to complete..."
63+
sleep 30
64+
done
6565
66-
# - name: Deploy to Kubernetes
67-
# run: |
68-
# COMMIT_SHA=$(echo $GITHUB_SHA | cut -c1-7)
69-
# VALUES_FILE="./k8s/drexel-scraper/values-${{ env.NAMESPACE }}.yaml"
70-
# RELEASE_NAME="drexel-scraper-${{ env.NAMESPACE }}"
71-
# KUBECONFIG=kubeconfig.yaml helm upgrade --install $RELEASE_NAME ./k8s/drexel-scraper -f $VALUES_FILE -n ${{ env.NAMESPACE }} --set image.tag=$COMMIT_SHA --atomic
66+
- name: Deploy to Kubernetes
67+
run: |
68+
COMMIT_SHA=$(echo $GITHUB_SHA | cut -c1-7)
69+
VALUES_FILE="./k8s/drexel-scraper/values-${{ env.NAMESPACE }}.yaml"
70+
RELEASE_NAME="drexel-scraper-${{ env.NAMESPACE }}"
71+
KUBECONFIG=kubeconfig.yaml helm upgrade --install $RELEASE_NAME ./k8s/drexel-scraper -f $VALUES_FILE -n ${{ env.NAMESPACE }} --set image.tag=$COMMIT_SHA --atomic
7272
73-
# - name: Clear Cache
74-
# run: |
75-
# kubectl create job -n ${{ env.NAMESPACE }} --from=cronjob/delete-extra-course-data-cronjob github-actions-delete-credits-job --kubeconfig=kubeconfig.yaml
76-
# kubectl create job -n ${{ env.NAMESPACE }} --from=cronjob/delete-ratings-cronjob github-actions-delete-ratings-job --kubeconfig=kubeconfig.yaml
73+
- name: Clear Cache
74+
run: |
75+
kubectl create job -n ${{ env.NAMESPACE }} --from=cronjob/delete-extra-course-data-cronjob github-actions-delete-credits-job --kubeconfig=kubeconfig.yaml
76+
kubectl create job -n ${{ env.NAMESPACE }} --from=cronjob/delete-ratings-cronjob github-actions-delete-ratings-job --kubeconfig=kubeconfig.yaml
7777
78-
# - name: Add Grafana Helm Repo
79-
# run: |
80-
# helm repo add grafana https://grafana.github.io/helm-charts
81-
# helm repo update
78+
- name: Add Grafana Helm Repo
79+
run: |
80+
helm repo add grafana https://grafana.github.io/helm-charts
81+
helm repo update
8282
83-
# - name: Deploy Grafana to Dev
84-
# if: github.ref == 'refs/heads/dev'
85-
# run: |
86-
# KUBECONFIG=kubeconfig.yaml helm upgrade --install grafana-dev grafana/grafana -f k8s/grafana-dev-values.yaml -n dev
83+
- name: Deploy Grafana to Dev
84+
if: github.ref == 'refs/heads/dev'
85+
run: |
86+
KUBECONFIG=kubeconfig.yaml helm upgrade --install grafana-dev grafana/grafana -f k8s/grafana-dev-values.yaml -n dev
8787
88-
# - name: Deploy Grafana to Prod
89-
# if: github.ref == 'refs/heads/main'
90-
# run: |
91-
# KUBECONFIG=kubeconfig.yaml helm upgrade --install grafana grafana/grafana -f k8s/grafana-default-values.yaml --set env.GF_ANALYTICS_GOOGLE_ANALYTICS_4_ID=${{ secrets.GOOGLE_ANALYTICS_ID }} -n default
88+
- name: Deploy Grafana to Prod
89+
if: github.ref == 'refs/heads/main'
90+
run: |
91+
KUBECONFIG=kubeconfig.yaml helm upgrade --install grafana grafana/grafana -f k8s/grafana-default-values.yaml --set env.GF_ANALYTICS_GOOGLE_ANALYTICS_4_ID=${{ secrets.GOOGLE_ANALYTICS_ID }} -n default

.github/workflows/functional_test.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,9 @@ jobs:
4848

4949
- name: Run the scraper
5050
env:
51-
DREXEL_SCHEDULER_SHIB_IDP_SESSION_COOKIE: ${{ secrets.DREXEL_SCHEDULER_SHIB_IDP_SESSION_COOKIE }}
51+
DREXEL_USERNAME: ${{ secrets.DREXEL_USERNAME }}
52+
DREXEL_PASSWORD: ${{ secrets.DREXEL_PASSWORD }}
53+
DREXEL_MFA_SECRET_KEY: ${{ secrets.DREXEL_MFA_SECRET_KEY }}
5254
run: docker compose run scraper python3 src/main.py --db --all-colleges --ratings
5355

5456
- name: Verify courses data exists in database

.gitignore

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,6 @@ grafana_data
77
trash.py
88
profile_output.pstat
99
callgrind.out.profile
10-
.venv
10+
.venv
11+
test.py
12+
test.txt

.vscode/launch.json

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,13 @@
44
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
55
"version": "0.2.0",
66
"configurations": [
7+
{
8+
"name": "Python Debugger: Current File",
9+
"type": "debugpy",
10+
"request": "launch",
11+
"program": "${file}",
12+
"console": "integratedTerminal"
13+
},
714
{
815
"name": "Python: main.py",
916
"type": "debugpy",

docker-compose.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,9 @@ services:
1212
AWS_DEFAULT_REGION: us-east-1
1313
AWS_ACCESS_KEY_ID: "my-access-key"
1414
AWS_SECRET_ACCESS_KEY: "my-secret-access-key"
15-
DREXEL_SCHEDULER_SHIB_IDP_SESSION_COOKIE: ${DREXEL_SCHEDULER_SHIB_IDP_SESSION_COOKIE}
15+
DREXEL_USERNAME: ${DREXEL_USERNAME}
16+
DREXEL_PASSWORD: ${DREXEL_PASSWORD}
17+
DREXEL_MFA_SECRET_KEY: ${DREXEL_MFA_SECRET_KEY}
1618
volumes:
1719
- ./:/app
1820
depends_on:

src/config.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,17 @@
1515
# example values = CI, A, AS
1616
college_code = "CI"
1717

18+
# Drexel Connect Credentials
19+
drexel_username = os.environ["DREXEL_USERNAME"]
20+
drexel_password = os.environ["DREXEL_PASSWORD"]
21+
drexel_mfa_secret_key = os.environ["DREXEL_MFA_SECRET_KEY"]
22+
1823
# URLs
1924
tms_base_url = "https://termmasterschedule.drexel.edu"
2025
tms_home_url = tms_base_url + "/webtms_du"
2126
tms_quarter_url = tms_home_url + "/collegesSubjects/" + year + quarter
2227

23-
# Cookies
24-
shib_idp_session_cookie = os.environ["DREXEL_SCHEDULER_SHIB_IDP_SESSION_COOKIE"]
28+
drexel_connect_base_url = "https://connect.drexel.edu"
2529

2630
# Email AWS Configuration
2731
topic_arn = os.getenv("DREXEL_SCHEDULER_TOPIC_ARN")

src/login.py

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
from requests import Session
2+
from bs4 import BeautifulSoup, Tag
3+
import sys
4+
import re
5+
6+
import config
7+
import totp
8+
9+
def login_with_drexel_connect(session: Session):
    """Drive the Drexel Connect login flow, including the TOTP step, on ``session``.

    The flow is a fixed sequence of requests, each built from the previous reply:

    1. GET the Connect base URL; read the CSRF token and the login form action.
    2. POST the username and password to that form action.
    3. POST the event id and CSRF token found in the returned page's script.
       The reply to this request is parsed as JSON.
    4. POST the CSRF name/value pair and event value taken from that JSON to
       the URL the JSON names in ``flowExURL``.
    5. POST a freshly generated TOTP code to the OTP form's action.

    Args:
        session: The ``requests`` session every request is sent through.

    Returns:
        The same ``session`` object that was passed in.

    Raises:
        requests.HTTPError: If any step answers with a 4xx/5xx status.
        KeyError: If the JSON reply lacks one of the expected keys.
        Exception: Raised by the page-parsing helpers when an expected
            element or pattern is missing from a response.
    """
    base_url = config.drexel_connect_base_url

    response = session.get(base_url)
    # Fail on HTTP errors here rather than as a confusing parse error later.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    csrf_token = extract_csrf_token(soup)
    form_action_path = extract_form_action_path(soup)

    login_payload = {
        "j_username": config.drexel_username,
        "j_password": config.drexel_password,
        "csrf_token": csrf_token,
        "_eventId_proceed": ""
    }

    # Submit the credentials; the page that comes back carries the MFA request details.
    response = session.post(base_url + form_action_path, data=login_payload)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")
    data = parse_initial_mfa_page(soup)

    response = session.post(base_url + data["url"], data=data["form-data"])
    response.raise_for_status()
    json_response = response.json()

    # The CSRF field name itself is dynamic: the server supplies it in "csrfN".
    data = {
        json_response["csrfN"]: json_response["csrfV"],
        "_eventId": json_response["actValue"],
    }

    response = session.post(base_url + json_response["flowExURL"], data=data)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    parsed_data = parse_final_mfa_page(soup)

    totp_code = totp.get_token(config.drexel_mfa_secret_key)

    data = {
        "csrf_token": parsed_data["csrf_token"],
        "_eventId": "proceed",
        "j_mfaToken": totp_code
    }

    response = session.post(base_url + parsed_data["url"], data=data)
    # Previously this result was discarded, so an HTTP-level failure of the
    # final step went unnoticed.
    # NOTE(review): a rejected code may still come back as a 2xx page; confirm
    # what a successful response looks like and check for it here.
    response.raise_for_status()

    return session
53+
54+
def extract_csrf_token(soup: BeautifulSoup) -> str:
    """Return the ``value`` of the first ``<input name="csrf_token">`` in ``soup``.

    Args:
        soup: Parsed HTML of the page to search.

    Returns:
        The CSRF token string.

    Raises:
        Exception: If no such input exists, or its ``value`` attribute is
            missing or not a plain string.
    """
    csrf_token_input_tag = soup.find("input", {"name": "csrf_token"})

    if not isinstance(csrf_token_input_tag, Tag):
        raise Exception("Could not find CSRF token.")

    # .get() yields None for an absent attribute, so a missing "value" reaches
    # the descriptive error below instead of surfacing as a bare KeyError.
    csrf_token = csrf_token_input_tag.get("value")

    if not isinstance(csrf_token, str):
        raise Exception(f"CSRF token was not a string. Found: {csrf_token} of type: {type(csrf_token)}")

    return csrf_token
66+
67+
def extract_form_action_path(soup: BeautifulSoup) -> str:
    """Return the ``action`` attribute of the login form in ``soup``.

    The form is looked up as the first ``<form>`` inside the ``<div>`` whose
    id is ``login-box``.

    Args:
        soup: Parsed HTML of the login page.

    Returns:
        The form's ``action`` value as a string.

    Raises:
        Exception: If the div or form is not found, or the form's ``action``
            attribute is missing or not a plain string.
    """
    # the form is a child of a div with id "login-box"
    login_box_div = soup.find("div", {"id": "login-box"})

    if not isinstance(login_box_div, Tag):
        raise Exception("Could not find login box div.")

    login_form = login_box_div.find("form")

    if not isinstance(login_form, Tag):
        raise Exception("Could not find login form.")

    # .get() yields None for an absent attribute, so a form without "action"
    # reaches the descriptive error below instead of a bare KeyError.
    form_action_path = login_form.get("action")

    if not isinstance(form_action_path, str):
        raise Exception(f"Form action path was not a string. Found: {form_action_path} of type: {type(form_action_path)}")

    return form_action_path
85+
86+
def parse_initial_mfa_page(soup: BeautifulSoup) -> dict[str, str]:
    """Pull the next request's URL and form fields out of the page's inline script.

    The first ``<script>`` tag with non-empty text is searched with three
    regular expressions: one for the ``url: '...'`` entry, one for the
    ``_eventId`` value inside the ``data: '...'`` entry, and one for the
    ``csrf_token`` value.

    Args:
        soup: Parsed HTML of the page returned after submitting credentials.

    Returns:
        A dict with two keys: ``"url"`` (the matched path) and ``"form-data"``
        (a dict holding ``"_eventId"`` and ``"csrf_token"``).

    Raises:
        Exception: If no non-empty script tag exists, or any of the three
            patterns fails to match.
    """
    # NOTE(review): the declared return type says every value is a str, but
    # "form-data" holds a nested dict; widen the annotation when the file's
    # typing imports can be updated.
    data = {}

    # get the first script tag that isn't empty
    script_tag = soup.find("script", string=lambda text: text and len(text) > 0)

    if not isinstance(script_tag, Tag):
        raise Exception("Could not find non-empty script tag.")

    script_content = script_tag.string

    if not isinstance(script_content, str):
        raise Exception(f"Script content was not a string. Found: {script_content} of type: {type(script_content)}")

    url_match = re.search(r"url:\s*['\"](/idp/profile/cas/login\?execution=[^'\"]+)['\"]", script_content)
    if not url_match:
        raise Exception("Could not find MFA URL.")

    # The original line assigned this name twice in a chained assignment; once is enough.
    event_id_match = re.search(r"data:\s*'_eventId=([^'&]+)&csrf_token", script_content)
    if not event_id_match:
        raise Exception("Could not find MFA event ID.")

    csrf_token_match = re.search(r"csrf_token=([^'&]+)", script_content)
    if not csrf_token_match:
        raise Exception("Could not find MFA CSRF token.")

    data["url"] = url_match.group(1)
    data["form-data"] = {
        "_eventId": event_id_match.group(1),
        "csrf_token": csrf_token_match.group(1)
    }

    return data
120+
121+
def parse_final_mfa_page(soup: BeautifulSoup) -> dict[str, str]:
    """Read the OTP form's submit URL and CSRF token from ``soup``.

    Args:
        soup: Parsed HTML of the page that contains the form with id ``otp``.

    Returns:
        A dict with ``"url"`` (the form's ``action``) and ``"csrf_token"``
        (the ``value`` of the form's ``csrf_token`` input).

    Raises:
        Exception: If the form or its CSRF input is not found, or either
            attribute is missing or not a plain string.
    """
    # get form by id "otp"
    form = soup.find("form", {"id": "otp"})

    if not isinstance(form, Tag):
        raise Exception("Could not find OTP form.")

    # .get() yields None for an absent attribute, so a missing "action" reaches
    # the descriptive error below instead of a bare KeyError.
    url = form.get("action")

    if not isinstance(url, str):
        raise Exception(f"Action was not a string. Found: {url} of type: {type(url)}")

    csrf_token_input = form.find("input", {"name": "csrf_token"})

    if not isinstance(csrf_token_input, Tag):
        raise Exception("Could not find CSRF token input.")

    # Same reasoning as for "action": prefer the descriptive error to a KeyError.
    csrf_token = csrf_token_input.get("value")

    if not isinstance(csrf_token, str):
        raise Exception(f"CSRF token was not a string. Found: {csrf_token} of type: {type(csrf_token)}")

    data: dict[str, str] = {"url": url, "csrf_token": csrf_token}

    return data

src/scrape.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,14 @@
77
from helpers import send_request
88
from parse import parse_subject_page, parse_crn_page
99
import config
10-
10+
import login
1111

1212
def scrape(
1313
include_ratings: bool = False, all_colleges: bool = False
1414
) -> dict[str, dict[str, Any]]:
1515
session = Session()
16-
session.cookies.set("shib_idp_session", config.shib_idp_session_cookie) # type: ignore
16+
17+
session = login.login_with_drexel_connect(session)
1718

1819
data: dict[str, dict[str, Any]] = {}
1920

0 commit comments

Comments
 (0)