Skip to content

Commit 19957a3

Browse files
committed
add scrape all
1 parent f816d79 commit 19957a3

File tree

1 file changed

+45
-0
lines changed

1 file changed

+45
-0
lines changed

backend/courses/management/commands/scrape_prereqs.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,10 @@
66

77
import requests
88
from django.core.management.base import BaseCommand
9+
from django.db.models import Min, Q
10+
11+
from courses.models import Course
12+
from courses.util import get_semesters
913

1014

1115
# courses.upenn.edu API endpoint; the query string selects the "fose" page and
# the "details" route. NOTE(review): presumably the URL every scrape request is
# sent to -- the call site is outside this view, confirm there.
API_URL = "https://courses.upenn.edu/api/?page=fose&route=details"
@@ -123,6 +127,23 @@ def load_pairs_from_file(file_path: Path) -> list[tuple[str, str]]:
123127
raise ValueError("Unsupported --input-file type. Use .json or .csv")
124128

125129

130+
def load_pairs_from_courses(semesters: list[str]) -> list[tuple[str, str]]:
    """Build (course code, CRN) pairs from ``Course`` rows in the database.

    Each course whose ``semester`` is in ``semesters`` is annotated with one
    sample CRN: the ``Min`` aggregate over its sections' CRNs, ignoring
    sections whose CRN is NULL or the empty string. Courses left without a
    sample CRN are dropped.

    Args:
        semesters: Semester values to match against ``Course.semester``.

    Returns:
        One ``(normalized course code, CRN as str)`` tuple per remaining
        course, ordered by department code and then course code.
    """
    # Only sections that actually carry a CRN may contribute to the aggregate.
    usable_crn = Q(sections__crn__isnull=False) & ~Q(sections__crn="")

    annotated = Course.objects.filter(semester__in=semesters).annotate(
        sample_crn=Min("sections__crn", filter=usable_crn)
    )
    # A NULL aggregate means no section of the course had a usable CRN.
    with_crn = annotated.exclude(sample_crn__isnull=True)
    rows = with_crn.values_list("department__code", "code", "sample_crn").order_by(
        "department__code", "code"
    )

    pairs: list[tuple[str, str]] = []
    for dept, code, crn in rows:
        course_code = normalize_course_code(f"{dept} {code}")
        pairs.append((course_code, str(crn)))
    return pairs
145+
146+
126147
class Command(BaseCommand):
127148
help = "Scrape clssnotes from courses.upenn.edu and save compact JSON output."
128149

@@ -142,6 +163,21 @@ def add_arguments(self, parser):
142163
default=None,
143164
help="Path to .json or .csv containing course code + CRN pairs.",
144165
)
166+
parser.add_argument(
167+
"--all-course-codes",
168+
action="store_true",
169+
default=False,
170+
help="Scrape one CRN per course code from DB instead of manually provided pairs.",
171+
)
172+
parser.add_argument(
173+
"--semesters",
174+
type=str,
175+
default=None,
176+
help=(
177+
"Semester scope for --all-course-codes. "
178+
"Comma-separated (e.g. 2025C,2026A), 'all', or omitted for current semester."
179+
),
180+
)
145181
parser.add_argument(
146182
"--output-file",
147183
type=str,
@@ -158,6 +194,7 @@ def add_arguments(self, parser):
158194
def handle(self, *args, **kwargs):
159195
pair_args: list[str] = kwargs["pair"]
160196
input_file = kwargs["input_file"]
197+
all_course_codes = kwargs["all_course_codes"]
161198
timeout_seconds = kwargs["timeout_seconds"]
162199

163200
pairs: list[tuple[str, str]] = []
@@ -168,6 +205,14 @@ def handle(self, *args, **kwargs):
168205
if input_file:
169206
pairs.extend(load_pairs_from_file(Path(input_file)))
170207

208+
if all_course_codes:
209+
semesters = get_semesters(kwargs.get("semesters"))
210+
db_pairs = load_pairs_from_courses(semesters)
211+
self.stdout.write(
212+
f"Loaded {len(db_pairs)} course/CRN pairs from DB for semesters: {', '.join(semesters)}"
213+
)
214+
pairs.extend(db_pairs)
215+
171216
deduped_pairs = []
172217
seen = set()
173218
for course_code, crn in pairs:

0 commit comments

Comments
 (0)