Skip to content

Commit 03248f8

Browse files
dtemkin1 and psvenk authored
Add PE subjects (#284)
In collaboration with DAPER --------- Co-authored-by: Pratyush Venkatakrishnan <contact@psvenk.com>
1 parent 20011d8 commit 03248f8

35 files changed

+2351
-525
lines changed

.gitignore

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,9 @@ dist-ssr
2626

2727
# artifacts
2828
scrapers/catalog.json
29-
scrapers/fireroad-sem.json
30-
scrapers/fireroad-presem.json
29+
scrapers/fireroad-*.json
3130
scrapers/cim.json
31+
scrapers/pe-*.json
3232
public/latest.json
3333
public/i26.json
3434

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ addopts = "--doctest-modules"
3535
[tool.pylint.main]
3636
max-line-length = 88
3737
py-version = "3.8"
38+
disable = "fixme"
3839

3940
[tool.hatch.build.targets.wheel]
4041
packages = ["scrapers"]

scrapers/__main__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from .cim import run as cim_run
1212
from .fireroad import run as fireroad_run
1313
from .package import run as package_run
14+
from .pe import run as pe_run
1415

1516

1617
def run():
@@ -25,6 +26,8 @@ def run():
2526
catalog_run()
2627
print("=== Update CI-M data ===")
2728
cim_run()
29+
print("=== Update PE data ===")
30+
pe_run()
2831
print("=== Packaging ===")
2932
package_run()
3033

scrapers/package.py

Lines changed: 45 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@
1919
from collections.abc import Iterable
2020
from typing import Any
2121

22-
from .utils import get_term_info
22+
from scrapers.pe import get_pe_quarters
23+
from scrapers.utils import get_term_info
2324

2425
if sys.version_info >= (3, 11):
2526
import tomllib
@@ -45,32 +46,34 @@ def load_json_data(json_path: str) -> Any:
4546
return json.load(json_file)
4647

4748

48-
def load_toml_data(overrides_dir: str, subpath=".") -> dict[str, Any]:
49+
def load_toml_data(toml_path: str) -> dict[str, Any]:
4950
"""
50-
Loads data from the provided directory that consists exclusively of TOML files
51+
Loads data from the provided TOML file, or directory that consists exclusively of
52+
TOML files
5153
5254
Args:
53-
overrides_dir (str): The directory to load from
54-
subpath (str, optional): Load from a subdirectory. Defaults to ".".
55+
toml_path (str): The file or directory to load from
5556
5657
Returns:
5758
dict[str, Any]: The data contained within the file or directory
5859
"""
59-
overrides_path = os.path.join(package_dir, overrides_dir)
60-
out: dict[str, Any] = {}
61-
62-
if not os.path.isdir(os.path.join(overrides_path, subpath)):
63-
# directory doesn't exist, so we return an empty dict
60+
toml_path = os.path.join(package_dir, toml_path)
61+
62+
if os.path.isfile(toml_path):
63+
with open(toml_path, "rb") as toml_file:
64+
return tomllib.load(toml_file)
65+
elif os.path.isdir(toml_path):
66+
# If the path is a directory, we load all TOML files in it
67+
out = {}
68+
with os.scandir(toml_path) as entries:
69+
for entry in entries:
70+
if entry.is_file() and entry.name.endswith(".toml"):
71+
with open(entry.path, "rb") as toml_file:
72+
out.update(tomllib.load(toml_file))
6473
return out
65-
66-
# If the path is a directory, we load all TOML files in it
67-
toml_dir = os.path.join(overrides_path, subpath)
68-
for fname in os.listdir(toml_dir):
69-
if fname.endswith(".toml"):
70-
with open(os.path.join(toml_dir, fname), "rb") as toml_file:
71-
out.update(tomllib.load(toml_file))
72-
73-
return out
74+
else:
75+
# Neither a file nor a directory exists at this path, so we return an empty dict
76+
return {}
7477

7578

7679
def merge_data(
@@ -118,6 +121,7 @@ def get_include(overrides: dict[str, dict[str, Any]]) -> set[str]:
118121
return classes
119122

120123

124+
# pylint: disable=too-many-locals
121125
def run() -> None:
122126
"""
123127
The main entry point.
@@ -135,7 +139,7 @@ def run() -> None:
135139

136140
for sem in sem_types:
137141
fireroad_sem = load_json_data(f"fireroad-{sem}.json")
138-
overrides_sem = load_toml_data("overrides.toml.d", sem)
142+
overrides_sem = load_toml_data(os.path.join("overrides.toml.d", sem))
139143

140144
# The key needs to be in BOTH fireroad and catalog to make it:
141145
# If it's not in Fireroad, it's not offered in this semester (fall, etc.).
@@ -150,11 +154,17 @@ def run() -> None:
150154
term_info = get_term_info(sem)
151155
url_name = term_info["urlName"]
152156

153-
obj: dict[str, dict[str, Any] | str | dict[Any, dict[str, Any]]] = {
154-
"termInfo": term_info,
155-
"lastUpdated": now,
156-
"classes": courses,
157-
}
157+
pe_data = {}
158+
for quarter in get_pe_quarters(url_name):
159+
pe_file = f"pe-q{quarter}.json"
160+
pe_overrides_file = os.path.join("pe", f"pe-q{quarter}-overrides.toml")
161+
if os.path.isfile(os.path.join(package_dir, pe_file)):
162+
quarter_data = load_json_data(pe_file)
163+
quarter_overrides = load_toml_data(pe_overrides_file)
164+
pe_data[quarter] = merge_data(
165+
datasets=[quarter_data, quarter_overrides],
166+
keys_to_keep=set(quarter_data),
167+
)
158168

159169
with open(
160170
os.path.join(
@@ -163,7 +173,16 @@ def run() -> None:
163173
mode="w",
164174
encoding="utf-8",
165175
) as file:
166-
json.dump(obj, file, separators=(",", ":"))
176+
json.dump(
177+
{
178+
"termInfo": term_info,
179+
"lastUpdated": now,
180+
"classes": courses,
181+
"pe": pe_data,
182+
},
183+
file,
184+
separators=(",", ":"),
185+
)
167186

168187
print(f"{url_name}: got {len(courses)} courses")
169188

0 commit comments

Comments
 (0)