Skip to content

Commit db9d021

Browse files
committed
Add exclude option
1 parent dc4ba37 commit db9d021

File tree

5 files changed

+116
-43
lines changed

5 files changed

+116
-43
lines changed

Makefile

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,28 @@
11
# Variables for the project
22
# =========================
3-
CONFERENCE ?= ep2024
3+
CONFERENCE ?= ep2025
44
DATA_DIR ?= ./data/public/$(CONFERENCE)/
55

66
# Variables for remote host
77
# =========================
88
VPS_USER ?= static_content_user
99
VPS_HOST ?= static.europython.eu
10-
VPS_PATH ?= /home/$(VPS_USER)/content/programapi/$(CONFERENCE)/releases
10+
VPS_PATH ?= /home/$(VPS_USER)/content/static/programme/$(CONFERENCE)/releases
1111
REMOTE_CMD=ssh $(VPS_USER)@$(VPS_HOST)
1212

1313
# Variables for deploy
14-
# ==========================
14+
# ====================
1515
TIMESTAMP ?= $(shell date +%Y%m%d%H%M%S)
1616
FORCE_DEPLOY ?= false
1717

18+
# Optional arguments
19+
# ==================
20+
EXCLUDE ?=
21+
WARN_DUPES ?= false
22+
23+
# Convert EXCLUDE space-separated list to repeated --exclude flags
24+
EXCLUDE_FLAGS = $(foreach item,$(EXCLUDE),--exclude $(item))
25+
1826
dev:
1927
uv sync --dev
2028

@@ -27,13 +35,13 @@ deps/install:
2735
install: deps/install
2836

2937
download:
30-
python -m src.download
38+
python -m src.download $(EXCLUDE_FLAGS)
3139

3240
transform:
3341
ifeq ($(WARN_DUPES), true)
34-
python -m src.transform --warn-dupes
42+
python -m src.transform $(EXCLUDE_FLAGS) --warn-dupes
3543
else
36-
python -m src.transform
44+
python -m src.transform $(EXCLUDE_FLAGS)
3745
endif
3846

3947
all: download transform

src/config.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55

66

77
class Config:
8-
event = "europython-2024"
9-
event_dir_name = "ep2024"
8+
event = "europython-2025"
9+
event_dir_name = "ep2025"
1010
project_root = Path(__file__).resolve().parents[1]
1111
raw_path = Path(f"{project_root}/data/raw/{event_dir_name}")
1212
public_path = Path(f"{project_root}/data/public/{event_dir_name}")

src/download.py

Lines changed: 34 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,23 @@
11
import json
2+
from argparse import ArgumentParser
23
from typing import Any
34

45
import requests
56
from tqdm import tqdm
67

78
from src.config import Config
89

10+
parser = ArgumentParser(description="Download Pretalx data for EuroPython processing.")
11+
parser.add_argument(
12+
"-e",
13+
"--exclude",
14+
choices=["schedule", "youtube"],
15+
action="append",
16+
help="Exclude certain resources from download.",
17+
)
18+
args = parser.parse_args()
19+
exclude = set(args.exclude or [])
20+
921
headers = {
1022
"Accept": "application/json, text/javascript",
1123
"Authorization": f"Token {Config.token()}",
@@ -14,24 +26,27 @@
1426
base_url = f"https://pretalx.com/api/events/{Config.event}/"
1527
schedule_url = base_url + "schedules/latest/"
1628

29+
# Build resource list dynamically based on exclusions
1730
resources = [
18-
# Questions need to be passed to include answers in the same endpoint,
19-
# saving us later time with joining the answers.
2031
"submissions?questions=all&state=confirmed",
2132
"speakers?questions=all",
22-
"p/youtube",
2333
]
2434

35+
if "youtube" not in exclude:
36+
resources.append("p/youtube")
37+
2538
Config.raw_path.mkdir(parents=True, exist_ok=True)
2639

2740
for resource in resources:
28-
url = base_url + f"{resource}"
41+
# To get the resource name without extra parameters
42+
resource_name = resource.split("?")[0].split("/")[-1]
43+
url = base_url + resource
2944

3045
res0: list[dict[str, Any]] = []
3146
data: dict[str, Any] = {"next": url}
3247
n = 0
3348

34-
pbar = tqdm(desc=f"Downloading {resource}", unit=" page", dynamic_ncols=True)
49+
pbar = tqdm(desc=f"Downloading {resource_name}", unit=" page", dynamic_ncols=True)
3550

3651
while url := data["next"]:
3752
n += 1
@@ -46,24 +61,25 @@
4661

4762
pbar.close()
4863

49-
# To get the resource name without extra parameters
50-
filename = resource.split("?")[0].split("/")[-1]
51-
filename = f"{filename}_latest.json"
64+
# Save the data to a file
65+
filename = f"{resource_name}_latest.json"
5266
filepath = Config.raw_path / filename
5367

5468
with open(filepath, "w") as fd:
5569
json.dump(res0, fd)
5670

71+
# Download schedule unless excluded
72+
if "schedule" not in exclude:
73+
print("Downloading schedule...", end="")
74+
response = requests.get(schedule_url, headers=headers, timeout=30)  # time out rather than hang forever on a stalled connection
5775

58-
# Download schedule
59-
response = requests.get(schedule_url, headers=headers)
76+
if response.status_code != 200:
77+
raise Exception(f"Error {response.status_code}: {response.text}")
6078

61-
if response.status_code != 200:
62-
raise Exception(f"Error {response.status_code}: {response.text}")
63-
64-
data = response.json()
65-
filename = "schedule_latest.json"
66-
filepath = Config.raw_path / filename
79+
data = response.json()
80+
filename = "schedule_latest.json"
81+
filepath = Config.raw_path / filename
6782

68-
with open(filepath, "w") as fd:
69-
json.dump(data, fd)
83+
with open(filepath, "w") as fd:
84+
json.dump(data, fd)
85+
print(" done.")

src/transform.py

Lines changed: 60 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import sys
1+
from argparse import ArgumentParser
22

33
from src.config import Config
44
from src.utils.parse import Parse
@@ -7,43 +7,88 @@
77
from src.utils.utils import Utils
88

99
if __name__ == "__main__":
10-
print(f"Parsing the data from {Config.raw_path}...")
10+
parser = ArgumentParser(description="Transform data from Pretalx to EuroPython format and save it.")
11+
parser.add_argument(
12+
"-w",
13+
"--warn-dupes",
14+
action="store_true",
15+
help="Warn about duplicates in the data.",
16+
)
17+
parser.add_argument(
18+
"-e",
19+
"--exclude",
20+
choices=["schedule", "youtube"],
21+
action="append",
22+
help="Exclude certain data from transformation.",
23+
)
24+
args = parser.parse_args()
25+
exclude = set(args.exclude or [])
26+
27+
28+
print(f"Parsing submissions from {Config.raw_path}/submissions_latest.json...", end="")
1129
pretalx_submissions = Parse.publishable_submissions(
1230
Config.raw_path / "submissions_latest.json"
1331
)
32+
print(" done.")
33+
34+
print(f"\nParsing speakers from {Config.raw_path}/speakers_latest.json...", end="")
1435
pretalx_speakers = Parse.publishable_speakers(
1536
Config.raw_path / "speakers_latest.json", pretalx_submissions.keys()
1637
)
17-
pretalx_schedule = Parse.schedule(Config.raw_path / "schedule_latest.json")
38+
print(" done.")
1839

19-
# Parse the YouTube data
20-
youtube_data = Parse.youtube(Config.raw_path / "youtube_latest.json")
40+
if "youtube" not in exclude:
41+
print(f"Parsing YouTube data from {Config.raw_path}/youtube_latest.json...", end="")
42+
youtube_data = Parse.youtube(Config.raw_path / "youtube_latest.json")
43+
print(" done.")
44+
else:
45+
youtube_data = {}
2146

22-
print("Computing timing relationships...")
47+
print("\nComputing timing relationships...", end="")
2348
TimingRelationships.compute(pretalx_submissions.values())
49+
print(" done.")
2450

25-
print("Transforming the data...")
51+
print("\nTransforming submissions...", end="")
2652
ep_sessions = Transform.pretalx_submissions_to_europython_sessions(
2753
pretalx_submissions,
2854
youtube_data,
2955
)
56+
print(" done.")
57+
58+
print("\nTransforming speakers...", end="")
3059
ep_speakers = Transform.pretalx_speakers_to_europython_speakers(pretalx_speakers)
31-
ep_schedule = Transform.pretalx_schedule_to_europython_schedule(
32-
pretalx_schedule.breaks, ep_sessions, ep_speakers
33-
)
60+
print(" done.")
3461

3562
# Warn about duplicates if the flag is set
36-
if len(sys.argv) > 1 and sys.argv[1] == "--warn-dupes":
63+
if args.warn_dupes:
3764
Utils.warn_duplicates(
3865
session_attributes_to_check=["title"],
3966
speaker_attributes_to_check=["name"],
4067
sessions_to_check=ep_sessions,
4168
speakers_to_check=ep_speakers,
4269
)
4370

44-
print(f"Writing the data to {Config.public_path}...")
71+
print(f"\nWriting sessions to {Config.public_path}/sessions.json...", end="")
4572
Utils.write_to_file(Config.public_path / "sessions.json", ep_sessions)
73+
print(" done.")
74+
75+
print(f"\nWriting speakers to {Config.public_path}/speakers.json...", end="")
4676
Utils.write_to_file(Config.public_path / "speakers.json", ep_speakers)
47-
Utils.write_to_file(
48-
Config.public_path / "schedule.json", ep_schedule, direct_dump=True
49-
)
77+
print(" done.")
78+
79+
if "schedule" not in exclude:
80+
print(f"\nParsing schedule from {Config.raw_path}/schedule_latest.json...", end="")
81+
pretalx_schedule = Parse.schedule(Config.raw_path / "schedule_latest.json")
82+
print(" done.")
83+
84+
print("\nTransforming the schedule...", end="")
85+
ep_schedule = Transform.pretalx_schedule_to_europython_schedule(
86+
pretalx_schedule.breaks, ep_sessions, ep_speakers
87+
)
88+
print(" done.")
89+
90+
print(f"\nWriting schedule to {Config.public_path}/schedule.json...", end="")
91+
Utils.write_to_file(
92+
Config.public_path / "schedule.json", ep_schedule, direct_dump=True
93+
)
94+
print(" done.")

src/utils/utils.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,8 @@ def warn_duplicates(
7575
Warns about duplicate attributes in the given objects
7676
"""
7777
print(
78-
f"Checking for duplicate {'s, '.join(session_attributes_to_check)}s in sessions..."
78+
f"\nChecking for duplicate {'s, '.join(session_attributes_to_check)}s in sessions...",
79+
end="",
7980
)
8081
duplicate_sessions = Utils.find_duplicate_attributes(
8182
sessions_to_check, session_attributes_to_check
@@ -84,9 +85,11 @@ def warn_duplicates(
8485
for attribute, codes in duplicate_sessions.items():
8586
if len(codes) > 1:
8687
print(f"Duplicate ``{attribute}`` in sessions: {codes}")
88+
print(" done.")
8789

8890
print(
89-
f"Checking for duplicate {'s, '.join(speaker_attributes_to_check)}s in speakers..."
91+
f"Checking for duplicate {'s, '.join(speaker_attributes_to_check)}s in speakers...",
92+
end="",
9093
)
9194
duplicate_speakers = Utils.find_duplicate_attributes(
9295
speakers_to_check, speaker_attributes_to_check
@@ -95,6 +98,7 @@ def warn_duplicates(
9598
for attribute, codes in duplicate_speakers.items():
9699
if len(codes) > 1:
97100
print(f"Duplicate ``{attribute}`` in speakers: {codes}")
101+
print(" done.")
98102

99103
@staticmethod
100104
def compute_unique_slugs_by_attribute(

0 commit comments

Comments
 (0)