Skip to content

Commit aa329a8

Browse files
committed
generate schedules table
This is to set up for aggregate quarter map views.
1 parent f9f6e0e commit aa329a8

File tree

4 files changed

+296
-13
lines changed

4 files changed

+296
-13
lines changed

.github/workflows/github-pages.yml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,10 @@ jobs:
5555
run: make aggregate-all-routes
5656

5757
- name: Make Routes Table
58-
run: make _data/routes.yml
58+
run: make routes-yml
59+
60+
- name: Make Schedules Table
61+
run: make schedules-yml
5962

6063
- name: Build Calendar
6164
run: make rcc.ics

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ _site/
44

55
# generated from route db and gpx files
66
_data/routes.yml
7+
_data/schedules.yml
78
routes/geojson/*.geojson
89
routes/_transit_data
910

Makefile

Lines changed: 29 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,14 @@
44
DATA = _data
55
ROUTES = routes
66

7+
# all quarter schedule files
8+
SCHEDULES = $(wildcard $(DATA)/schedules/*.yml)
9+
710
# route files and derived versions (normalized GPX and GeoJSON)
811
ROUTES_RAW_GPX := $(wildcard $(ROUTES)/_gpx/*.gpx)
912
ROUTES_NORMGPX := $(patsubst $(ROUTES)/_gpx/%.gpx, $(ROUTES)/gpx/%.gpx, $(ROUTES_RAW_GPX))
1013
ROUTES_GEOJSON := $(patsubst $(ROUTES)/_gpx/%.gpx, $(ROUTES)/geojson/%.geojson, $(ROUTES_RAW_GPX))
1114

12-
# all quarter schedule files
13-
SCHEDULES = $(wildcard $(DATA)/schedules/*.yml)
14-
1515
# aggregate GeoJSON files (all routes for each quarter, and all routes overall)
1616
AGG_GEOJSON_DIR := $(ROUTES)/geojson/aggregates
1717
AGG_GEOJSON_ROUTES_QTR := $(patsubst $(DATA)/schedules/%.yml, $(AGG_GEOJSON_DIR)/%.geojson, $(SCHEDULES))
@@ -24,12 +24,18 @@ MUNGED_ROUTES := \
2424
$(AGG_GEOJSON_ROUTES_QTR) \
2525
$(AGG_GEOJSON_ROUTES_ALL)
2626

27+
# tables for page generation from Jekyll templates
28+
ROUTES_YML := $(DATA)/routes.yml
29+
SCHEDULES_YML := $(DATA)/schedules.yml
30+
31+
# all tables for page generation from Jekyll templates
32+
PAGE_TABLES := \
33+
$(ROUTES_YML) \
34+
$(SCHEDULES_YML)
35+
2736
TRANSIT_DATA = routes/transit_data
2837
TRANSIT_DATA_CSV = $(wildcard routes/transit_data/*.csv)
2938

30-
ROUTES_YML = $(DATA)/routes.yml
31-
SCHEDULE = $(DATA)/schedule.yml
32-
3339
JEKYLL_FLAGS ?=
3440
URL_BASE_PATH ?=
3541

@@ -44,15 +50,28 @@ all: check-schedules build
4450

4551
# build the site
4652
.PHONY: build
47-
build: $(MUNGED_ROUTES) \
48-
$(ROUTES_YML) \
49-
rcc.ics
53+
build: $(MUNGED_ROUTES) $(PAGE_TABLES) rcc.ics
5054
bundle exec jekyll build $(JEKYLL_FLAGS)
5155

5256
# build main "routes database" YAML file from all normalized route GPX files
5357
$(ROUTES_YML): _bin/make_routes_table.py $(ROUTES_NORMGPX)
5458
uv run python3 $< $(ROUTES_NORMGPX) $@
5559

60+
# alias to make routes YAML
61+
.PHONY: routes-yml
62+
routes-yml: $(ROUTES_YML)
63+
64+
# build main "schedules database" YAML file from all quarter schedule files
65+
$(SCHEDULES_YML): _bin/make_schedules_table.py $(AGG_GEOJSON_ROUTES_QTR)
66+
uv run python3 $< \
67+
--schedules-dir $(DATA)/schedules \
68+
--aggregates-dir $(AGG_GEOJSON_DIR) \
69+
--output $@
70+
71+
# alias to make schedules YAML
72+
.PHONY: schedules-yml
73+
schedules-yml: $(SCHEDULES_YML)
74+
5675
# generate ical from schedule YAML, also generates rcc_weekends.ics
5776
rcc.ics: _bin/mkical.py $(ROUTES_YML)
5877
uv run python3 $<
@@ -66,9 +85,7 @@ all-with-previews: all route-previews-generate
6685

6786
# serve the site locally with auto-rebuild on changes
6887
.PHONY: serve
69-
serve: $(MUNGED_ROUTES) \
70-
$(ROUTES_YML) \
71-
rcc.ics
88+
serve: $(MUNGED_ROUTES) $(PAGE_TABLES) rcc.ics
7289
ls _config.yml | entr -r bundle exec jekyll serve --watch --drafts --host=0.0.0.0 $(JEKYLL_FLAGS)
7390

7491

_bin/make_schedules_table.py

Lines changed: 262 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,262 @@
1+
"""
2+
Generate a YAML summary for quarter schedules with their aggregate GeoJSON.
3+
4+
This is modeled after make_routes_table.py but focused on schedule metadata.
5+
The script discovers schedule YAML files, validates the content, attaches
6+
links to the pre-generated aggregate GeoJSON, and writes an ordered YAML list
7+
that can be consumed by Jekyll data pages.
8+
"""
9+
10+
from __future__ import annotations
11+
12+
import argparse
13+
import datetime as dt
14+
import sys
15+
from dataclasses import dataclass
16+
from pathlib import Path
17+
from typing import Iterable
18+
19+
import yaml
20+
21+
import rcr
22+
23+
# Seasons in the order used for sorting schedules within a year.
_SEASON_SEQUENCE = ["winter", "spring", "summer", "autumn"]

# Display label for each season key (title-cased), e.g. "winter" -> "Winter".
SEASON_LABELS = dict(zip(_SEASON_SEQUENCE, map(str.title, _SEASON_SEQUENCE)))

# Position of each season within _SEASON_SEQUENCE, used as a sort rank.
SEASON_ORDER = {name: rank for rank, name in enumerate(_SEASON_SEQUENCE)}
27+
28+
29+
@dataclass
class ScheduleRecord:
    """One row of the generated schedules table.

    Holds the values emitted per quarter schedule plus optional links to the
    neighbouring schedules (filled in after all records are ordered).
    """

    # Identity, e.g. "<year>-<season>", and its parsed components.
    id: str
    year: int
    season: str
    # Filesystem path of the aggregate GeoJSON for this schedule.
    aggregate_geojson: Path
    # Date strings of the first and last schedule entries.
    start_date: str
    end_date: str
    # Summary counters.
    event_count: int
    route_count: int
    manual_route_count: int
    unique_date_count: int
    # Navigation links; omitted from the output when unset/empty.
    previous_id: str | None = None
    next_id: str | None = None

    @property
    def label(self) -> str:
        """Human-readable name such as ``"Winter 2024"``."""
        season_name = SEASON_LABELS.get(self.season, self.season.title())
        return f"{season_name} {self.year}"

    def to_serializable(self) -> dict[str, object]:
        """Return the record as a plain dict in the key order wanted in YAML."""
        row: dict[str, object] = {
            "id": self.id,
            "label": self.label,
            "year": self.year,
            "season": self.season,
            "aggregate_geojson": self._public_geojson_path(),
            "start_date": self.start_date,
            "end_date": self.end_date,
            "event_count": self.event_count,
            "unique_date_count": self.unique_date_count,
            "route_count": self.route_count,
            "manual_route_count": self.manual_route_count,
        }
        # Navigation keys are only written when they carry a value.
        navigation = (("previous_id", self.previous_id), ("next_id", self.next_id))
        row.update({key: value for key, value in navigation if value})
        return row

    def _public_geojson_path(self) -> str:
        """Return the aggregate GeoJSON path with a leading slash.

        The path is made relative to ``rcr.ROOT`` when it lies under it;
        otherwise the stored path is used unchanged.
        """
        location = self.aggregate_geojson
        try:
            location = location.relative_to(rcr.ROOT)
        except ValueError:
            # Not under rcr.ROOT: keep the path as given.
            pass
        return "/" + location.as_posix()
78+
79+
80+
def parse_args(argv: Iterable[str]) -> argparse.Namespace:
    """Parse the command-line options for the schedules-table builder.

    Options: ``--schedules-dir`` and ``--aggregates-dir`` (both optional, with
    defaults taken from ``rcr``) and the mandatory ``--output`` path.
    """
    default_aggregates = rcr.ROOT / "routes" / "geojson" / "aggregates"

    cli = argparse.ArgumentParser(description="Build schedule metadata table for Jekyll.")
    cli.add_argument(
        "--schedules-dir",
        type=Path,
        default=rcr.SCHEDULES,
        help="Directory containing quarter schedule YAML files (default: %(default)s)",
    )
    cli.add_argument(
        "--aggregates-dir",
        type=Path,
        default=default_aggregates,
        help="Directory containing aggregate GeoJSON files (default: %(default)s)",
    )
    cli.add_argument(
        "--output",
        type=Path,
        required=True,
        help="Destination YAML file to write.",
    )
    # Materialize argv so any iterable (not just a list) is accepted.
    arguments = list(argv)
    return cli.parse_args(arguments)
101+
102+
103+
def load_schedule(path: Path) -> list[dict]:
    """Load one quarter schedule file and return its top-level list.

    Args:
        path: Location of the schedule YAML file.

    Returns:
        The parsed top-level YAML list.

    Raises:
        ValueError: If the document's top level is not a list (this includes
            an empty file, which parses to ``None``).
    """
    # Read as UTF-8 explicitly so the result does not depend on the
    # platform's locale default encoding.
    with path.open("r", encoding="utf-8") as handle:
        data = yaml.safe_load(handle)
    if not isinstance(data, list):
        raise ValueError(f"Schedule file '{path}' is not a list.")
    return data
109+
110+
111+
def validate_schedule_entries(schedule_id: str, entries: list[dict]) -> None:
    """Check that every schedule entry has the minimal expected structure.

    Args:
        schedule_id: Identifier used in error messages.
        entries: Parsed schedule entries.

    Raises:
        ValueError: If there are no entries, an entry is not a mapping, an
            entry lacks ``date`` or ``plan``, or ``plan`` is not a list.
    """
    if not entries:
        raise ValueError(f"Schedule '{schedule_id}' has no entries.")
    for entry in entries:
        # Without this guard a string entry would satisfy the `in` checks by
        # substring match and other scalars would fail with a bare TypeError.
        if not isinstance(entry, dict):
            raise ValueError(f"Schedule '{schedule_id}' entry {entry!r} is not a mapping.")
        if "date" not in entry:
            raise ValueError(f"Schedule '{schedule_id}' entry {entry!r} missing 'date'.")
        if "plan" not in entry:
            raise ValueError(f"Schedule '{schedule_id}' entry {entry!r} missing 'plan'.")
        if not isinstance(entry["plan"], list):
            raise ValueError(f"Schedule '{schedule_id}' entry for {entry['date']} has non-list plan.")
121+
122+
123+
def _normalize_date(value: object) -> str:
    """Return *value* as an ISO ``YYYY-MM-DD`` string when it is a date.

    Unquoted ISO dates in YAML may already arrive as ``datetime.date`` (or
    ``datetime.datetime``) objects, so those are handled before attempting to
    parse strings. Strings that are not ISO dates are returned unchanged; any
    other value is stringified.
    """
    if isinstance(value, dt.datetime):
        # Check datetime first: it is a subclass of date.
        return value.date().isoformat()
    if isinstance(value, dt.date):
        return value.isoformat()
    if isinstance(value, str):
        try:
            return dt.date.fromisoformat(value).isoformat()
        except ValueError:
            return value
    return str(value)


def derive_dates(entries: list[dict]) -> tuple[str, str, int]:
    """Summarize the dates covered by a schedule.

    Args:
        entries: Schedule entries, each with a ``date`` key.

    Returns:
        ``(start, end, unique_count)`` where *start* and *end* are the dates
        of the first and last entries in file order (not min/max) and
        *unique_count* is the number of distinct dates after normalization.

    Raises:
        ValueError: If *entries* is empty.
    """
    if not entries:
        raise ValueError("Cannot derive dates from an empty schedule.")
    normalized = [_normalize_date(entry["date"]) for entry in entries]
    # Count on normalized values so that a date object and its ISO string
    # spelling are treated as the same day.
    return normalized[0], normalized[-1], len(set(normalized))
146+
147+
148+
def count_plan_entries(entries: list[dict]) -> tuple[int, int]:
    """Tally plan phases across all entries.

    Phases containing a ``cancelled`` key are ignored. Of the rest, a phase
    with ``route_id`` counts as a route; otherwise a phase with ``route``
    counts as a manual route.

    Returns:
        ``(route_count, manual_count)``.
    """
    active_phases = [
        phase
        for entry in entries
        for phase in entry["plan"]
        if "cancelled" not in phase
    ]
    with_route_id = sum(1 for phase in active_phases if "route_id" in phase)
    # "route" only counts when "route_id" is absent (route_id takes priority).
    manual_only = sum(
        1 for phase in active_phases if "route_id" not in phase and "route" in phase
    )
    return with_route_id, manual_only
160+
161+
162+
def build_record(path: Path, aggregate_dir: Path) -> ScheduleRecord:
    """Build the table record for one schedule file.

    The file stem must look like ``<year>-<season>``. The schedule is loaded
    and validated, its dates and plan counts are summarized, and the matching
    ``<stem>.geojson`` must already exist in *aggregate_dir*.

    Raises:
        ValueError: On a malformed file name, unknown season, non-numeric
            year, or invalid schedule content.
        FileNotFoundError: If the aggregate GeoJSON file is missing.
    """
    schedule_id = path.stem

    # --- identity from the file name -------------------------------------
    try:
        year_text, season = schedule_id.split("-", 1)
    except ValueError as exc:
        raise ValueError(f"Schedule filename '{path.name}' must be in '<year>-<season>.yml' form.") from exc
    if season not in SEASON_LABELS:
        raise ValueError(f"Unknown season '{season}' in schedule '{schedule_id}'.")
    try:
        year = int(year_text)
    except ValueError as exc:
        raise ValueError(f"Schedule '{schedule_id}' year must be numeric.") from exc

    # --- content summary --------------------------------------------------
    entries = load_schedule(path)
    validate_schedule_entries(schedule_id, entries)
    first_date, last_date, distinct_dates = derive_dates(entries)
    routes, manual_routes = count_plan_entries(entries)

    # --- aggregate GeoJSON must have been generated already --------------
    aggregate_path = (aggregate_dir / f"{schedule_id}.geojson").resolve()
    if not aggregate_path.exists():
        raise FileNotFoundError(f"Missing aggregate GeoJSON for '{schedule_id}': {aggregate_path}")

    fields = {
        "id": schedule_id,
        "year": year,
        "season": season,
        "aggregate_geojson": aggregate_path,
        "start_date": first_date,
        "end_date": last_date,
        "event_count": len(entries),
        "route_count": routes,
        "manual_route_count": manual_routes,
        "unique_date_count": distinct_dates,
    }
    return ScheduleRecord(**fields)
198+
199+
200+
def attach_navigation(records: list[ScheduleRecord]) -> None:
    """Link each record to its neighbours in list order (in place).

    Sets ``previous_id`` on every record but the first and ``next_id`` on
    every record but the last; other records' attributes are left untouched.
    """
    # Walk adjacent pairs; each pair contributes one link in each direction.
    for earlier, later in zip(records, records[1:]):
        later.previous_id = earlier.id
        earlier.next_id = later.id
206+
207+
208+
def write_yaml(records: list[ScheduleRecord], output: Path) -> None:
    """Write the records to *output* as a YAML list.

    Creates the parent directory if needed. The file starts with an
    "autogenerated" comment header and ends with an extra newline.
    """
    dump_options = {
        "sort_keys": False,  # keep the key order chosen by to_serializable()
        "width": 120,
        "default_flow_style": False,
    }
    output.parent.mkdir(parents=True, exist_ok=True)
    with output.open("w") as stream:
        stream.write("# AUTOGENERATED - DO NOT EDIT\n\n")
        rows = [record.to_serializable() for record in records]
        yaml.safe_dump(rows, stream, **dump_options)
        stream.write("\n")
220+
221+
222+
def _fail(message: str) -> int:
    """Print *message* to stderr and return the failure exit status (1)."""
    print(message, file=sys.stderr)
    return 1


def main(argv: Iterable[str]) -> int:
    """Build the schedules table from the command-line arguments.

    Steps: parse options, check both input directories, build one record per
    ``*.yml`` schedule file, order the records, link neighbours, and write the
    YAML output.

    Returns:
        ``0`` on success, ``1`` after printing an error to stderr.
    """
    args = parse_args(argv)
    schedules_dir = args.schedules_dir.resolve()
    aggregates_dir = args.aggregates_dir.resolve()

    if not schedules_dir.is_dir():
        return _fail(f"Error: schedules directory '{schedules_dir}' does not exist.")
    if not aggregates_dir.is_dir():
        return _fail(f"Error: aggregates directory '{aggregates_dir}' does not exist.")

    schedule_paths = sorted(schedules_dir.glob("*.yml"), key=lambda candidate: candidate.stem)
    if not schedule_paths:
        return _fail(f"Error: no schedule files found in '{schedules_dir}'.")

    records: list[ScheduleRecord] = []
    for path in schedule_paths:
        try:
            record = build_record(path, aggregates_dir)
        except Exception as exc:  # pragma: no cover - defensive reporting
            return _fail(f"Error processing schedule '{path}': {exc}")
        records.append(record)

    # Order by position in rcr.schedule_paths() where the schedule is known
    # there; anything else goes after, ordered by (year, season rank).
    known_positions = {
        known.stem: position for position, known in enumerate(rcr.schedule_paths())
    }

    def ordering(record: ScheduleRecord) -> tuple[int, int, int]:
        position = known_positions.get(record.id)
        if position is None:
            return (1, record.year, SEASON_ORDER.get(record.season, 0))
        return (0, position, 0)

    records.sort(key=ordering)

    attach_navigation(records)
    write_yaml(records, args.output.resolve())
    return 0
259+
260+
261+
# Script entry point: run main() on the CLI arguments and exit with its status.
if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))

0 commit comments

Comments
 (0)