diff --git a/data/examples/README.md b/data/examples/README.md index 3ed1383..c54e02e 100644 --- a/data/examples/README.md +++ b/data/examples/README.md @@ -1,4 +1,4 @@ -# Explaining the output data +# Explaining the Output Data **Note:** Some of the fields may be `null` or empty (`""`). @@ -27,19 +27,16 @@ "delivery": "in-person", "resources": [ { - "resource": "https://example.com/notebook.ipynb", - "description": "Notebook used in the talk" - }, - { - "resource": "https://www.youtube.com/watch?v=dQw4w9WgXcQ", - "description": "Video of the robot in action" + "resource": "https://example.com/slides.pdf", + "description": "Slides for the session" } ... ], "room": "South Hall 2A", - "start": "2024-07-10T14:00:00+02:00", - "end": "2024-07-10T15:00:00+02:00", - "website_url": "https://ep2024.europython.eu/session/example-talk/", + "start": "2099-07-10T14:00:00+02:00", + "end": "2099-07-10T15:00:00+02:00", + "website_url": "https://ep2099.europython.eu/session/example-talk/", + "youtube_url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ", "sessions_in_parallel": [ "F7G8H9", ... 
@@ -81,6 +78,7 @@ The fields are as follows: | `start` | `string (datetime ISO format)` \| `null` | Start time of the session | | `end` | `string (datetime ISO format)` \| `null` | End time of the session | | `website_url` | `string` | URL of the session on the conference website | +| `youtube_url` | `string` \| `null` | URL of the session's video on YouTube | | `sessions_in_parallel` | `array[string]` \| `null` | List of codes of sessions happening in parallel | | `sessions_after` | `array[string]` \| `null` | List of codes of sessions happening after this session | | `sessions_before` | `array[string]` \| `null` | List of codes of sessions happening before this session | @@ -137,3 +135,84 @@ The fields are as follows: | `twitter_url` | `string` \| `null` | URL of the speaker's Twitter profile | | `mastodon_url` | `string` \| `null` | URL of the speaker's Mastodon profile | | `website_url` | `string` | URL of the speaker's profile on the conference website | + +  + +## `schedule.json` + +
+Example schedule data JSON + +```json +{ + "days": { + "2099-07-08": { + "events": [ + { + "code": "LMN123", + "title": "Welcome and Keynote", + "speakers": [], + "session_type": "Announcements", + "slug": "welcome-keynote", + "track": null, + "level": "beginner", + "rooms": [ + "Room A", + "Room B" + ], + "start": "2099-07-08T08:00:00+02:00", + "duration": 60, + "tweet": "", + "website_url": "https://ep2099.europython.eu/session/welcome-keynote" + }, + { + "code": "OPQ456", + "title": "Advanced Python Techniques", + "speakers": [ + { + "avatar": "https://pretalx.com/media/avatars/picture.jpg", + "code": "RST789", + "name": "John Doe", + "slug": "john-doe", + "website_url": "https://ep2099.europython.eu/speaker/john-doe" + } + ], + "session_type": "Tutorial", + "slug": "advanced-python-techniques", + "track": "CPython Internals", + "level": "advanced", + "rooms": [ + "Room C" + ], + "start": "2099-07-08T10:00:00+02:00", + "duration": 90, + "tweet": "", + "website_url": "https://ep2099.europython.eu/session/advanced-python-techniques" + } + ] + } + } +} +``` +
+ +  + +The fields are as follows: + +| Key | Type | Notes | +|----------------|-----------------------------|------------------------------------------------------------| +| `days` | `object` | Contains schedule by date | +| `events` | `array[object]` | List of events for a particular day | +| `code` | `string` | Unique identifier for the event | +| `title` | `string` | Title of the event | +| `speakers` | `array[object]` | List of speakers for the event (if applicable) | +| `session_type` | `string` | Type of event (e.g. Announcements, Workshop, etc.) | +| `slug` | `string` | URL-friendly version of the event title | +| `track` | `string` \| `null` | Track associated with the event (e.g. Web, PyData, etc.) | +| `level` | `string` | Level of the event (beginner, intermediate, advanced) | +| `rooms` | `array[string]` | List of rooms the event is being held in | +| `start` | `string (datetime ISO)` | Start time of the event | +| `duration` | `integer` | Duration of the event in minutes | +| `tweet` | `string` \| `null` | Tweet-length description of the event | +| `website_url` | `string` | URL of the event on the conference website | diff --git a/data/examples/europython/sessions.json b/data/examples/europython/sessions.json index 530655a..97eb07b 100644 --- a/data/examples/europython/sessions.json +++ b/data/examples/europython/sessions.json @@ -31,7 +31,8 @@ "sessions_before": null, "next_session": null, "prev_session": null, - "website_url": "https://ep2024.europython.eu/session/this-is-a-test-talk-from-a-test-speaker-about-a-test-topic" + "website_url": "https://ep2024.europython.eu/session/this-is-a-test-talk-from-a-test-speaker-about-a-test-topic", + "youtube_url": "https://youtube.com/watch?v=01234567890" }, "B8CD4F": { "code": "B8CD4F", @@ -56,6 +57,7 @@ "sessions_before": null, "next_session": null, "prev_session": null, - "website_url": "https://ep2024.europython.eu/session/a-talk-with-shorter-title" + "website_url": 
"https://ep2024.europython.eu/session/a-talk-with-shorter-title", + "youtube_url": "https://youtube.com/watch?v=12345679012" } } diff --git a/data/examples/pretalx/youtube.json b/data/examples/pretalx/youtube.json new file mode 100644 index 0000000..7a3d274 --- /dev/null +++ b/data/examples/pretalx/youtube.json @@ -0,0 +1,12 @@ +[ + { + "submission": "A8CD3F", + "youtube_link": "https://youtube.com/watch?v=01234567890", + "video_id": "01234567890" + }, + { + "submission": "B8CD4F", + "youtube_link": "https://youtube.com/watch?v=12345679012", + "video_id": "12345679012" + } +] diff --git a/src/download.py b/src/download.py index 7e988f7..280e45c 100644 --- a/src/download.py +++ b/src/download.py @@ -19,6 +19,7 @@ # saving us later time with joining the answers. "submissions?questions=all&state=confirmed", "speakers?questions=all", + "p/youtube", ] Config.raw_path.mkdir(parents=True, exist_ok=True) @@ -45,7 +46,8 @@ pbar.close() - filename = resource.split("?")[0] # To get rid of "?questions" + # To get the resource name without extra parameters + filename = resource.split("?")[0].split("/")[-1] filename = f"{filename}_latest.json" filepath = Config.raw_path / filename diff --git a/src/models/europython.py b/src/models/europython.py index 291e9f2..1b61c4b 100644 --- a/src/models/europython.py +++ b/src/models/europython.py @@ -142,6 +142,7 @@ class EuroPythonSession(BaseModel): next_session: str | None = None prev_session: str | None = None slot_count: int = Field(..., exclude=True) + youtube_url: str | None = None @field_validator("room", mode="before") @classmethod diff --git a/src/transform.py b/src/transform.py index c44dffb..5feadec 100644 --- a/src/transform.py +++ b/src/transform.py @@ -16,12 +16,16 @@ ) pretalx_schedule = Parse.schedule(Config.raw_path / "schedule_latest.json") + # Parse the YouTube data + youtube_data = Parse.youtube(Config.raw_path / "youtube_latest.json") + print("Computing timing relationships...") 
TimingRelationships.compute(pretalx_submissions.values()) print("Transforming the data...") ep_sessions = Transform.pretalx_submissions_to_europython_sessions( - pretalx_submissions + pretalx_submissions, + youtube_data, ) ep_speakers = Transform.pretalx_speakers_to_europython_speakers(pretalx_speakers) ep_schedule = Transform.pretalx_schedule_to_europython_schedule( diff --git a/src/utils/parse.py b/src/utils/parse.py index 1a52d61..cfe53e4 100644 --- a/src/utils/parse.py +++ b/src/utils/parse.py @@ -61,3 +61,14 @@ def schedule(input_file: Path | str) -> PretalxSchedule: schedule = PretalxSchedule.model_validate(js) return schedule + + @staticmethod + def youtube(input_file: Path | str) -> dict[str, str]: + """ + Returns the Session code to YouTube URL mapping + """ + with open(input_file) as fd: + js = json.load(fd) + youtube_data = {s["submission"]: s["youtube_link"] for s in js} + + return youtube_data diff --git a/src/utils/transform.py b/src/utils/transform.py index 7ea2e32..ac7d6d9 100644 --- a/src/utils/transform.py +++ b/src/utils/transform.py @@ -15,6 +15,7 @@ class Transform: @staticmethod def pretalx_submissions_to_europython_sessions( submissions: dict[str, PretalxSubmission], + youtube_data: dict[str, str], ) -> dict[str, EuroPythonSession]: """ Transforms the given Pretalx submissions to EuroPython sessions @@ -58,6 +59,7 @@ def pretalx_submissions_to_europython_sessions( next_session=TimingRelationships.get_next_session(submission.code), prev_session=TimingRelationships.get_prev_session(submission.code), slot_count=submission.slot_count, + youtube_url=youtube_data.get(submission.code), ) ep_sessions[code] = ep_session diff --git a/tests/test_transform_end_to_end.py b/tests/test_transform_end_to_end.py index e8315db..640c693 100644 --- a/tests/test_transform_end_to_end.py +++ b/tests/test_transform_end_to_end.py @@ -8,12 +8,15 @@ "./data/examples/pretalx/submissions.json" ) +youtube_data = Parse.youtube("./data/examples/pretalx/youtube.json") + def 
test_e2e_sessions() -> None: TimingRelationships.compute(pretalx_submissions.values()) ep_sessions = Transform.pretalx_submissions_to_europython_sessions( - pretalx_submissions + pretalx_submissions, + youtube_data, ) ep_sessions_dump = { k: json.loads(v.model_dump_json()) for k, v in ep_sessions.items()