From def1a6b0386920e6c6e57d0d454fd116260c0985 Mon Sep 17 00:00:00 2001 From: egeakman Date: Sun, 13 Apr 2025 21:36:02 -0400 Subject: [PATCH 1/2] Improve socials extraction and testing --- data/examples/europython/speakers.json | 4 +- pyproject.toml | 1 + src/models/europython.py | 247 ++++++++++++++++------- tests/test_extract_socials.py | 134 ++++++++++++ tests/test_extract_socials_hypothesis.py | 132 ++++++++++++ tests/test_extract_socials_negative.py | 81 ++++++++ tests/test_social_media_extractions.py | 71 ------- uv.lock | 33 +++ 8 files changed, 562 insertions(+), 141 deletions(-) create mode 100644 tests/test_extract_socials.py create mode 100644 tests/test_extract_socials_hypothesis.py create mode 100644 tests/test_extract_socials_negative.py delete mode 100644 tests/test_social_media_extractions.py diff --git a/data/examples/europython/speakers.json b/data/examples/europython/speakers.json index 1bb1025..a8008f3 100644 --- a/data/examples/europython/speakers.json +++ b/data/examples/europython/speakers.json @@ -8,8 +8,8 @@ "submissions": ["A8CD3F"], "affiliation": "A Company", "homepage": null, - "gitx": "https://github.com/F3DC8A", - "linkedin_url": "https://www.linkedin.com/in/F3DC8A", + "gitx_url": "https://github.com/f3dc8a", + "linkedin_url": "https://linkedin.com/in/f3dc8a", "bluesky_url": "https://bsky.app/profile/username.bsky.social", "mastodon_url": null, "twitter_url": null, diff --git a/pyproject.toml b/pyproject.toml index e9cd62e..1356f34 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,6 +19,7 @@ dependencies = [ [dependency-groups] dev = [ + "hypothesis>=6.131", "pre-commit>=4.2", "pytest>=8.3.5", "ruff>=0.11.4", diff --git a/src/models/europython.py b/src/models/europython.py index ce2c5d2..8731ec0 100644 --- a/src/models/europython.py +++ b/src/models/europython.py @@ -1,6 +1,8 @@ from __future__ import annotations +import re from datetime import date, datetime +from urllib.parse import quote from pydantic import BaseModel, Field, 
computed_field, field_validator, model_validator @@ -29,7 +31,7 @@ class EuroPythonSpeaker(BaseModel): mastodon_url: str | None = None linkedin_url: str | None = None bluesky_url: str | None = None - gitx: str | None = None + gitx_url: str | None = None @computed_field def website_url(self) -> str: @@ -50,93 +52,210 @@ def extract_answers(cls, values) -> dict: values["homepage"] = answer.answer_text if answer.question_text == SpeakerQuestion.twitter: - values["twitter_url"] = cls.extract_twitter_url( - answer.answer_text.strip().split()[0] - ) + values["twitter_url"] = cls.extract_twitter_url(answer.answer_text) if answer.question_text == SpeakerQuestion.mastodon: - values["mastodon_url"] = cls.extract_mastodon_url( - answer.answer_text.strip().split()[0] - ) + values["mastodon_url"] = cls.extract_mastodon_url(answer.answer_text) if answer.question_text == SpeakerQuestion.bluesky: - values["bluesky_url"] = cls.extract_bluesky_url( - answer.answer_text.strip().split()[0] - ) + values["bluesky_url"] = cls.extract_bluesky_url(answer.answer_text) if answer.question_text == SpeakerQuestion.linkedin: - values["linkedin_url"] = cls.extract_linkedin_url( - answer.answer_text.strip().split()[0] - ) + values["linkedin_url"] = cls.extract_linkedin_url(answer.answer_text) if answer.question_text == SpeakerQuestion.gitx: - values["gitx"] = answer.answer_text.strip().split()[0] + values["gitx_url"] = cls.extract_gitx_url(answer.answer_text) return values @staticmethod - def extract_twitter_url(text: str) -> str: + def extract_twitter_url(text: str) -> str | None: """ - Extract the Twitter URL from the answer + Extracts a Twitter profile URL from the given text. 
+ Cleans the input and handles following formats: + - @username + - username + - twitter.com/username + - x.com/username """ - if text.startswith("@"): - twitter_url = f"https://x.com/{text[1:]}" - elif not text.startswith(("https://", "http://", "www.")): - twitter_url = f"https://x.com/{text}" - else: - twitter_url = ( - f"https://{text.removeprefix('https://').removeprefix('http://')}" - ) + cleaned = EuroPythonSpeaker._clean_social_input(text) + if cleaned is None: + print(f"Invalid Twitter URL: {text}") + return None - return twitter_url.split("?")[0] + # https://twitter.com/username (username max 15 chars) + match = re.match(r"^(twitter\.com|x\.com)/([\w]{1,15})$", cleaned) + if match: + _, username = match.groups() + return f"https://x.com/{username}" + + # only username + if re.match(r"^[\w]{1,15}$", cleaned): + return f"https://x.com/{cleaned}" + + print(f"Invalid Twitter URL: {cleaned}") + return None @staticmethod - def extract_mastodon_url(text: str) -> None | str: + def extract_mastodon_url(text: str) -> str | None: """ - Normalize Mastodon handle or URL to the format: https:///@ + Extracts a Mastodon profile URL from the given text. 
+ Supports formats like: + - @username@instance + - username@instance + - instance/@username + - instance/@username@instance (with redirect) + Returns: https://{instance}/@{username} """ - text = text.strip().split("?", 1)[0] - - # Handle @username@instance or username@instance formats - if "@" in text and not text.startswith("http"): - parts = text.split("@") - if len(parts) == 3: # @username@instance - _, username, instance = parts - elif len(parts) == 2: # username@instance - username, instance = parts - else: - return None + cleaned = EuroPythonSpeaker._clean_social_input(text) + if not cleaned: + print(f"Invalid Mastodon URL: {text}") + return None + + # instance/@username + match = re.match(r"^([\w\.-]+)/@([\w\.-]+)$", cleaned) + if match: + instance, username = match.groups() return f"https://{instance}/@{username}" - # Handle full URLs - if text.startswith("http://"): - text = "https://" + text[len("http://") :] + parts = cleaned.split("@") + if len(parts) == 3: # instance@username@instance + _, username, instance = parts + elif len(parts) == 2: # username@instance + username, instance = parts + else: + print(f"Invalid Mastodon URL: {cleaned}") + return None + + if username and instance: + return f"https://{instance}/@{username}" - return text + print(f"Invalid Mastodon URL: {cleaned}") + return None @staticmethod - def extract_linkedin_url(text: str) -> str: + def extract_linkedin_url(text: str) -> str | None: """ - Extract the LinkedIn URL from the answer + Extracts a LinkedIn personal profile URL from the given text.
+ Cleans the input and handles formats like: + - username + - linkedin.com/in/username + - @username + - tr.linkedin.com/in/username (country subdomains) """ - if text.startswith("in/"): - linkedin_url = f"https://linkedin.com/{text}" - elif not text.startswith(("https://", "http://", "www.", "linkedin.")): - linkedin_url = f"https://linkedin.com/in/{text}" + cleaned = EuroPythonSpeaker._clean_social_input(text) + if cleaned is None: + print(f"Invalid LinkedIn URL: {text}") + return None + + if cleaned.startswith("in/"): + linkedin_url = f"https://linkedin.com/{cleaned}" + elif not cleaned.startswith(("linkedin.", "in/")) and "." not in cleaned: + linkedin_url = f"https://linkedin.com/in/{cleaned}" else: - linkedin_url = ( - f"https://{text.removeprefix('https://').removeprefix('http://')}" - ) + linkedin_url = f"https://{cleaned}" + + if not re.match( + r"^https://([\w-]+\.)?linkedin\.com/in/(?:[\w\-]|%[0-9A-Fa-f]{2})+$", + linkedin_url, + ): + print(f"Invalid LinkedIn URL: {linkedin_url}") + return None - return linkedin_url.split("?")[0] + return linkedin_url @staticmethod - def extract_bluesky_url(text: str) -> str: + def extract_bluesky_url(text: str) -> str | None: """ - Returns a normalized BlueSky URL in the form https://bsky.app/profile/.bsky.social, - or uses the entire domain if it's custom (e.g., .dev). + Extracts a Bluesky profile URL from the given text. + Cleans the input and handles formats like: + - username + - bsky.app/profile/username + - bsky/username + - username.dev + - @username + - username.bsky.social """ - text = text.strip().split("?", 1)[0] + cleaned = EuroPythonSpeaker._clean_social_input(text) + if cleaned is None: + print(f"Invalid Bluesky URL: {text}") + return None + + for marker in ("bsky.app/profile/", "bsky/"): + if marker in cleaned: + cleaned = cleaned.split(marker, 1)[1] + break + else: + cleaned = cleaned.rsplit("/", 1)[-1] + + if "." 
not in cleaned: + cleaned += ".bsky.social" + + bluesky_url = f"https://bsky.app/profile/{cleaned}" + + if not re.match(r"^https://bsky\.app/profile/[\w\.-]+\.[\w\.-]+$", bluesky_url): + print(f"Invalid Bluesky URL: {bluesky_url}") + return None + + return bluesky_url + + @staticmethod + def extract_gitx_url(text: str) -> str | None: + """ + Extracts a GitHub/GitLab URL from the given text. + Cleans the input and handles formats like: + - username + - github.com/username + - gitlab.com/username + - @username + """ + cleaned = EuroPythonSpeaker._clean_social_input(text) + if cleaned is None: + print(f"Invalid GitHub/GitLab URL: {text}") + return None + + if cleaned.startswith(("github.com/", "gitlab.com/")): + return f"https://{cleaned}" + + if re.match(r"^[\w-]+$", cleaned): # assume github.com + return f"https://github.com/{cleaned}" + + print(f"Invalid GitHub/GitLab URL: {cleaned}") + return None + + @staticmethod + def _is_blank_or_na(text: str) -> bool: + """ + Check if the text is blank or (equals "N/A" or "-") + """ + return not text or text.strip().lower() in {"n/a", "-"} + + @staticmethod + def _clean_social_input(text: str) -> str | None: + """ + Cleans the input string for social media URLs. + Returns None if the input is blank or "N/A", + removes prefixes like "LinkedIn: " or "GH: ", + removes parameters like "?something=true", + removes trailing slashes, + removes "http://" or "https://", + removes "www." prefix, + removes "@" prefix, + and decodes URL-encoded characters. 
+ """ + if EuroPythonSpeaker._is_blank_or_na(text): + print(f"Blank or N/A input: {text}") + return None + + text = text.strip() + + # Handle inputs like "LinkedIn: https://linkedin.com/in/username" + # or "GH: https://github.com/username" + text = text.split(" ", 1)[1] if ": " in text else text + + text = text.split("?", 1)[0] + text = text.split(",", 1)[0] + text = text.rstrip("/") if text.startswith("https://"): text = text[8:] @@ -150,19 +269,11 @@ def extract_bluesky_url(text: str) -> str: if text.startswith("@"): text = text[1:] - for marker in ("bsky.app/profile/", "bsky/"): - if marker in text: - text = text.split(marker, 1)[1] - break - # case custom domain - else: - text = text.rsplit("/", 1)[-1] - - # if there's no dot, assume it's a non-custom handle and append '.bsky.social' - if "." not in text: - text += ".bsky.social" + # Percent-encode non-ASCII characters + if not text.isascii(): + text = quote(text, safe="@/-_.+~#=:") - return f"https://bsky.app/profile/{text}" + return text.lower() class EuroPythonSession(BaseModel): diff --git a/tests/test_extract_socials.py b/tests/test_extract_socials.py new file mode 100644 index 0000000..6eb27a0 --- /dev/null +++ b/tests/test_extract_socials.py @@ -0,0 +1,134 @@ +import pytest + +from src.models.europython import EuroPythonSpeaker + + +# === Mastodon === +@pytest.mark.parametrize( + ("input_string", "result"), + [ + ("https://mastodon.example/@user123", "https://mastodon.example/@user123"), + ("http://mastodon.example/@user123", "https://mastodon.example/@user123"), + ( + "https://mastodon.example/@user123?ref=xyz", + "https://mastodon.example/@user123", + ), + ("@user123@mastodon.example", "https://mastodon.example/@user123"), + ("user123@mastodon.example", "https://mastodon.example/@user123"), + ("mastodon.example/@user123", "https://mastodon.example/@user123"), + ("www.mastodon.example/@user123", "https://mastodon.example/@user123"), + (" mastodon.example/@user123 ", "https://mastodon.example/@user123"), 
+ ("https://instance.social/@foobar", "https://instance.social/@foobar"), + ("foobar@instance.social", "https://instance.social/@foobar"), + ], +) +def test_extract_mastodon_url(input_string, result): + assert EuroPythonSpeaker.extract_mastodon_url(input_string) == result + + +# === LinkedIn === +@pytest.mark.parametrize( + ("input_string", "result"), + [ + ("user123", "https://linkedin.com/in/user123"), + ("in/user123", "https://linkedin.com/in/user123"), + ("linkedin.com/in/user123", "https://linkedin.com/in/user123"), + ("http://linkedin.com/in/user123", "https://linkedin.com/in/user123"), + ("https://linkedin.com/in/user123", "https://linkedin.com/in/user123"), + ("https://www.linkedin.com/in/user123", "https://linkedin.com/in/user123"), + ( + "https://linkedin.com/in/example-user-%C3%A3-encoded", + "https://linkedin.com/in/example-user-%c3%a3-encoded", + ), + ( + "https://linkedin.com/in/example-user-ã-encoded", + "https://linkedin.com/in/example-user-%c3%a3-encoded", + ), + ("https://linkedin.com/in/user123?ref=xyz", "https://linkedin.com/in/user123"), + (" LINKEDIN.COM/IN/USER123 ", "https://linkedin.com/in/user123"), + ( + "https://regional.linkedin.com/in/example", + "https://regional.linkedin.com/in/example", + ), + ], +) +def test_extract_linkedin_url(input_string, result): + assert EuroPythonSpeaker.extract_linkedin_url(input_string) == result + + +# === Bluesky === +@pytest.mark.parametrize( + ("input_string", "result"), + [ + ("user123", "https://bsky.app/profile/user123.bsky.social"), + ("@user123", "https://bsky.app/profile/user123.bsky.social"), + ("user123.bsky.social", "https://bsky.app/profile/user123.bsky.social"), + ("@user123.bsky.social", "https://bsky.app/profile/user123.bsky.social"), + ("user123.dev", "https://bsky.app/profile/user123.dev"), + ("bsky.app/profile/user123", "https://bsky.app/profile/user123.bsky.social"), + ("bsky/user123", "https://bsky.app/profile/user123.bsky.social"), + ( + "www.bsky.app/profile/user123", +
"https://bsky.app/profile/user123.bsky.social", + ), + ( + "www.bsky.app/profile/user123.bsky.social", + "https://bsky.app/profile/user123.bsky.social", + ), + ( + "http://bsky.app/profile/user123", + "https://bsky.app/profile/user123.bsky.social", + ), + ( + "https://bsky.app/profile/user123", + "https://bsky.app/profile/user123.bsky.social", + ), + ( + "https://bsky.app/profile/user123.dev", + "https://bsky.app/profile/user123.dev", + ), + ( + "https://bsky.app/profile/user123.bsky.social", + "https://bsky.app/profile/user123.bsky.social", + ), + (" BSKY.APP/PROFILE/USER123 ", "https://bsky.app/profile/user123.bsky.social"), + ], +) +def test_extract_bluesky_url(input_string, result): + assert EuroPythonSpeaker.extract_bluesky_url(input_string) == result + + +# === Twitter/X === +@pytest.mark.parametrize( + ("input_string", "result"), + [ + ("user123", "https://x.com/user123"), + ("@user123", "https://x.com/user123"), + ("twitter.com/user123", "https://x.com/user123"), + ("https://twitter.com/user123", "https://x.com/user123"), + ("https://x.com/user123", "https://x.com/user123"), + ("http://twitter.com/user123", "https://x.com/user123"), + ("TWITTER.COM/user_name", "https://x.com/user_name"), + (" user123 ", "https://x.com/user123"), + ], +) +def test_extract_twitter_url(input_string, result): + assert EuroPythonSpeaker.extract_twitter_url(input_string) == result + + +# === GitHub/GitLab === +@pytest.mark.parametrize( + ("input_string", "result"), + [ + ("user123", "https://github.com/user123"), + ("@user123", "https://github.com/user123"), + ("github.com/user123", "https://github.com/user123"), + ("https://github.com/user123", "https://github.com/user123"), + ("gitlab.com/user123", "https://gitlab.com/user123"), + ("https://gitlab.com/user123", "https://gitlab.com/user123"), + (" http://github.com/user123 ", "https://github.com/user123"), + ("GITHUB.COM/USER123", "https://github.com/user123"), + ], +) +def test_extract_gitx(input_string, result): + assert 
EuroPythonSpeaker.extract_gitx_url(input_string) == result diff --git a/tests/test_extract_socials_hypothesis.py b/tests/test_extract_socials_hypothesis.py new file mode 100644 index 0000000..436b6cc --- /dev/null +++ b/tests/test_extract_socials_hypothesis.py @@ -0,0 +1,132 @@ +from string import ascii_letters, digits +from urllib.parse import quote + +import pytest +from hypothesis import given +from hypothesis.strategies import characters, composite, one_of, sampled_from, text + +from src.models.europython import EuroPythonSpeaker + +ALPHABET_SAFE = ascii_letters + digits + "-_" +SAFE_USERNAME = text( + alphabet=ALPHABET_SAFE, + min_size=4, + max_size=15, +).filter(lambda x: any(c.isalnum() for c in x) and not x.startswith(("-", "_"))) + + +@composite +def domain_names(draw, allow_subdomains=False): + tlds = draw( + sampled_from(["com", "net", "org", "dev", "social", "app", "io", "xyz"]) + ) + label = text(alphabet=ascii_letters + digits + "-", min_size=1, max_size=10) + labels = [draw(label)] + if allow_subdomains: + labels.insert(0, draw(label)) + return ".".join(labels + [tlds]) + + +# === Mastodon === +@given(SAFE_USERNAME) +def test_mastodon_invalid_fallbacks(text: str): + result = EuroPythonSpeaker.extract_mastodon_url(text) + if "@" not in text and "/@" not in text: + assert result is None + + +@given(SAFE_USERNAME, domain_names(allow_subdomains=True)) +def test_mastodon_username_at_instance(username: str, domain: str): + input_str = f"{username}@{domain}" + expected = f"https://{domain.lower()}/@{username.lower()}" + assert EuroPythonSpeaker.extract_mastodon_url(input_str) == expected + + +@given(domain_names(allow_subdomains=True), SAFE_USERNAME) +def test_mastodon_instance_slash_at_username(instance: str, username: str): + input_str = f"{instance}/@{username}" + expected = f"https://{instance.lower()}/@{username.lower()}" + assert EuroPythonSpeaker.extract_mastodon_url(input_str) == expected + + +# === LinkedIn === +@given(SAFE_USERNAME) +def 
test_linkedin_handle(username: str): + expected = f"https://linkedin.com/in/{username.lower()}" + assert EuroPythonSpeaker.extract_linkedin_url(username) == expected + + +@given( + text( + alphabet=one_of( + characters(whitelist_categories=("Ll", "Lu", "Nd")), # letters and numbers + characters( ## special characters (accents, etc.) + whitelist_categories=("Ll", "Lu", "Nl", "No", "Mn", "Mc"), + ).filter(lambda c: not c.isascii() and not c.isspace()), + ), + min_size=1, + max_size=30, + ) +) +def test_linkedin_encoding_support(username: str): + encoded = quote(username, safe="@/-_.+~#=:") + expected = f"https://linkedin.com/in/{encoded.lower()}" + assert ( + EuroPythonSpeaker.extract_linkedin_url(f"https://linkedin.com/in/{username}") + == expected + ) + + +@given(domain_names().filter(lambda d: not d.endswith("linkedin.com")), SAFE_USERNAME) +def test_linkedin_nonsense_domains(domain: str, path: str): + assert EuroPythonSpeaker.extract_linkedin_url(f"{domain}/in/{path}") is None + + +# === Bluesky === +@given(SAFE_USERNAME) +def test_bluesky_handle_to_fallback_domain(handle: str): + expected = f"https://bsky.app/profile/{handle.lower()}.bsky.social" + assert EuroPythonSpeaker.extract_bluesky_url(handle) == expected + + +@given(domain_names()) +def test_bluesky_custom_domains(domain: str): + expected = f"https://bsky.app/profile/{domain.lower()}" + assert EuroPythonSpeaker.extract_bluesky_url(domain) == expected + + +# === Twitter / X === +@given( + text(alphabet=ascii_letters + digits + "_", min_size=4, max_size=15), +) +def test_twitter_usernames(handle: str): + expected = f"https://x.com/{handle.lower()}" + assert EuroPythonSpeaker.extract_twitter_url(handle) == expected + assert EuroPythonSpeaker.extract_twitter_url(f"@{handle}") == expected + assert ( + EuroPythonSpeaker.extract_twitter_url(f"https://twitter.com/{handle}") + == expected + ) + assert EuroPythonSpeaker.extract_twitter_url(f"https://x.com/{handle}") == expected + assert ( + 
EuroPythonSpeaker.extract_twitter_url(f"http://twitter.com/{handle}/") + == expected + ) + + +# === GitHub / GitLab === +@given(SAFE_USERNAME) +def test_github_usernames(username: str): + expected = f"https://github.com/{username.lower()}" + assert EuroPythonSpeaker.extract_gitx_url(username) == expected + assert EuroPythonSpeaker.extract_gitx_url(f"@{username}") == expected + assert EuroPythonSpeaker.extract_gitx_url(f"github.com/{username}") == expected + + +@given(SAFE_USERNAME) +def test_gitlab_usernames(username: str): + expected = f"https://gitlab.com/{username.lower()}" + assert EuroPythonSpeaker.extract_gitx_url(f"gitlab.com/{username}") == expected + assert ( + EuroPythonSpeaker.extract_gitx_url(f"https://gitlab.com/{username}") == expected + ) diff --git a/tests/test_extract_socials_negative.py b/tests/test_extract_socials_negative.py new file mode 100644 index 0000000..f69e52b --- /dev/null +++ b/tests/test_extract_socials_negative.py @@ -0,0 +1,81 @@ +import pytest + +from src.models.europython import EuroPythonSpeaker + + +# === Mastodon === +@pytest.mark.parametrize( + ("input_string",), + [ + ("",), + ("false",), + ("mastodon@",), + ("@user@",), + ("username@",), + ("https://mastodon.social/user",), + ("mastodon.social/user",), + ("@mastodon.social",), + ("https://hostux.social/users/username",), + ("https://social/@",), + ], +) +def test_mastodon_url_invalid(input_string): + assert EuroPythonSpeaker.extract_mastodon_url(input_string) is None + + +# === LinkedIn === +@pytest.mark.parametrize( + ("input_string",), + [ + ("",), + ("/in/",), + ("linkedin.com/in/",), + ("linkedin.com/in",), + ("linkedin.com/username",), + ("linkedin.net/in/username",), + ("http://linkedin.com/user",), + ("http://",), + ("n/a",), + ], +) +def test_linkedin_url_invalid(input_string): + assert EuroPythonSpeaker.extract_linkedin_url(input_string) is None + + +# === Twitter / X === +@pytest.mark.parametrize( + ("input_string",), + [ + ("",), + ("-",), + ("user-name",), + 
("x.com/",), + ("https://x.com/",), + ("https://twitter.com/",), + ("http://",), + ("@user@",), + ("user!",), + ("too_long_username_123",), + ("twitter.com/user-with-hyphen",), + ("https://github.com/user",), + ], +) +def test_twitter_url_invalid(input_string): + assert EuroPythonSpeaker.extract_twitter_url(input_string) is None + + +# === GitHub / GitLab === +@pytest.mark.parametrize( + ("input_string",), + [ + ("",), + ("https://bitbucket.org/user",), + ("bitbucket.org/user",), + ("codeberg.org/user",), + ("https://git.example.com/user",), + ("http://",), + ("@/",), + ], +) +def test_gitx_url_invalid(input_string): + assert EuroPythonSpeaker.extract_gitx_url(input_string) is None diff --git a/tests/test_social_media_extractions.py b/tests/test_social_media_extractions.py deleted file mode 100644 index 5c13a50..0000000 --- a/tests/test_social_media_extractions.py +++ /dev/null @@ -1,71 +0,0 @@ -import pytest - -from src.models.europython import EuroPythonSpeaker - - -@pytest.mark.parametrize( - ("input_string", "result"), - [ - ("http://mastodon.social/@username", "https://mastodon.social/@username"), - ("https://mastodon.social/@username", "https://mastodon.social/@username"), - ( - "https://mastodon.social/@username?something=true", - "https://mastodon.social/@username", - ), - ("@username@mastodon.social", "https://mastodon.social/@username"), - ("username@mastodon.social", "https://mastodon.social/@username"), - ], -) -def test_extract_mastodon_url(input_string: str, result: str) -> None: - assert EuroPythonSpeaker.extract_mastodon_url(input_string) == result - - -@pytest.mark.parametrize( - ("input_string", "result"), - [ - ("username", "https://linkedin.com/in/username"), - ("linkedin.com/in/username", "https://linkedin.com/in/username"), - ("in/username", "https://linkedin.com/in/username"), - ("www.linkedin.com/in/username", "https://www.linkedin.com/in/username"), - ("http://linkedin.com/in/username", "https://linkedin.com/in/username"), - 
("https://linkedin.com/in/username", "https://linkedin.com/in/username"), - ], -) -def test_extract_linkedin_url(input_string: str, result: str) -> None: - assert EuroPythonSpeaker.extract_linkedin_url(input_string) == result - - -@pytest.mark.parametrize( - ("input_string", "result"), - [ - ("username", "https://bsky.app/profile/username.bsky.social"), - ("@username", "https://bsky.app/profile/username.bsky.social"), - ("username.dev", "https://bsky.app/profile/username.dev"), - ("@username.dev", "https://bsky.app/profile/username.dev"), - ("username.bsky.social", "https://bsky.app/profile/username.bsky.social"), - ("bsky.app/profile/username", "https://bsky.app/profile/username.bsky.social"), - ("bsky/username", "https://bsky.app/profile/username.bsky.social"), - ( - "www.bsky.app/profile/username", - "https://bsky.app/profile/username.bsky.social", - ), - ( - "www.bsky.app/profile/username.bsky.social", - "https://bsky.app/profile/username.bsky.social", - ), - ( - "http://bsky.app/profile/username", - "https://bsky.app/profile/username.bsky.social", - ), - ( - "https://bsky.app/profile/username.com", - "https://bsky.app/profile/username.com", - ), - ( - "https://bsky.app/profile/username.bsky.social", - "https://bsky.app/profile/username.bsky.social", - ), - ], -) -def test_extract_bluesky_url(input_string: str, result: str) -> None: - assert EuroPythonSpeaker.extract_bluesky_url(input_string) == result diff --git a/uv.lock b/uv.lock index 0f54919..76b45b1 100644 --- a/uv.lock +++ b/uv.lock @@ -11,6 +11,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643 }, ] +[[package]] +name = "attrs" +version = "25.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/5a/b0/1367933a8532ee6ff8d63537de4f1177af4bff9f3e829baf7331f595bb24/attrs-25.3.0.tar.gz", hash = "sha256:75d7cefc7fb576747b2c81b4442d4d4a1ce0900973527c011d1030fd3bf4af1b", size = 812032 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/77/06/bb80f5f86020c4551da315d78b3ab75e8228f89f0162f2c3a819e407941a/attrs-25.3.0-py3-none-any.whl", hash = "sha256:427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3", size = 63815 }, +] + [[package]] name = "certifi" version = "2025.1.31" @@ -91,6 +100,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/4d/36/2a115987e2d8c300a974597416d9de88f2444426de9571f4b59b2cca3acc/filelock-3.18.0-py3-none-any.whl", hash = "sha256:c401f4f8377c4464e6db25fff06205fd89bdd83b65eb0488ed1b160f780e21de", size = 16215 }, ] +[[package]] +name = "hypothesis" +version = "6.131.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, + { name = "sortedcontainers" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/73/79/82eaf131f58a5c434830f0c196995a5071531765c51ebc8aaff493002b5e/hypothesis-6.131.0.tar.gz", hash = "sha256:4b807daeeee47852edfd9818ba0e33df14902f1b78a5524f1a3fb71f80c7cec3", size = 430541 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/01/43/b5e5d397e1ece9b71584c1771214b4a643d81fa8f197d4ff5938295ead25/hypothesis-6.131.0-py3-none-any.whl", hash = "sha256:734959017e3ee4ef8f0ecb4e5169c8f4cf96dc83a997d2edf01fb5350f5bf2f4", size = 495720 }, +] + [[package]] name = "identify" version = "2.6.9" @@ -184,6 +206,7 @@ dependencies = [ [package.dev-dependencies] dev = [ + { name = "hypothesis" }, { name = "pre-commit" }, { name = "pytest" }, { name = "ruff" }, @@ -200,6 +223,7 @@ requires-dist = [ [package.metadata.requires-dev] dev = [ + { name = "hypothesis", specifier = ">=6.131.0" }, { name = "pre-commit", specifier = ">=4.2" }, { name = "pytest", specifier = ">=8.3.5" }, { name = "ruff", specifier = 
">=0.11.4" }, @@ -364,6 +388,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/4f/03/3aec4846226d54a37822e4c7ea39489e4abd6f88388fba74e3d4abe77300/ruff-0.11.4-py3-none-win_arm64.whl", hash = "sha256:d435db6b9b93d02934cf61ef332e66af82da6d8c69aefdea5994c89997c7a0fc", size = 10450306 }, ] +[[package]] +name = "sortedcontainers" +version = "2.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e8/c4/ba2f8066cceb6f23394729afe52f3bf7adec04bf9ed2c820b39e19299111/sortedcontainers-2.4.0.tar.gz", hash = "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88", size = 30594 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/32/46/9cb0e58b2deb7f82b84065f37f3bffeb12413f947f9388e4cac22c4621ce/sortedcontainers-2.4.0-py2.py3-none-any.whl", hash = "sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0", size = 29575 }, +] + [[package]] name = "text-unidecode" version = "1.3" From 645538b455bff38302f94f4827e40da5792625be Mon Sep 17 00:00:00 2001 From: egeakman Date: Sun, 13 Apr 2025 22:08:27 -0400 Subject: [PATCH 2/2] Some documentation --- README.md | 123 +++++++++++--- data/examples/README.md | 361 ++++++++++++++++++++-------------------- 2 files changed, 277 insertions(+), 207 deletions(-) diff --git a/README.md b/README.md index 9cd7e0f..d8d175e 100644 --- a/README.md +++ b/README.md @@ -1,41 +1,114 @@ -# programapi +# ðŸŽĪ programapi -This project downloads, processes, saves, and serves the static JSON files containing details of accepted speakers and submissions via an API. +This project powers the **EuroPython 2025** website and Discord bot by downloading, transforming, and serving clean, structured JSON files for sessions, speakers, and the schedule, all pulled from Pretalx. -Used by the EuroPython 2024 website and the Discord bot. +Built for transparency. Designed for reuse. Optimized for EuroPython. -**What this project does step-by-step:** +--- -1. 
Downloads the Pretalx speaker and submission data, and saves it as JSON files. -2. Transforms the JSON files into a format that is easier to work with and OK to serve publicly. This includes removing unnecessary/private fields, and adding new fields. -3. Serves the JSON files via an API. +## 🚀 What This Project Does -## Installation +1. **Downloads** submission and speaker data from Pretalx. +2. **Transforms** raw data: + - Removes private/irrelevant fields + - Normalizes formats + - Adds computed fields (e.g. URLs, delivery mode) +3. **Serves** the transformed JSON files via a static API. -1. Clone the repository. -2. Install the dependency management tool: ``make deps/pre`` -3. Install the dependencies: ``make deps/install`` -4. Set up ``pre-commit``: ``make pre-commit`` +--- -## Configuration +## ⚙️ Installation -You can change the event in the [``config.py``](src/config.py) file. It is set to ``europython-2024`` right now. +1. **Clone the repo** + ```bash + git clone https://github.com/EuroPython/programapi.git + cd programapi + ``` -## Usage +2. **Install [uv](https://docs.astral.sh/uv/getting-started/installation/)** (fast Python package manager) -- Run the whole process: ``make all`` -- Run only the download process: ``make download`` -- Run only the transformation process: ``make transform`` +3. **Create a Python 3.13 virtual environment** + ```bash + uv venv -p 3.13 + ``` -**Note:** Don't forget to set ``PRETALX_TOKEN`` in your ``.env`` file at the root of the project. And please don't make too many requests to the Pretalx API, it might get angry 🤪 +4. **Install dev dependencies** + ```bash + make dev + ``` -## API +5. **Enable pre-commit hooks** + ```bash + make pre-commit + ``` -The API is served at ``https://programapi24.europython.eu/2024``. It has two endpoints (for now): +--- -- ``/speakers.json``: Returns the list of confirmed speakers. -- ``/sessions.json``: Returns the list of confirmed sessions.
+## 🛠️ Configuration -## Schema +You can update the event year or shortname in [`src/config.py`](src/config.py). -See [this page](data/examples/README.md) for the explanations of the fields in the returned JSON files. +Also, create a `.env` file in the project root and set: + +```env +PRETALX_TOKEN=your_api_token_here +``` + +(Yes, Pretalx has rate limits. Please be nice. 🤪) + +--- + +## 📦 Usage + +- Run the **entire pipeline**: + ```bash + make all + ``` + +- Run only the **download step**: + ```bash + make download + ``` + +- Run only the **transformation step**: + ```bash + make transform + ``` + +- (Optional) **Exclude components**: + ```bash + make all EXCLUDE="schedule youtube" + ``` + +--- + +## 🌐 API Endpoints + +Hosted at: + +``` +https://static.europython.eu/programme/ep2025/releases/current +``` + +| Endpoint | Description | +|---------------------|--------------------------------------------| +| `/speakers.json` | List of confirmed speakers | +| `/sessions.json` | List of confirmed sessions | +| `/schedule.json` | Finalized conference schedule *(TBA)* | + +--- + +## 📖 Schema Documentation + +Looking for field definitions and examples? +Check out the 👉 [`data/examples/README.md`](data/examples/README.md) for a full schema reference with example payloads and explanations. + +--- + +## 💎 Questions? Feedback? + +Feel free to open an issue or reach us at [infra@europython.eu](mailto:infra@europython.eu). We love contributors 💜 + +--- + +📅 Last updated for: **EuroPython 2025** diff --git a/data/examples/README.md b/data/examples/README.md index c54e02e..5992b3d 100644 --- a/data/examples/README.md +++ b/data/examples/README.md @@ -1,96 +1,14 @@ -# Explaining the Output Data +# 📄 ProgramAPI Output Documentation -**Note:** Some of the fields may be `null` or empty (`""`). +> ⚠️ Some fields may be `null`, `""`, or excluded from specific contexts. +> 🍭 Also, yes, Rick Astley may appear in test videos. You're welcome. -## `sessions.json` +--- -
-Example session data JSON - -```json -{ - "A1B2C3": { - "code": "A1B2C3", - "title": "Example talk", - "speakers": [ - "B4D5E6", - ... - ], - "session_type": "Talk", - "slug": "example-talk", - "track": "Some Track", - "state": "confirmed", - "abstract": "This is an example talk. It is a great talk.", - "tweet": "This is an example talk.", - "duration": "60", - "level": "intermediate", - "delivery": "in-person", - "resources": [ - { - "resource": "https://example.com/slides.pdf", - "description": "Slides for the session" - } - ... - ], - "room": "South Hall 2A", - "start": "2099-07-10T14:00:00+02:00", - "end": "2099-07-10T15:00:00+02:00", - "website_url": "https://ep2099.europython.eu/session/example-talk/", - "youtube_url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ&pp=ygUJcmljayByb2xs", - "sessions_in_parallel": [ - "F7G8H9", - ... - ], - "sessions_after": [ - "I0J1K2", - ... - ], - "sessions_before": [ - "L3M4N5", - ... - ], - "next_session": "O6P7Q8", - "prev_session": "R9S0T1" - }, -} -``` -
- -  - -The fields are as follows: - -| Key | Type | Notes | -|------------------------|-------------------------------------------|---------------------------------------------------------------| -| `code` | `string` | Unique identifier for the session | -| `title` | `string` | Title of the session | -| `speakers` | `array[string]` | List of codes of the speakers | -| `session_type` | `string` | Type of the session (e.g. Talk, Workshop, Poster, etc.) | -| `slug` | `string` | URL-friendly version of the title | -| `track` | `string` \| `null` | Track of the session (e.g. PyData, Web, etc.) | -| `abstract` | `string` | Abstract of the session | -| `tweet` | `string` | Tweet-length description of the session | -| `duration` | `string` | Duration of the session in minutes | -| `level` | `string` | Level of the session (e.g. beginner, intermediate, advanced) | -| `delivery` | `string` | Delivery mode of the session (e.g. in-person, remote) | -| `resources` | `array[object[string, string]]` \| `null` | List of resources for the session: `{"resource": , "description": }` | -| `room` | `string` \| `null` | Room where the session will be held | -| `start` | `string (datetime ISO format)` \| `null` | Start time of the session | -| `end` | `string (datetime ISO format)` \| `null` | End time of the session | -| `website_url` | `string` | URL of the session on the conference website | -| `youtube_url` | `string` \| `null` | URL of the session's video on YouTube | -| `sessions_in_parallel` | `array[string]` \| `null` | List of codes of sessions happening in parallel | -| `sessions_after` | `array[string]` \| `null` | List of codes of sessions happening after this session | -| `sessions_before` | `array[string]` \| `null` | List of codes of sessions happening before this session | -| `next_session` | `string` \| `null` | Code of the next session in the same room | -| `prev_session` | `string` \| `null` | Code of the previous session in the same room | - -  - -## `speakers.json` 
+## 🗣 `speakers.json`
-Example speaker data JSON +Example speaker data ```json { @@ -100,119 +18,198 @@ The fields are as follows: "biography": "Some bio", "avatar": "https://pretalx.com/media/avatars/picture.jpg", "slug": "a-speaker", - "submissions": [ - "A1B2C3", - ... - ], + "submissions": ["A1B2C3"], "affiliation": "A Company", "homepage": "https://example.com", - "gitx": "https://github.com/B4D5E6", - "linkedin_url": "https://www.linkedin.com/in/B4D5E6", + "twitter_url": "https://x.com/B4D5E6", + "linkedin_url": "https://linkedin.com/in/B4D5E6", "mastodon_url": "https://mastodon.social/@B4D5E6", - "twitter_url": "https://x.com/B4D5E6" - }, - ... + "bluesky_url": "https://bsky.app/profile/B4D5E6.bsky.social", + "gitx_url": "https://github.com/B4D5E6", + "website_url": "https://ep2099.europython.eu/speaker/a-speaker" + } } ```
-  - -The fields are as follows: +### Fields | Key | Type | Notes | |----------------|--------------------|-----------------------------------------------------------------------| | `code` | `string` | Unique identifier for the speaker | -| `name` | `string` | Name of the speaker | -| `biography` | `string` \| `null` | Biography of the speaker | -| `avatar` | `string` | URL of the speaker's avatar | -| `slug` | `string` | URL-friendly version of the name | -| `submissions` | `array[string]` | List of codes of the sessions the speaker is speaking at | -| `affiliation` | `string` \| `null` | Affiliation of the speaker | -| `homepage` | `string` \| `null` | URL/text of the speaker's homepage | -| `gitx` | `string` \| `null` | URL/text of the speaker's GitHub/GitLab/etc. profile | -| `linkedin_url` | `string` \| `null` | URL of the speaker's LinkedIn profile | -| `twitter_url` | `string` \| `null` | URL of the speaker's Twitter profile | -| `mastodon_url` | `string` \| `null` | URL of the speaker's Mastodon profile | -| `website_url` | `string` | URL of the speaker's profile on the conference website | - -  - -## `schedule.json` +| `name` | `string` | Full name of the speaker | +| `biography` | `string` \| `null` | Short biography | +| `avatar` | `string` | URL of speaker's avatar | +| `slug` | `string` | URL-safe speaker name | +| `submissions` | `array[string]` | Codes of sessions the speaker is involved in | +| `affiliation` | `string` \| `null` | Affiliated institution or organization | +| `homepage` | `string` \| `null` | Personal or professional homepage | +| `twitter_url` | `string` \| `null` | Normalized Twitter/X profile URL | +| `mastodon_url` | `string` \| `null` | Normalized Mastodon profile URL | +| `linkedin_url` | `string` \| `null` | Normalized LinkedIn profile URL | +| `bluesky_url` | `string` \| `null` | Normalized Bluesky profile URL | +| `gitx_url` | `string` \| `null` | Normalized GitHub/GitLab profile URL | +| `website_url` | `string` | 
Auto-generated speaker profile on the EuroPython site | + +--- + +## 📚 `sessions.json`
-Example schedule data JSON +Example session data ```json { - "days": { - "2099-07-08": { - "events": [ - { - "code": "LMN123", - "title": "Welcome and Keynote", - "speakers": [], - "session_type": "Announcements", - "slug": "welcome-keynote", - "track": null, - "level": "beginner", - "rooms": [ - "Room A", - "Room B" - ], - "start": "2099-07-08T08:00:00+02:00", - "duration": 60, - "tweet": "", - "website_url": "https://ep2099.europython.eu/session/welcome-keynote" - }, - { - "code": "OPQ456", - "title": "Advanced Python Techniques", - "speakers": [ - { - "avatar": "https://pretalx.com/media/avatars/picture.jpg", - "code": "RST789", - "name": "John Doe", - "slug": "john-doe", - "website_url": "https://ep2099.europython.eu/speaker/john-doe" - } - ], - "session_type": "Tutorial", - "slug": "advanced-python-techniques", - "track": "CPython Internals", - "level": "advanced", - "rooms": [ - "Room C" - ], - "start": "2099-07-08T10:00:00+02:00", - "duration": 90, - "tweet": "", - "website_url": "https://ep2099.europython.eu/advanced-python-techniques" - } - ] + "A1B2C3": { + "code": "A1B2C3", + "title": "Example talk", + "speakers": ["B4D5E6"], + "session_type": "Talk", + "slug": "example-talk", + "track": "Some Track", + "abstract": "This is an example talk.", + "tweet": "This is an example talk.", + "duration": "60", + "level": "intermediate", + "delivery": "in-person", + "resources": [ + { + "resource": "https://example.com/slides.pdf", + "description": "Slides for the session" + } + ], + "room": "South Hall 2A", + "start": "2099-07-10T14:00:00+02:00", + "end": "2099-07-10T15:00:00+02:00", + "website_url": "https://ep2099.europython.eu/session/example-talk", + "youtube_url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ", + "sessions_in_parallel": ["F7G8H9"], + "sessions_after": ["I0J1K2"], + "sessions_before": ["L3M4N5"], + "next_session": "O6P7Q8", + "prev_session": "R9S0T1" + } +} +``` +
+ +### Fields + +| Key | Type | Notes | +|------------------------|-------------------------------------------|-----------------------------------------------------------------| +| `code` | `string` | Unique session identifier | +| `title` | `string` | Title of the session | +| `speakers` | `array[string]` | List of speaker codes | +| `session_type` | `string` | Type of session (e.g. Talk, Workshop) | +| `slug` | `string` | URL-friendly session name | +| `track` | `string` \| `null` | Associated track | +| `abstract` | `string` | Abstract or session description | +| `tweet` | `string` | Short summary (tweet-style) | +| `duration` | `string` | Duration in minutes as string | +| `level` | `string` | Level of session (e.g. beginner, intermediate) | +| `delivery` | `string` | Delivery format (in-person or remote) | +| `resources` | `array[object]` \| `null` | Supplementary materials (URL and description) | +| `room` | `string` \| `null` | Assigned room (e.g. "Exhibit Hall" auto-mapped for posters) | +| `start` | `datetime` \| `null` | ISO datetime of start | +| `end` | `datetime` \| `null` | ISO datetime of end | +| `website_url` | `string` | URL of session on the EuroPython site | +| `youtube_url` | `string` \| `null` | YouTube link to session video (🎵 never gonna give you up?) | +| `sessions_in_parallel` | `array[string]` \| `null` | Session codes running at the same time | +| `sessions_after` | `array[string]` \| `null` | Sessions immediately after this one | +| `sessions_before` | `array[string]` \| `null` | Sessions immediately before this one | +| `next_session` | `string` \| `null` | Next session in the same room | +| `prev_session` | `string` \| `null` | Previous session in the same room | + +--- + +## 🗓️ `schedule.json` + +
+Example schedule day + +```json +{ + "days": { + "2099-07-08": { + "rooms": ["Room A", "Room B", "Room C"], + "events": [ + { + "event_type": "SESSION", + "code": "OPQ456", + "slug": "advanced-python", + "title": "Advanced Python Techniques", + "session_type": "Tutorial", + "speakers": [ + { + "code": "RST789", + "name": "John Doe", + "avatar": "https://pretalx.com/media/avatars/picture.jpg", + "slug": "john-doe", + "website_url": "https://ep2099.europython.eu/speaker/john-doe" + } + ], + "track": "CPython Internals", + "tweet": "", + "level": "advanced", + "rooms": ["Room C"], + "start": "2099-07-08T10:00:00+02:00", + "duration": 90, + "website_url": "https://ep2099.europython.eu/session/advanced-python" + }, + { + "event_type": "BREAK", + "title": "Coffee Break", + "duration": 30, + "rooms": ["Room A", "Room B"], + "start": "2099-07-08T11:30:00+02:00" } + ] } + } } ```
-  - -The fields are as follows: - -| Key | Type | Notes | -|----------------|-----------------------------|------------------------------------------------------------| -| `days` | `object` | Contains schedule by date | -| `events` | `array[object]` | List of events for a particular day | -| `code` | `string` | Unique identifier for the event | -| `title` | `string` | Title of the event | -| `speakers` | `array[object]` | List of speakers for the event (if applicable) | -| `session_type` | `string` | Type of event (e.g. Announcements, Workshop, etc.) | -| `slug` | `string` | URL-friendly version of the event title | -| `track` | `string` \| `null` | Track associated with the event (e.g. Web, PyData, etc.) | -| `level` | `string` | Level of the event (beginner, intermediate, advanced) | -| `rooms` | `array[string]` | List of rooms the event is being held in | -| `start` | `string (datetime ISO)` | Start time of the event | -| `duration` | `integer` | Duration of the event in minutes | -| `tweet` | `string` \| `null` | Tweet-length description of the event | -| `website_url` | `string` | URL of the event on the conference website | +### Fields + +| Key | Type | Notes | +|-------------|----------------------------------|------------------------------------------------------| +| `days` | `dict[date, DaySchedule]` | Schedule grouped by day | +| `rooms` | `list[string]` | All rooms active on that day | +| `events` | `list[Session or Break]` | Mixed list of sessions and breaks | + +#### `Session` (EventType = `"SESSION"`) + +| Field | Type | Notes | +|---------------|--------------------------------------|-----------------------------------------------------| +| `event_type` | `"SESSION"` | Constant | +| `code` | `string` | Session code | +| `slug` | `string` | URL-friendly name | +| `title` | `string` | Title of the session | +| `session_type`| `string` | Talk, Workshop, etc. 
| +| `speakers` | `array[Speaker]` | Mini speaker profiles | +| `track` | `string` \| `null` | Optional topic track | +| `tweet` | `string` | Short description | +| `level` | `string` | Beginner, Intermediate, etc. | +| `rooms` | `array[string]` | One or more rooms | +| `start` | `datetime` | ISO 8601 | +| `duration` | `int` | Computed from `total_duration / slot_count` | +| `website_url` | `string` | Link to session page | + +#### `Break` (EventType = `"BREAK"`) + +| Field | Type | Notes | +|---------------|-------------------|--------------------------------| +| `event_type` | `"BREAK"` | Constant | +| `title` | `string` | Name of the break | +| `duration` | `int` | Minutes | +| `rooms` | `array[string]` | Rooms where the break applies | +| `start` | `datetime` | Start time | + +--- + +### 🛠 Notes & Logic + +- `room` normalization maps `"Main Hall"` sessions to `"Exhibit Hall"` — Poster sessions rejoice! +- All `"Registration & Welcome"` events automatically include **all active rooms**. +- The social profile fields (`twitter_url`, `mastodon_url`, `linkedin_url`, `bluesky_url`, `gitx_url`) normalize malformed inputs like `@name`, full URLs, or just `username`.