From def1a6b0386920e6c6e57d0d454fd116260c0985 Mon Sep 17 00:00:00 2001
From: egeakman <me@egeakman.dev>
Date: Sun, 13 Apr 2025 21:36:02 -0400
Subject: [PATCH 1/2] Improve socials extraction and testing

---
 data/examples/europython/speakers.json   |   4 +-
 pyproject.toml                           |   1 +
 src/models/europython.py                 | 247 ++++++++++++++++-------
 tests/test_extract_socials.py            | 134 ++++++++++++
 tests/test_extract_socials_hypothesis.py | 132 ++++++++++++
 tests/test_extract_socials_negative.py   |  81 ++++++++
 tests/test_social_media_extractions.py   |  71 -------
 uv.lock                                  |  33 +++
 8 files changed, 562 insertions(+), 141 deletions(-)
 create mode 100644 tests/test_extract_socials.py
 create mode 100644 tests/test_extract_socials_hypothesis.py
 create mode 100644 tests/test_extract_socials_negative.py
 delete mode 100644 tests/test_social_media_extractions.py

diff --git a/data/examples/europython/speakers.json b/data/examples/europython/speakers.json
index 1bb1025..a8008f3 100644
--- a/data/examples/europython/speakers.json
+++ b/data/examples/europython/speakers.json
@@ -8,8 +8,8 @@
     "submissions": ["A8CD3F"],
     "affiliation": "A Company",
     "homepage": null,
-    "gitx": "https://github.com/F3DC8A",
-    "linkedin_url": "https://www.linkedin.com/in/F3DC8A",
+    "gitx_url": "https://github.com/f3dc8a",
+    "linkedin_url": "https://linkedin.com/in/f3dc8a",
     "bluesky_url": "https://bsky.app/profile/username.bsky.social",
     "mastodon_url": null,
     "twitter_url": null,
diff --git a/pyproject.toml b/pyproject.toml
index e9cd62e..1356f34 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -19,6 +19,7 @@ dependencies = [
 
 [dependency-groups]
 dev = [
+  "hypothesis>=6.131",
   "pre-commit>=4.2",
   "pytest>=8.3.5",
   "ruff>=0.11.4",
diff --git a/src/models/europython.py b/src/models/europython.py
index ce2c5d2..8731ec0 100644
--- a/src/models/europython.py
+++ b/src/models/europython.py
@@ -1,6 +1,8 @@
 from __future__ import annotations
 
+import re
 from datetime import date, datetime
+from urllib.parse import quote
 
 from pydantic import BaseModel, Field, computed_field, field_validator, model_validator
 
@@ -29,7 +31,7 @@ class EuroPythonSpeaker(BaseModel):
     mastodon_url: str | None = None
     linkedin_url: str | None = None
     bluesky_url: str | None = None
-    gitx: str | None = None
+    gitx_url: str | None = None
 
     @computed_field
     def website_url(self) -> str:
@@ -50,93 +52,210 @@ def extract_answers(cls, values) -> dict:
                 values["homepage"] = answer.answer_text
 
             if answer.question_text == SpeakerQuestion.twitter:
-                values["twitter_url"] = cls.extract_twitter_url(
-                    answer.answer_text.strip().split()[0]
-                )
+                values["twitter_url"] = cls.extract_twitter_url(answer.answer_text)
 
             if answer.question_text == SpeakerQuestion.mastodon:
-                values["mastodon_url"] = cls.extract_mastodon_url(
-                    answer.answer_text.strip().split()[0]
-                )
+                values["mastodon_url"] = cls.extract_mastodon_url(answer.answer_text)
 
             if answer.question_text == SpeakerQuestion.bluesky:
-                values["bluesky_url"] = cls.extract_bluesky_url(
-                    answer.answer_text.strip().split()[0]
-                )
+                values["bluesky_url"] = cls.extract_bluesky_url(answer.answer_text)
 
             if answer.question_text == SpeakerQuestion.linkedin:
-                values["linkedin_url"] = cls.extract_linkedin_url(
-                    answer.answer_text.strip().split()[0]
-                )
+                values["linkedin_url"] = cls.extract_linkedin_url(answer.answer_text)
 
             if answer.question_text == SpeakerQuestion.gitx:
-                values["gitx"] = answer.answer_text.strip().split()[0]
+                values["gitx_url"] = cls.extract_gitx_url(answer.answer_text)
 
         return values
 
     @staticmethod
-    def extract_twitter_url(text: str) -> str:
+    def extract_twitter_url(text: str) -> str | None:
         """
-        Extract the Twitter URL from the answer
+        Extracts a Twitter profile URL from the given text.
+        Cleans the input and handles following formats:
+        - @username
+        - username
+        - twitter.com/username
+        - x.com/username
         """
-        if text.startswith("@"):
-            twitter_url = f"https://x.com/{text[1:]}"
-        elif not text.startswith(("https://", "http://", "www.")):
-            twitter_url = f"https://x.com/{text}"
-        else:
-            twitter_url = (
-                f"https://{text.removeprefix('https://').removeprefix('http://')}"
-            )
+        cleaned = EuroPythonSpeaker._clean_social_input(text)
+        if cleaned is None:
+            print(f"Invalid Twitter URL: {text}")
+            return None
 
-        return twitter_url.split("?")[0]
+        # https://twitter.com/username (username max 15 chars)
+        match = re.match(r"^(twitter\.com|x\.com)/([\w]{1,15})$", cleaned)
+        if match:
+            _, username = match.groups()
+            return f"https://x.com/{username}"
+
+        # only username
+        if re.match(r"^[\w]{1,15}$", cleaned):
+            return f"https://x.com/{cleaned}"
+
+        print(f"Invalid Twitter URL: {cleaned}")
+        return None
 
     @staticmethod
-    def extract_mastodon_url(text: str) -> None | str:
+    def extract_mastodon_url(text: str) -> str | None:
         """
-        Normalize Mastodon handle or URL to the format: https://<instance>/@<username>
+        Extracts a Mastodon profile URL from the given text.
+        Supports formats like:
+        - @username@instance
+        - username@instance
+        - instance/@username
+        - instance/@username@instance (with redirect)
+        Returns https://<instance>/@<username>, or None if the text is unusable.
         """
-        text = text.strip().split("?", 1)[0]
-
-        # Handle @username@instance or username@instance formats
-        if "@" in text and not text.startswith("http"):
-            parts = text.split("@")
-            if len(parts) == 3:  # @username@instance
-                _, username, instance = parts
-            elif len(parts) == 2:  # username@instance
-                username, instance = parts
-            else:
-                return None
+        cleaned = EuroPythonSpeaker._clean_social_input(text)
+        if not cleaned:  # None (blank / N/A) or empty once prefixes are stripped
+            print(f"Invalid Mastodon URL: {text}")
+            return None
+
+        # instance/@username
+        match = re.match(r"^([\w\.-]+)/@([\w\.-]+)$", cleaned)
+        if match:
+            instance, username = match.groups()
             return f"https://{instance}/@{username}"
 
-        # Handle full URLs
-        if text.startswith("http://"):
-            text = "https://" + text[len("http://") :]
+        parts = cleaned.split("@")
+        if len(parts) == 3:  # instance/@username@instance: last instance wins
+            _, username, instance = parts
+        elif len(parts) == 2:  # username@instance
+            username, instance = parts
+        else:
+            print(f"Invalid Mastodon URL: {cleaned}")
+            return None
+
+        if username and instance:  # reject empty halves such as "username@"
+            return f"https://{instance}/@{username}"
 
-        return text
+        print(f"Invalid Mastodon URL: {cleaned}")
+        return None
 
     @staticmethod
-    def extract_linkedin_url(text: str) -> str:
+    def extract_linkedin_url(text: str) -> str | None:
         """
-        Extract the LinkedIn URL from the answer
+        Extracts a LinkedIn personal profile URL from the given text.
+        Cleans the input and handles formats like:
+        - username
+        - linkedin.com/in/username
+        - @username
+        - tr.linkedin.com/in/username (country subdomains)
         """
-        if text.startswith("in/"):
-            linkedin_url = f"https://linkedin.com/{text}"
-        elif not text.startswith(("https://", "http://", "www.", "linkedin.")):
-            linkedin_url = f"https://linkedin.com/in/{text}"
+        cleaned = EuroPythonSpeaker._clean_social_input(text)
+        if cleaned is None:
+            print(f"Invalid LinkedIn URL: {text}")
+            return None
+
+        if cleaned.startswith("in/"):
+            linkedin_url = f"https://linkedin.com/{cleaned}"
+        elif not cleaned.startswith(("linkedin.", "in/")) and "." not in cleaned:
+            linkedin_url = f"https://linkedin.com/in/{cleaned}"
         else:
-            linkedin_url = (
-                f"https://{text.removeprefix('https://').removeprefix('http://')}"
-            )
+            linkedin_url = f"https://{cleaned}"
+
+        if not re.match(
+            r"^https://([\w-]+\.)?linkedin\.com/in/(?:[\w\-]|%[0-9A-Fa-f]{2})+$",
+            linkedin_url,
+        ):
+            print(f"Invalid LinkedIn URL: {linkedin_url}")
+            return None
 
-        return linkedin_url.split("?")[0]
+        return linkedin_url
 
     @staticmethod
-    def extract_bluesky_url(text: str) -> str:
+    def extract_bluesky_url(text: str) -> str | None:
         """
-        Returns a normalized BlueSky URL in the form https://bsky.app/profile/<USERNAME>.bsky.social,
-        or uses the entire domain if it's custom (e.g., .dev).
+        Extracts a Bluesky profile URL from the given text.
+        Cleans the input and handles formats like:
+        - username
+        - bsky.app/profile/username
+        - bsky/username
+        - username.dev
+        - @username
+        - username.bsky.social
         """
-        text = text.strip().split("?", 1)[0]
+        cleaned = EuroPythonSpeaker._clean_social_input(text)
+        if cleaned is None:
+            print(f"Invalid Bluesky URL: {text}")
+            return None
+
+        for marker in ("bsky.app/profile/", "bsky/"):
+            if marker in cleaned:
+                cleaned = cleaned.split(marker, 1)[1]
+                break
+        else:
+            cleaned = cleaned.rsplit("/", 1)[-1]
+
+        if "." not in cleaned:
+            cleaned += ".bsky.social"
+
+        bluesky_url = f"https://bsky.app/profile/{cleaned}"
+
+        if not re.match(r"^https://bsky\.app/profile/[\w\.-]+\.[\w\.-]+$", bluesky_url):
+            print(f"Invalid Bluesky URL: {bluesky_url}")
+            return None
+
+        return bluesky_url
+
+    @staticmethod
+    def extract_gitx_url(text: str) -> str | None:
+        """
+        Build a GitHub/GitLab profile URL from a free-form answer.
+
+        Accepts a bare handle ("username" or "@username", assumed to be
+        GitHub) or a "github.com/..." / "gitlab.com/..." address.
+        Returns None when the answer is blank or matches neither shape.
+        """
+        handle = EuroPythonSpeaker._clean_social_input(text)
+        if handle is None:
+            print(f"Invalid GitHub/GitLab URL: {text}")
+            return None
+
+        known_hosts = ("github.com/", "gitlab.com/")
+        if handle.startswith(known_hosts):
+            return f"https://{handle}"
+
+        if not re.match(r"^[\w-]+$", handle):
+            print(f"Invalid GitHub/GitLab URL: {handle}")
+            return None
+
+        return f"https://github.com/{handle}"  # bare handle: assume GitHub
+
+    @staticmethod
+    def _is_blank_or_na(text: str) -> bool:
+        """
+        Check if the text is empty, whitespace-only, or a placeholder ("N/A", "-")
+        """
+        return not text or text.strip().lower() in {"", "n/a", "-"}
+
+    @staticmethod
+    def _clean_social_input(text: str) -> str | None:
+        """
+        Cleans the input string for social media URLs.
+        Returns None if the input is blank or "N/A",
+        removes prefixes like "LinkedIn: " or "GH: ",
+        removes parameters like "?something=true",
+        removes trailing slashes,
+        removes "http://" or "https://",
+        removes "www." prefix,
+        removes "@" prefix,
+        and decodes URL-encoded characters.
+        """
+        if EuroPythonSpeaker._is_blank_or_na(text):
+            print(f"Blank or N/A input: {text}")
+            return None
+
+        text = text.strip()
+
+        # Handle inputs like "LinkedIn: https://linkedin.com/in/username"
+        # or "GH: https://github.com/username"
+        text = text.split(" ", 1)[1] if ": " in text else text
+
+        text = text.split("?", 1)[0]
+        text = text.split(",", 1)[0]
+        text = text.rstrip("/")
 
         if text.startswith("https://"):
             text = text[8:]
@@ -150,19 +269,11 @@ def extract_bluesky_url(text: str) -> str:
         if text.startswith("@"):
             text = text[1:]
 
-        for marker in ("bsky.app/profile/", "bsky/"):
-            if marker in text:
-                text = text.split(marker, 1)[1]
-                break
-        # case custom domain
-        else:
-            text = text.rsplit("/", 1)[-1]
-
-        # if there's no dot, assume it's a non-custom handle and append '.bsky.social'
-        if "." not in text:
-            text += ".bsky.social"
+        # Percent-encode non-ASCII characters
+        if not text.isascii():
+            text = quote(text, safe="@/-_.+~#=:")
 
-        return f"https://bsky.app/profile/{text}"
+        return text.lower()
 
 
 class EuroPythonSession(BaseModel):
diff --git a/tests/test_extract_socials.py b/tests/test_extract_socials.py
new file mode 100644
index 0000000..6eb27a0
--- /dev/null
+++ b/tests/test_extract_socials.py
@@ -0,0 +1,134 @@
+import pytest
+
+from src.models.europython import EuroPythonSpeaker
+
+
+# === Mastodon ===
+@pytest.mark.parametrize(
+    ("input_string", "result"),
+    [
+        ("https://mastodon.example/@user123", "https://mastodon.example/@user123"),
+        ("http://mastodon.example/@user123", "https://mastodon.example/@user123"),
+        (
+            "https://mastodon.example/@user123?ref=xyz",
+            "https://mastodon.example/@user123",
+        ),
+        ("@user123@mastodon.example", "https://mastodon.example/@user123"),
+        ("user123@mastodon.example", "https://mastodon.example/@user123"),
+        ("mastodon.example/@user123", "https://mastodon.example/@user123"),
+        ("www.mastodon.example/@user123", "https://mastodon.example/@user123"),
+        (" mastodon.example/@user123 ", "https://mastodon.example/@user123"),
+        ("https://instance.social/@foobar", "https://instance.social/@foobar"),
+        ("foobar@instance.social", "https://instance.social/@foobar"),
+    ],
+)
+def test_extract_mastodon_url(input_string, result):
+    assert EuroPythonSpeaker.extract_mastodon_url(input_string) == result
+
+
+# === LinkedIn ===
+@pytest.mark.parametrize(
+    ("input_string", "result"),
+    [
+        ("user123", "https://linkedin.com/in/user123"),
+        ("in/user123", "https://linkedin.com/in/user123"),
+        ("linkedin.com/in/user123", "https://linkedin.com/in/user123"),
+        ("http://linkedin.com/in/user123", "https://linkedin.com/in/user123"),
+        ("https://linkedin.com/in/user123", "https://linkedin.com/in/user123"),
+        ("https://www.linkedin.com/in/user123", "https://linkedin.com/in/user123"),
+        (
+            "https://linkedin.com/in/example-user-%C3%A3-encoded",
+            "https://linkedin.com/in/example-user-%c3%a3-encoded",
+        ),
+        (
+            "https://linkedin.com/in/example-user-ã-encoded",
+            "https://linkedin.com/in/example-user-%c3%a3-encoded",
+        ),
+        ("https://linkedin.com/in/user123?ref=xyz", "https://linkedin.com/in/user123"),
+        (" LINKEDIN.COM/IN/USER123 ", "https://linkedin.com/in/user123"),
+        (
+            "https://regional.linkedin.com/in/example",
+            "https://regional.linkedin.com/in/example",
+        ),
+    ],
+)
+def test_extract_linkedin_url(input_string, result):
+    assert EuroPythonSpeaker.extract_linkedin_url(input_string) == result
+
+
+# === Bluesky ===
+@pytest.mark.parametrize(
+    ("input_string", "result"),
+    [
+        ("user123", "https://bsky.app/profile/user123.bsky.social"),
+        ("@user123", "https://bsky.app/profile/user123.bsky.social"),
+        ("user123.bsky.social", "https://bsky.app/profile/user123.bsky.social"),
+        ("@user123.bsky.social", "https://bsky.app/profile/user123.bsky.social"),
+        ("user123.dev", "https://bsky.app/profile/user123.dev"),
+        ("bsky.app/profile/user123", "https://bsky.app/profile/user123.bsky.social"),
+        ("bsky/user123", "https://bsky.app/profile/user123.bsky.social"),
+        (
+            "www.bsky.app/profile/user123",
+            "https://bsky.app/profile/user123.bsky.social",
+        ),
+        (
+            "www.bsky.app/profile/user123.bsky.social",
+            "https://bsky.app/profile/user123.bsky.social",
+        ),
+        (
+            "http://bsky.app/profile/user123",
+            "https://bsky.app/profile/user123.bsky.social",
+        ),
+        (
+            "https://bsky.app/profile/user123",
+            "https://bsky.app/profile/user123.bsky.social",
+        ),
+        (
+            "https://bsky.app/profile/user123.dev",
+            "https://bsky.app/profile/user123.dev",
+        ),
+        (
+            "https://bsky.app/profile/user123.bsky.social",
+            "https://bsky.app/profile/user123.bsky.social",
+        ),
+        (" BSKY.APP/PROFILE/USER123 ", "https://bsky.app/profile/user123.bsky.social"),
+    ],
+)
+def test_extract_bluesky_url(input_string, result):
+    assert EuroPythonSpeaker.extract_bluesky_url(input_string) == result
+
+
+# === Twitter/X ===
+@pytest.mark.parametrize(
+    ("input_string", "result"),
+    [
+        ("user123", "https://x.com/user123"),
+        ("@user123", "https://x.com/user123"),
+        ("twitter.com/user123", "https://x.com/user123"),
+        ("https://twitter.com/user123", "https://x.com/user123"),
+        ("https://x.com/user123", "https://x.com/user123"),
+        ("http://twitter.com/user123", "https://x.com/user123"),
+        ("TWITTER.COM/user_name", "https://x.com/user_name"),
+        (" user123 ", "https://x.com/user123"),
+    ],
+)
+def test_extract_twitter_url(input_string, result):
+    assert EuroPythonSpeaker.extract_twitter_url(input_string) == result
+
+
+# === GitHub/GitLab ===
+@pytest.mark.parametrize(
+    ("input_string", "result"),
+    [
+        ("user123", "https://github.com/user123"),
+        ("@user123", "https://github.com/user123"),
+        ("github.com/user123", "https://github.com/user123"),
+        ("https://github.com/user123", "https://github.com/user123"),
+        ("gitlab.com/user123", "https://gitlab.com/user123"),
+        ("https://gitlab.com/user123", "https://gitlab.com/user123"),
+        (" http://github.com/user123 ", "https://github.com/user123"),
+        ("GITHUB.COM/USER123", "https://github.com/user123"),
+    ],
+)
+def test_extract_gitx(input_string, result):
+    assert EuroPythonSpeaker.extract_gitx_url(input_string) == result
diff --git a/tests/test_extract_socials_hypothesis.py b/tests/test_extract_socials_hypothesis.py
new file mode 100644
index 0000000..436b6cc
--- /dev/null
+++ b/tests/test_extract_socials_hypothesis.py
@@ -0,0 +1,132 @@
+from string import ascii_letters, digits
+from urllib.parse import quote
+
+import pytest
+from hypothesis import given
+from hypothesis.strategies import characters, composite, one_of, sampled_from, text
+
+from src.models.europython import EuroPythonSpeaker
+
+ALPHABET_SAFE = ascii_letters + digits + "-_"
+SAFE_USERNAME = text(
+    alphabet=ALPHABET_SAFE,
+    min_size=4,
+    max_size=15,
+).filter(lambda x: any(c.isalnum() for c in x) and not x.startswith(("-", "_")))
+
+
+@composite
+def domain_names(draw, allow_subdomains=False):
+    """Draw "label.tld", or "sub.label.tld" when allow_subdomains is set."""
+    tld = draw(
+        sampled_from(["com", "net", "org", "dev", "social", "app", "io", "xyz"])
+    )
+    label = text(alphabet=ascii_letters + digits + "-", min_size=1, max_size=10)
+    parts = [draw(label), tld]
+    # Draw the optional subdomain last so the draw order is unchanged.
+    return ".".join([draw(label), *parts] if allow_subdomains else parts)
+
+
+# === Mastodon ===
+@given(SAFE_USERNAME)
+def test_mastodon_invalid_fallbacks(text: str):
+    # SAFE_USERNAME never contains "@", so there is no instance to parse and
+    # every drawn handle must be rejected unconditionally.
+    assert EuroPythonSpeaker.extract_mastodon_url(text) is None
+
+
+@given(SAFE_USERNAME, domain_names(allow_subdomains=True))
+def test_mastodon_username_at_instance(username: str, domain: str):
+    input_str = f"{username}@{domain}"
+    expected = f"https://{domain.lower()}/@{username.lower()}"
+    assert EuroPythonSpeaker.extract_mastodon_url(input_str) == expected
+
+
+@given(domain_names(allow_subdomains=True), SAFE_USERNAME)
+def test_mastodon_instance_slash_at_username(instance: str, username: str):
+    input_str = f"{instance}/@{username}"
+    expected = f"https://{instance.lower()}/@{username.lower()}"
+    assert EuroPythonSpeaker.extract_mastodon_url(input_str) == expected
+
+
+# === LinkedIn ===
+@given(SAFE_USERNAME)
+def test_linkedin_handle(username: str):
+    expected = f"https://linkedin.com/in/{username.lower()}"
+    assert EuroPythonSpeaker.extract_linkedin_url(username) == expected
+
+
+@given(
+    text(
+        alphabet=one_of(
+            characters(whitelist_categories=("Ll", "Lu", "Nd")),  # letters and numbers
+            characters(  ## special characters (accents, etc.)
+                whitelist_categories=("Ll", "Lu", "Nl", "No", "Mn", "Mc"),
+            ).filter(lambda c: not c.isascii() and not c.isspace()),
+        ),
+        min_size=1,
+        max_size=30,
+    )
+)
+def test_linkedin_encoding_support(username: str):
+    encoded = quote(username, safe="@/-_.+~#=:")
+    expected = f"https://linkedin.com/in/{encoded.lower()}"
+    assert (
+        EuroPythonSpeaker.extract_linkedin_url(f"https://linkedin.com/in/{username}")
+        == expected
+    )
+
+
+@given(domain_names().filter(lambda d: not d.endswith("linkedin.com")), SAFE_USERNAME)
+def test_linkedin_nonsense_domains(domain: str, path: str):
+    assert EuroPythonSpeaker.extract_linkedin_url(f"{domain}/in/{path}") is None
+
+
+# === Bluesky ===
+@given(SAFE_USERNAME)
+def test_bluesky_handle_to_fallback_domain(handle: str):
+    expected = f"https://bsky.app/profile/{handle.lower()}.bsky.social"
+    assert EuroPythonSpeaker.extract_bluesky_url(handle) == expected
+
+
+@given(domain_names())
+def test_bluesky_custom_domains(domain: str):
+    expected = f"https://bsky.app/profile/{domain.lower()}"
+    assert EuroPythonSpeaker.extract_bluesky_url(domain) == expected
+
+
+# === Twitter / X ===
+@given(
+    text(alphabet=ascii_letters + digits + "_", min_size=4, max_size=15),
+)
+def test_twitter_usernames(handle: str):
+    expected = f"https://x.com/{handle.lower()}"
+    assert EuroPythonSpeaker.extract_twitter_url(handle) == expected
+    assert EuroPythonSpeaker.extract_twitter_url(f"@{handle}") == expected
+    assert (
+        EuroPythonSpeaker.extract_twitter_url(f"https://twitter.com/{handle}")
+        == expected
+    )
+    assert EuroPythonSpeaker.extract_twitter_url(f"https://x.com/{handle}") == expected
+    assert (
+        EuroPythonSpeaker.extract_twitter_url(f"http://twitter.com/{handle}/")
+        == expected
+    )
+
+
+# === GitHub / GitLab ===
+@given(SAFE_USERNAME)
+def test_github_usernames(username: str):
+    expected = f"https://github.com/{username.lower()}"
+    assert EuroPythonSpeaker.extract_gitx_url(username) == expected
+    assert EuroPythonSpeaker.extract_gitx_url(f"@{username}") == expected
+    assert EuroPythonSpeaker.extract_gitx_url(f"github.com/{username}") == expected
+
+
+@given(SAFE_USERNAME)
+def test_gitlab_usernames(username: str):
+    expected = f"https://gitlab.com/{username.lower()}"
+    assert EuroPythonSpeaker.extract_gitx_url(f"gitlab.com/{username}") == expected
+    assert (
+        EuroPythonSpeaker.extract_gitx_url(f"https://gitlab.com/{username}") == expected
+    )
diff --git a/tests/test_extract_socials_negative.py b/tests/test_extract_socials_negative.py
new file mode 100644
index 0000000..f69e52b
--- /dev/null
+++ b/tests/test_extract_socials_negative.py
@@ -0,0 +1,81 @@
+import pytest
+
+from src.models.europython import EuroPythonSpeaker
+
+
+# === Mastodon ===
+@pytest.mark.parametrize(
+    ("input_string",),
+    [
+        ("",),
+        ("false",),
+        ("mastodon@",),
+        ("@user@",),
+        ("username@",),
+        ("https://mastodon.social/user",),
+        ("mastodon.social/user",),
+        ("@mastodon.social",),
+        ("https://hostux.social/users/username",),
+        ("https://social/@",),
+    ],
+)
+def test_mastodon_url_invalid(input_string):
+    assert EuroPythonSpeaker.extract_mastodon_url(input_string) is None
+
+
+# === LinkedIn ===
+@pytest.mark.parametrize(
+    ("input_string",),
+    [
+        ("",),
+        ("/in/",),
+        ("linkedin.com/in/",),
+        ("linkedin.com/in",),
+        ("linkedin.com/username",),
+        ("linkedin.net/in/username",),
+        ("http://linkedin.com/user",),
+        ("http://",),
+        ("n/a",),
+    ],
+)
+def test_linkedin_url_invalid(input_string):
+    assert EuroPythonSpeaker.extract_linkedin_url(input_string) is None
+
+
+# === Twitter / X ===
+@pytest.mark.parametrize(
+    ("input_string",),
+    [
+        ("",),
+        ("-",),
+        ("user-name",),
+        ("x.com/",),
+        ("https://x.com/",),
+        ("https://twitter.com/",),
+        ("http://",),
+        ("@user@",),
+        ("user!",),
+        ("too_long_username_123",),
+        ("twitter.com/user-with-hyphen",),
+        ("https://github.com/user",),
+    ],
+)
+def test_twitter_url_invalid(input_string):
+    assert EuroPythonSpeaker.extract_twitter_url(input_string) is None
+
+
+# === GitHub / GitLab ===
+@pytest.mark.parametrize(
+    ("input_string",),
+    [
+        ("",),
+        ("https://bitbucket.org/user",),
+        ("bitbucket.org/user",),
+        ("codeberg.org/user",),
+        ("https://git.example.com/user",),
+        ("http://",),
+        ("@/",),
+    ],
+)
+def test_gitx_url_invalid(input_string):
+    assert EuroPythonSpeaker.extract_gitx_url(input_string) is None
diff --git a/tests/test_social_media_extractions.py b/tests/test_social_media_extractions.py
deleted file mode 100644
index 5c13a50..0000000
--- a/tests/test_social_media_extractions.py
+++ /dev/null
@@ -1,71 +0,0 @@
-import pytest
-
-from src.models.europython import EuroPythonSpeaker
-
-
-@pytest.mark.parametrize(
-    ("input_string", "result"),
-    [
-        ("http://mastodon.social/@username", "https://mastodon.social/@username"),
-        ("https://mastodon.social/@username", "https://mastodon.social/@username"),
-        (
-            "https://mastodon.social/@username?something=true",
-            "https://mastodon.social/@username",
-        ),
-        ("@username@mastodon.social", "https://mastodon.social/@username"),
-        ("username@mastodon.social", "https://mastodon.social/@username"),
-    ],
-)
-def test_extract_mastodon_url(input_string: str, result: str) -> None:
-    assert EuroPythonSpeaker.extract_mastodon_url(input_string) == result
-
-
-@pytest.mark.parametrize(
-    ("input_string", "result"),
-    [
-        ("username", "https://linkedin.com/in/username"),
-        ("linkedin.com/in/username", "https://linkedin.com/in/username"),
-        ("in/username", "https://linkedin.com/in/username"),
-        ("www.linkedin.com/in/username", "https://www.linkedin.com/in/username"),
-        ("http://linkedin.com/in/username", "https://linkedin.com/in/username"),
-        ("https://linkedin.com/in/username", "https://linkedin.com/in/username"),
-    ],
-)
-def test_extract_linkedin_url(input_string: str, result: str) -> None:
-    assert EuroPythonSpeaker.extract_linkedin_url(input_string) == result
-
-
-@pytest.mark.parametrize(
-    ("input_string", "result"),
-    [
-        ("username", "https://bsky.app/profile/username.bsky.social"),
-        ("@username", "https://bsky.app/profile/username.bsky.social"),
-        ("username.dev", "https://bsky.app/profile/username.dev"),
-        ("@username.dev", "https://bsky.app/profile/username.dev"),
-        ("username.bsky.social", "https://bsky.app/profile/username.bsky.social"),
-        ("bsky.app/profile/username", "https://bsky.app/profile/username.bsky.social"),
-        ("bsky/username", "https://bsky.app/profile/username.bsky.social"),
-        (
-            "www.bsky.app/profile/username",
-            "https://bsky.app/profile/username.bsky.social",
-        ),
-        (
-            "www.bsky.app/profile/username.bsky.social",
-            "https://bsky.app/profile/username.bsky.social",
-        ),
-        (
-            "http://bsky.app/profile/username",
-            "https://bsky.app/profile/username.bsky.social",
-        ),
-        (
-            "https://bsky.app/profile/username.com",
-            "https://bsky.app/profile/username.com",
-        ),
-        (
-            "https://bsky.app/profile/username.bsky.social",
-            "https://bsky.app/profile/username.bsky.social",
-        ),
-    ],
-)
-def test_extract_bluesky_url(input_string: str, result: str) -> None:
-    assert EuroPythonSpeaker.extract_bluesky_url(input_string) == result
diff --git a/uv.lock b/uv.lock
index 0f54919..76b45b1 100644
--- a/uv.lock
+++ b/uv.lock
@@ -11,6 +11,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643 },
 ]
 
+[[package]]
+name = "attrs"
+version = "25.3.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/5a/b0/1367933a8532ee6ff8d63537de4f1177af4bff9f3e829baf7331f595bb24/attrs-25.3.0.tar.gz", hash = "sha256:75d7cefc7fb576747b2c81b4442d4d4a1ce0900973527c011d1030fd3bf4af1b", size = 812032 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/77/06/bb80f5f86020c4551da315d78b3ab75e8228f89f0162f2c3a819e407941a/attrs-25.3.0-py3-none-any.whl", hash = "sha256:427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3", size = 63815 },
+]
+
 [[package]]
 name = "certifi"
 version = "2025.1.31"
@@ -91,6 +100,19 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/4d/36/2a115987e2d8c300a974597416d9de88f2444426de9571f4b59b2cca3acc/filelock-3.18.0-py3-none-any.whl", hash = "sha256:c401f4f8377c4464e6db25fff06205fd89bdd83b65eb0488ed1b160f780e21de", size = 16215 },
 ]
 
+[[package]]
+name = "hypothesis"
+version = "6.131.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "attrs" },
+    { name = "sortedcontainers" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/73/79/82eaf131f58a5c434830f0c196995a5071531765c51ebc8aaff493002b5e/hypothesis-6.131.0.tar.gz", hash = "sha256:4b807daeeee47852edfd9818ba0e33df14902f1b78a5524f1a3fb71f80c7cec3", size = 430541 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/01/43/b5e5d397e1ece9b71584c1771214b4a643d81fa8f197d4ff5938295ead25/hypothesis-6.131.0-py3-none-any.whl", hash = "sha256:734959017e3ee4ef8f0ecb4e5169c8f4cf96dc83a997d2edf01fb5350f5bf2f4", size = 495720 },
+]
+
 [[package]]
 name = "identify"
 version = "2.6.9"
@@ -184,6 +206,7 @@ dependencies = [
 
 [package.dev-dependencies]
 dev = [
+    { name = "hypothesis" },
     { name = "pre-commit" },
     { name = "pytest" },
     { name = "ruff" },
@@ -200,6 +223,7 @@ requires-dist = [
 
 [package.metadata.requires-dev]
 dev = [
+    { name = "hypothesis", specifier = ">=6.131.0" },
     { name = "pre-commit", specifier = ">=4.2" },
     { name = "pytest", specifier = ">=8.3.5" },
     { name = "ruff", specifier = ">=0.11.4" },
@@ -364,6 +388,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/4f/03/3aec4846226d54a37822e4c7ea39489e4abd6f88388fba74e3d4abe77300/ruff-0.11.4-py3-none-win_arm64.whl", hash = "sha256:d435db6b9b93d02934cf61ef332e66af82da6d8c69aefdea5994c89997c7a0fc", size = 10450306 },
 ]
 
+[[package]]
+name = "sortedcontainers"
+version = "2.4.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/e8/c4/ba2f8066cceb6f23394729afe52f3bf7adec04bf9ed2c820b39e19299111/sortedcontainers-2.4.0.tar.gz", hash = "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88", size = 30594 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/32/46/9cb0e58b2deb7f82b84065f37f3bffeb12413f947f9388e4cac22c4621ce/sortedcontainers-2.4.0-py2.py3-none-any.whl", hash = "sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0", size = 29575 },
+]
+
 [[package]]
 name = "text-unidecode"
 version = "1.3"

From 645538b455bff38302f94f4827e40da5792625be Mon Sep 17 00:00:00 2001
From: egeakman <me@egeakman.dev>
Date: Sun, 13 Apr 2025 22:08:27 -0400
Subject: [PATCH 2/2] Some documentation

---
 README.md               | 123 +++++++++++---
 data/examples/README.md | 361 ++++++++++++++++++++--------------------
 2 files changed, 277 insertions(+), 207 deletions(-)

diff --git a/README.md b/README.md
index 9cd7e0f..d8d175e 100644
--- a/README.md
+++ b/README.md
@@ -1,41 +1,114 @@
-# programapi
+# 🎤 programapi
 
-This project downloads, processes, saves, and serves the static JSON files containing details of accepted speakers and submissions via an API.
+This project powers the **EuroPython 2025** website and Discord bot by downloading, transforming, and serving clean, structured JSON files for sessions, speakers, and the schedule, all pulled from Pretalx.
 
-Used by the EuroPython 2024 website and the Discord bot.
+Built for transparency. Designed for reuse. Optimized for EuroPython.
 
-**What this project does step-by-step:**
+---
 
-1. Downloads the Pretalx speaker and submission data, and saves it as JSON files.
-2. Transforms the JSON files into a format that is easier to work with and OK to serve publicly. This includes removing unnecessary/private fields, and adding new fields.
-3. Serves the JSON files via an API.
+## 🚀 What This Project Does
 
-## Installation
+1. **Downloads** submission and speaker data from Pretalx.
+2. **Transforms** raw data:
+   - Removes private/irrelevant fields
+   - Normalizes formats
+   - Adds computed fields (e.g. URLs, delivery mode)
+3. **Serves** the transformed JSON files via a static API.
 
-1. Clone the repository.
-2. Install the dependency management tool: ``make deps/pre``
-3. Install the dependencies: ``make deps/install``
-4. Set up ``pre-commit``: ``make pre-commit``
+---
 
-## Configuration
+## ⚙️ Installation
 
-You can change the event in the [``config.py``](src/config.py) file. It is set to ``europython-2024`` right now.
+1. **Clone the repo**
+   ```bash
+   git clone https://github.com/EuroPython/programapi.git
+   cd programapi
+   ```
 
-## Usage
+2. **Install [uv](https://docs.astral.sh/uv/getting-started/installation/)** (fast Python package manager)
 
-- Run the whole process: ``make all``
-- Run only the download process: ``make download``
-- Run only the transformation process: ``make transform``
+3. **Create a Python 3.13 virtual environment**
+   ```bash
+   uv venv -p 3.13
+   ```
 
-**Note:** Don't forget to set ``PRETALX_TOKEN`` in your ``.env`` file at the root of the project. And please don't make too many requests to the Pretalx API, it might get angry 🤪
+4. **Install dev dependencies**
+   ```bash
+   make dev
+   ```
 
-## API
+5. **Enable pre-commit hooks**
+   ```bash
+   make pre-commit
+   ```
 
-The API is served at ``https://programapi24.europython.eu/2024``. It has two endpoints (for now):
+---
 
-- ``/speakers.json``: Returns the list of confirmed speakers.
-- ``/sessions.json``: Returns the list of confirmed sessions.
+## 🛠️ Configuration
 
-## Schema
+You can update the event year or shortname in [`src/config.py`](src/config.py).
 
-See [this page](data/examples/README.md) for the explanations of the fields in the returned JSON files.
+Also, create a `.env` file in the project root and set:
+
+```env
+PRETALX_TOKEN=your_api_token_here
+```
+
+(Yes, Pretalx has rate limits. Please be nice. 🤪)
+
+---
+
+## 📦 Usage
+
+- Run the **entire pipeline**:
+  ```bash
+  make all
+  ```
+
+- Run only the **download step**:
+  ```bash
+  make download
+  ```
+
+- Run only the **transformation step**:
+  ```bash
+  make transform
+  ```
+
+- (Optional) **Exclude components**:
+  ```bash
+  make all EXCLUDE="schedule youtube"
+  ```
+
+---
+
+## 🌐 API Endpoints
+
+Hosted at:
+
+```
+https://static.europython.eu/programme/ep2025/releases/current
+```
+
+| Endpoint            | Description                                |
+|---------------------|--------------------------------------------|
+| `/speakers.json`    | List of confirmed speakers                 |
+| `/sessions.json`    | List of confirmed sessions                 |
+| `/schedule.json`    | Finalized conference schedule *(TBA)*      |
+
+---
+
+## 📖 Schema Documentation
+
+Looking for field definitions and examples?
+Check out 👉 [`data/examples/README.md`](data/examples/README.md) for a full schema reference with example payloads and explanations.
+
+---
+
+## 💬 Questions? Feedback?
+
+Feel free to open an issue or reach us at [infra@europython.eu](mailto:infra@europython.eu). We love contributors 💜
+
+---
+
+📅 Last updated for: **EuroPython 2025**
diff --git a/data/examples/README.md b/data/examples/README.md
index c54e02e..5992b3d 100644
--- a/data/examples/README.md
+++ b/data/examples/README.md
@@ -1,96 +1,14 @@
-# Explaining the Output Data
+# 📄 ProgramAPI Output Documentation
 
-**Note:** Some of the fields may be `null` or empty (`""`).
+> ⚠️ Some fields may be `null`, empty (`""`), or omitted entirely in some contexts.
+> 🍭 Also, yes, Rick Astley may appear in test videos. You're welcome.
 
-## `sessions.json`
+---
 
-<details>
-<summary>Example session data JSON</summary>
-
-```json
-{
-    "A1B2C3": {
-        "code": "A1B2C3",
-        "title": "Example talk",
-        "speakers": [
-        "B4D5E6",
-        ...
-        ],
-        "session_type": "Talk",
-        "slug": "example-talk",
-        "track": "Some Track",
-        "state": "confirmed",
-        "abstract": "This is an example talk. It is a great talk.",
-        "tweet": "This is an example talk.",
-        "duration": "60",
-        "level": "intermediate",
-        "delivery": "in-person",
-        "resources": [
-            {
-                "resource": "https://example.com/slides.pdf",
-                "description": "Slides for the session"
-            }
-        ...
-        ],
-        "room": "South Hall 2A",
-        "start": "2099-07-10T14:00:00+02:00",
-        "end": "2099-07-10T15:00:00+02:00",
-        "website_url": "https://ep2099.europython.eu/session/example-talk/",
-        "youtube_url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ&pp=ygUJcmljayByb2xs",
-        "sessions_in_parallel": [
-        "F7G8H9",
-        ...
-        ],
-        "sessions_after": [
-        "I0J1K2",
-        ...
-        ],
-        "sessions_before": [
-        "L3M4N5",
-        ...
-        ],
-        "next_session": "O6P7Q8",
-        "prev_session": "R9S0T1"
-    },
-}
-```
-</details>
-
-&nbsp;
-
-The fields are as follows:
-
-| Key                    | Type                                      | Notes                                                         |
-|------------------------|-------------------------------------------|---------------------------------------------------------------|
-| `code`                 | `string`                                  | Unique identifier for the session                             |
-| `title`                | `string`                                  | Title of the session                                          |
-| `speakers`             | `array[string]`                           | List of codes of the speakers                                 |
-| `session_type`         | `string`                                  | Type of the session (e.g. Talk, Workshop, Poster, etc.)       |
-| `slug`                 | `string`                                  | URL-friendly version of the title                             |
-| `track`                | `string` \| `null`                        | Track of the session (e.g. PyData, Web, etc.)                 |
-| `abstract`             | `string`                                  | Abstract of the session                                       |
-| `tweet`                | `string`                                  | Tweet-length description of the session                       |
-| `duration`             | `string`                                  | Duration of the session in minutes                            |
-| `level`                | `string`                                  | Level of the session (e.g. beginner, intermediate, advanced)  |
-| `delivery`             | `string`                                  | Delivery mode of the session (e.g. in-person, remote)         |
-| `resources`            | `array[object[string, string]]` \| `null` | List of resources for the session: `{"resource": <url>, "description": <description>}` |
-| `room`                 | `string` \| `null`                        | Room where the session will be held                           |
-| `start`                | `string (datetime ISO format)` \| `null`  | Start time of the session                                     |
-| `end`                  | `string (datetime ISO format)` \| `null`  | End time of the session                                       |
-| `website_url`          | `string`                                  | URL of the session on the conference website                  |
-| `youtube_url`          | `string` \| `null`                        | URL of the session's video on YouTube                         |
-| `sessions_in_parallel` | `array[string]` \| `null`                 | List of codes of sessions happening in parallel               |
-| `sessions_after`       | `array[string]` \| `null`                 | List of codes of sessions happening after this session        |
-| `sessions_before`      | `array[string]` \| `null`                 | List of codes of sessions happening before this session       |
-| `next_session`         | `string` \| `null`                        | Code of the next session in the same room                     |
-| `prev_session`         | `string` \| `null`                        | Code of the previous session in the same room                 |
-
-&nbsp;
-
-## `speakers.json`
+## 🗣 `speakers.json`
 
 <details>
-<summary>Example speaker data JSON</summary>
+<summary>Example speaker data</summary>
 
 ```json
 {
@@ -100,119 +18,198 @@ The fields are as follows:
     "biography": "Some bio",
     "avatar": "https://pretalx.com/media/avatars/picture.jpg",
     "slug": "a-speaker",
-    "submissions": [
-      "A1B2C3",
-      ...
-    ],
+    "submissions": ["A1B2C3"],
     "affiliation": "A Company",
     "homepage": "https://example.com",
-    "gitx": "https://github.com/B4D5E6",
-    "linkedin_url": "https://www.linkedin.com/in/B4D5E6",
+    "twitter_url": "https://x.com/B4D5E6",
+    "linkedin_url": "https://linkedin.com/in/B4D5E6",
     "mastodon_url": "https://mastodon.social/@B4D5E6",
-    "twitter_url": "https://x.com/B4D5E6"
-  },
-  ...
+    "bluesky_url": "https://bsky.app/profile/B4D5E6.bsky.social",
+    "gitx_url": "https://github.com/B4D5E6",
+    "website_url": "https://ep2099.europython.eu/speaker/a-speaker"
+  }
 }
 ```
 </details>
 
-&nbsp;
-
-The fields are as follows:
+### Fields
 
 | Key            | Type               | Notes                                                                 |
 |----------------|--------------------|-----------------------------------------------------------------------|
 | `code`         | `string`           | Unique identifier for the speaker                                     |
-| `name`         | `string`           | Name of the speaker                                                   |
-| `biography`    | `string` \| `null` | Biography of the speaker                                              |
-| `avatar`       | `string`           | URL of the speaker's avatar                                           |
-| `slug`         | `string`           | URL-friendly version of the name                                      |
-| `submissions`  | `array[string]`    | List of codes of the sessions the speaker is speaking at              |
-| `affiliation`  | `string` \| `null` | Affiliation of the speaker                                            |
-| `homepage`     | `string` \| `null` | URL/text of the speaker's homepage                                    |
-| `gitx`         | `string` \| `null` | URL/text of the speaker's GitHub/GitLab/etc. profile                  |
-| `linkedin_url` | `string` \| `null` | URL of the speaker's LinkedIn profile                                 |
-| `twitter_url`  | `string` \| `null` | URL of the speaker's Twitter profile                                  |
-| `mastodon_url` | `string` \| `null` | URL of the speaker's Mastodon profile                                 |
-| `website_url`  | `string`           | URL of the speaker's profile on the conference website                |
-
-&nbsp;
-
-## `schedule.json`
+| `name`         | `string`           | Full name of the speaker                                              |
+| `biography`    | `string` \| `null` | Short biography                                                       |
+| `avatar`       | `string`           | URL of speaker's avatar                                               |
+| `slug`         | `string`           | URL-safe speaker name                                                 |
+| `submissions`  | `array[string]`    | Codes of sessions the speaker is involved in                          |
+| `affiliation`  | `string` \| `null` | Affiliated institution or organization                                |
+| `homepage`     | `string` \| `null` | Personal or professional homepage                                     |
+| `twitter_url`  | `string` \| `null` | Normalized Twitter/X profile URL                                      |
+| `mastodon_url` | `string` \| `null` | Normalized Mastodon profile URL                                       |
+| `linkedin_url` | `string` \| `null` | Normalized LinkedIn profile URL                                       |
+| `bluesky_url`  | `string` \| `null` | Normalized Bluesky profile URL                                        |
+| `gitx_url`     | `string` \| `null` | Normalized GitHub/GitLab profile URL                                  |
+| `website_url`  | `string`           | Auto-generated speaker profile on the EuroPython site                 |
+
+---
+
+## 📚 `sessions.json`
 
 <details>
-<summary>Example schedule data JSON</summary>
+<summary>Example session data</summary>
 
 ```json
 {
-    "days": {
-        "2099-07-08": {
-            "events": [
-                {
-                    "code": "LMN123",
-                    "title": "Welcome and Keynote",
-                    "speakers": [],
-                    "session_type": "Announcements",
-                    "slug": "welcome-keynote",
-                    "track": null,
-                    "level": "beginner",
-                    "rooms": [
-                        "Room A",
-                        "Room B"
-                    ],
-                    "start": "2099-07-08T08:00:00+02:00",
-                    "duration": 60,
-                    "tweet": "",
-                    "website_url": "https://ep2099.europython.eu/session/welcome-keynote"
-                },
-                {
-                    "code": "OPQ456",
-                    "title": "Advanced Python Techniques",
-                    "speakers": [
-                        {
-                            "avatar": "https://pretalx.com/media/avatars/picture.jpg",
-                            "code": "RST789",
-                            "name": "John Doe",
-                            "slug": "john-doe",
-                            "website_url": "https://ep2099.europython.eu/speaker/john-doe"
-                        }
-                    ],
-                    "session_type": "Tutorial",
-                    "slug": "advanced-python-techniques",
-                    "track": "CPython Internals",
-                    "level": "advanced",
-                    "rooms": [
-                        "Room C"
-                    ],
-                    "start": "2099-07-08T10:00:00+02:00",
-                    "duration": 90,
-                    "tweet": "",
-                    "website_url": "https://ep2099.europython.eu/advanced-python-techniques"
-                }
-            ]
+  "A1B2C3": {
+    "code": "A1B2C3",
+    "title": "Example talk",
+    "speakers": ["B4D5E6"],
+    "session_type": "Talk",
+    "slug": "example-talk",
+    "track": "Some Track",
+    "abstract": "This is an example talk.",
+    "tweet": "This is an example talk.",
+    "duration": "60",
+    "level": "intermediate",
+    "delivery": "in-person",
+    "resources": [
+      {
+        "resource": "https://example.com/slides.pdf",
+        "description": "Slides for the session"
+      }
+    ],
+    "room": "South Hall 2A",
+    "start": "2099-07-10T14:00:00+02:00",
+    "end": "2099-07-10T15:00:00+02:00",
+    "website_url": "https://ep2099.europython.eu/session/example-talk",
+    "youtube_url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
+    "sessions_in_parallel": ["F7G8H9"],
+    "sessions_after": ["I0J1K2"],
+    "sessions_before": ["L3M4N5"],
+    "next_session": "O6P7Q8",
+    "prev_session": "R9S0T1"
+  }
+}
+```
+</details>
+
+### Fields
+
+| Key                    | Type                                      | Notes                                                           |
+|------------------------|-------------------------------------------|-----------------------------------------------------------------|
+| `code`                 | `string`                                  | Unique session identifier                                       |
+| `title`                | `string`                                  | Title of the session                                            |
+| `speakers`             | `array[string]`                           | List of speaker codes                                           |
+| `session_type`         | `string`                                  | Type of session (e.g. Talk, Workshop)                           |
+| `slug`                 | `string`                                  | URL-friendly session name                                       |
+| `track`                | `string` \| `null`                        | Associated track                                                |
+| `abstract`             | `string`                                  | Abstract or session description                                 |
+| `tweet`                | `string`                                  | Short summary (tweet-style)                                     |
+| `duration`             | `string`                                  | Duration in minutes, as a string                                |
+| `level`                | `string`                                  | Level of session (e.g. beginner, intermediate)                  |
+| `delivery`             | `string`                                  | Delivery format (in-person or remote)                           |
+| `resources`            | `array[object]` \| `null`                 | Supplementary materials (URL and description)                   |
+| `room`                 | `string` \| `null`                        | Assigned room (e.g. "Exhibit Hall" auto-mapped for posters)     |
+| `start`                | `datetime` \| `null`                      | ISO datetime of start                                           |
+| `end`                  | `datetime` \| `null`                      | ISO datetime of end                                             |
+| `website_url`          | `string`                                  | URL of session on the EuroPython site                           |
+| `youtube_url`          | `string` \| `null`                        | YouTube link to session video (🎵 never gonna give you up?)     |
+| `sessions_in_parallel` | `array[string]` \| `null`                 | Session codes running at the same time                          |
+| `sessions_after`       | `array[string]` \| `null`                 | Sessions immediately after this one                             |
+| `sessions_before`      | `array[string]` \| `null`                 | Sessions immediately before this one                            |
+| `next_session`         | `string` \| `null`                        | Next session in the same room                                   |
+| `prev_session`         | `string` \| `null`                        | Previous session in the same room                               |
+
+---
+
+## 🗓️ `schedule.json`
+
+<details>
+<summary>Example schedule day</summary>
+
+```json
+{
+  "days": {
+    "2099-07-08": {
+      "rooms": ["Room A", "Room B", "Room C"],
+      "events": [
+        {
+          "event_type": "SESSION",
+          "code": "OPQ456",
+          "slug": "advanced-python",
+          "title": "Advanced Python Techniques",
+          "session_type": "Tutorial",
+          "speakers": [
+            {
+              "code": "RST789",
+              "name": "John Doe",
+              "avatar": "https://pretalx.com/media/avatars/picture.jpg",
+              "slug": "john-doe",
+              "website_url": "https://ep2099.europython.eu/speaker/john-doe"
+            }
+          ],
+          "track": "CPython Internals",
+          "tweet": "",
+          "level": "advanced",
+          "rooms": ["Room C"],
+          "start": "2099-07-08T10:00:00+02:00",
+          "duration": 90,
+          "website_url": "https://ep2099.europython.eu/session/advanced-python"
+        },
+        {
+          "event_type": "BREAK",
+          "title": "Coffee Break",
+          "duration": 30,
+          "rooms": ["Room A", "Room B"],
+          "start": "2099-07-08T11:30:00+02:00"
         }
+      ]
     }
+  }
 }
 ```
 </details>
 
-&nbsp;
-
-The fields are as follows:
-
-| Key            | Type                        | Notes                                                      |
-|----------------|-----------------------------|------------------------------------------------------------|
-| `days`         | `object`                    | Contains schedule by date                                  |
-| `events`       | `array[object]`             | List of events for a particular day                        |
-| `code`         | `string`                    | Unique identifier for the event                            |
-| `title`        | `string`                    | Title of the event                                         |
-| `speakers`     | `array[object]`             | List of speakers for the event (if applicable)             |
-| `session_type` | `string`                    | Type of event (e.g. Announcements, Workshop, etc.)         |
-| `slug`         | `string`                    | URL-friendly version of the event title                    |
-| `track`        | `string` \| `null`          | Track associated with the event (e.g. Web, PyData, etc.)   |
-| `level`        | `string`                    | Level of the event (beginner, intermediate, advanced)       |
-| `rooms`        | `array[string]`             | List of rooms the event is being held in                   |
-| `start`        | `string (datetime ISO)`      | Start time of the event                                    |
-| `duration`     | `integer`                   | Duration of the event in minutes                           |
-| `tweet`        | `string` \| `null`          | Tweet-length description of the event                      |
-| `website_url`  | `string`                    | URL of the event on the conference website                 |
+### Fields
+
+| Key         | Type                             | Notes                                                |
+|-------------|----------------------------------|------------------------------------------------------|
+| `days`      | `object[date, DaySchedule]`      | Schedule grouped by day                              |
+| `rooms`     | `array[string]`                  | All rooms active on that day                         |
+| `events`    | `array[Session or Break]`        | Mixed list of sessions and breaks                    |
+
+#### `Session` (EventType = `"SESSION"`)
+
+| Field         | Type                                 | Notes                                               |
+|---------------|--------------------------------------|-----------------------------------------------------|
+| `event_type`  | `"SESSION"`                          | Constant                                            |
+| `code`        | `string`                             | Session code                                        |
+| `slug`        | `string`                             | URL-friendly name                                   |
+| `title`       | `string`                             | Title of the session                                |
+| `session_type`| `string`                             | Talk, Workshop, etc.                                |
+| `speakers`    | `array[Speaker]`                     | Mini speaker profiles                               |
+| `track`       | `string` \| `null`                   | Optional topic track                                |
+| `tweet`       | `string`                             | Short description                                   |
+| `level`       | `string`                             | Beginner, Intermediate, etc.                        |
+| `rooms`       | `array[string]`                      | One or more rooms                                   |
+| `start`       | `datetime`                           | ISO 8601                                            |
+| `duration`    | `int`                                | Computed from `total_duration / slot_count`         |
+| `website_url` | `string`                             | Link to session page                                |
+
+#### `Break` (EventType = `"BREAK"`)
+
+| Field         | Type              | Notes                          |
+|---------------|-------------------|--------------------------------|
+| `event_type`  | `"BREAK"`         | Constant                       |
+| `title`       | `string`          | Name of the break              |
+| `duration`    | `int`             | Minutes                        |
+| `rooms`       | `array[string]`   | Rooms where the break applies  |
+| `start`       | `datetime`        | Start time                     |
+
+---
+
+### 🛠 Notes & Logic
+
+- `room` normalization maps `"Main Hall"` sessions to `"Exhibit Hall"` — Poster sessions rejoice!
+- All `"Registration & Welcome"` events automatically include **all active rooms**.
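+- The social profile fields (`twitter_url`, `mastodon_url`, `linkedin_url`, `bluesky_url`, `gitx_url`) accept varied inputs such as `@name`, full URLs, or a bare `username`, and normalize them to profile URLs.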