Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 24 additions & 10 deletions src/models/europython.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,18 +91,28 @@ def extract_twitter_url(text: str) -> str:
return twitter_url.split("?")[0]

@staticmethod
def extract_mastodon_url(text: str) -> str:
def extract_mastodon_url(text: str) -> None | str:
"""
Extract the Mastodon URL from the answer, handle @username@instance format
Normalize Mastodon handle or URL to the format: https://<instance>/@<username>
"""
if not text.startswith(("https://", "http://")) and text.count("@") == 2:
mastodon_url = f"https://{text.split('@')[2]}/@{text.split('@')[1]}"
else:
mastodon_url = (
f"https://{text.removeprefix('https://').removeprefix('http://')}"
)
text = text.strip().split("?", 1)[0]

# Handle @username@instance or username@instance formats
if "@" in text and not text.startswith("http"):
parts = text.split("@")
if len(parts) == 3: # @username@instance
_, username, instance = parts
elif len(parts) == 2: # username@instance
username, instance = parts
else:
return None
return f"https://{instance}/@{username}"

return mastodon_url.split("?")[0]
# Handle full URLs
if text.startswith("http://"):
text = "https://" + text[len("http://") :]

return text

@staticmethod
def extract_linkedin_url(text: str) -> str:
Expand All @@ -126,7 +136,7 @@ def extract_bluesky_url(text: str) -> str:
Returns a normalized BlueSky URL in the form https://bsky.app/profile/<USERNAME>.bsky.social,
or uses the entire domain if it's custom (e.g., .dev).
"""
text = text.split("?", 1)[0].strip()
text = text.strip().split("?", 1)[0]

if text.startswith("https://"):
text = text[8:]
Expand All @@ -136,6 +146,10 @@ def extract_bluesky_url(text: str) -> str:
if text.startswith("www."):
text = text[4:]

# Remove @ if present
if text.startswith("@"):
text = text[1:]

for marker in ("bsky.app/profile/", "bsky/"):
if marker in text:
text = text.split(marker, 1)[1]
Expand Down
3 changes: 3 additions & 0 deletions tests/test_social_media_extractions.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
"https://mastodon.social/@username",
),
("@username@mastodon.social", "https://mastodon.social/@username"),
("username@mastodon.social", "https://mastodon.social/@username"),
],
)
def test_extract_mastodon_url(input_string: str, result: str) -> None:
Expand All @@ -38,7 +39,9 @@ def test_extract_linkedin_url(input_string: str, result: str) -> None:
("input_string", "result"),
[
("username", "https://bsky.app/profile/username.bsky.social"),
("@username", "https://bsky.app/profile/username.bsky.social"),
("username.dev", "https://bsky.app/profile/username.dev"),
("@username.dev", "https://bsky.app/profile/username.dev"),
("username.bsky.social", "https://bsky.app/profile/username.bsky.social"),
("bsky.app/profile/username", "https://bsky.app/profile/username.bsky.social"),
("bsky/username", "https://bsky.app/profile/username.bsky.social"),
Expand Down
Loading