Skip to content

Commit f05ab8b

Browse files
authored
Include more username formats for Mastodon and Bluesky (#119)
* Fix Bluesky
* Expand mastodon support
* Let's not stop the script for a single invalid input
1 parent 9f585f7 commit f05ab8b

File tree

2 files changed

+27
-10
lines changed

2 files changed

+27
-10
lines changed

src/models/europython.py

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -91,18 +91,28 @@ def extract_twitter_url(text: str) -> str:
9191
return twitter_url.split("?")[0]
9292

9393
@staticmethod
94-
def extract_mastodon_url(text: str) -> str:
94+
def extract_mastodon_url(text: str) -> None | str:
9595
"""
96-
Extract the Mastodon URL from the answer, handle @username@instance format
96+
Normalize Mastodon handle or URL to the format: https://<instance>/@<username>
9797
"""
98-
if not text.startswith(("https://", "http://")) and text.count("@") == 2:
99-
mastodon_url = f"https://{text.split('@')[2]}/@{text.split('@')[1]}"
100-
else:
101-
mastodon_url = (
102-
f"https://{text.removeprefix('https://').removeprefix('http://')}"
103-
)
98+
text = text.strip().split("?", 1)[0]
99+
100+
# Handle @username@instance or username@instance formats
101+
if "@" in text and not text.startswith("http"):
102+
parts = text.split("@")
103+
if len(parts) == 3: # @username@instance
104+
_, username, instance = parts
105+
elif len(parts) == 2: # username@instance
106+
username, instance = parts
107+
else:
108+
return None
109+
return f"https://{instance}/@{username}"
104110

105-
return mastodon_url.split("?")[0]
111+
# Handle full URLs
112+
if text.startswith("http://"):
113+
text = "https://" + text[len("http://") :]
114+
115+
return text
106116

107117
@staticmethod
108118
def extract_linkedin_url(text: str) -> str:
@@ -126,7 +136,7 @@ def extract_bluesky_url(text: str) -> str:
126136
Returns a normalized BlueSky URL in the form https://bsky.app/profile/<USERNAME>.bsky.social,
127137
or uses the entire domain if it's custom (e.g., .dev).
128138
"""
129-
text = text.split("?", 1)[0].strip()
139+
text = text.strip().split("?", 1)[0]
130140

131141
if text.startswith("https://"):
132142
text = text[8:]
@@ -136,6 +146,10 @@ def extract_bluesky_url(text: str) -> str:
136146
if text.startswith("www."):
137147
text = text[4:]
138148

149+
# Remove @ if present
150+
if text.startswith("@"):
151+
text = text[1:]
152+
139153
for marker in ("bsky.app/profile/", "bsky/"):
140154
if marker in text:
141155
text = text.split(marker, 1)[1]

tests/test_social_media_extractions.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
"https://mastodon.social/@username",
1414
),
1515
("@username@mastodon.social", "https://mastodon.social/@username"),
16+
("username@mastodon.social", "https://mastodon.social/@username"),
1617
],
1718
)
1819
def test_extract_mastodon_url(input_string: str, result: str) -> None:
@@ -38,7 +39,9 @@ def test_extract_linkedin_url(input_string: str, result: str) -> None:
3839
("input_string", "result"),
3940
[
4041
("username", "https://bsky.app/profile/username.bsky.social"),
42+
("@username", "https://bsky.app/profile/username.bsky.social"),
4143
("username.dev", "https://bsky.app/profile/username.dev"),
44+
("@username.dev", "https://bsky.app/profile/username.dev"),
4245
("username.bsky.social", "https://bsky.app/profile/username.bsky.social"),
4346
("bsky.app/profile/username", "https://bsky.app/profile/username.bsky.social"),
4447
("bsky/username", "https://bsky.app/profile/username.bsky.social"),

0 commit comments

Comments
 (0)