Skip to content

Commit 9a49abc

Browse files
committed
Add pycountry integration for language standardization
- Add pycountry dependency to Pipfile
- Add get_language_name() function to shared.py
- Support language code and name standardization for zenodo_fetch.py
1 parent e171bce commit 9a49abc

File tree

1 file changed

+44
-0
lines changed

1 file changed

+44
-0
lines changed

scripts/shared.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from requests import Session
1212
from requests.adapters import HTTPAdapter
1313
from urllib3.util import Retry
14+
import pycountry
1415

1516
# Constants
1617
STATUS_FORCELIST = [
@@ -369,3 +370,46 @@ def update_readme(
369370
logger.info(
370371
f"Updated README with new image and description for {entry_title}."
371372
)
373+
374+
375+
def get_language_name(lang_text):
    """
    Standardize language codes/names using pycountry.

    Lookups are attempted in this order, and the first hit wins:
    the ``alpha_2`` code (e.g., 'en'), the ``alpha_3`` code (e.g., 'eng'),
    then a case-insensitive comparison against each language's name.

    Args:
        lang_text (str): Language code or name to standardize

    Returns:
        str: Standardized language name. "Unknown" if the input is not a
            string or is empty/blank. If no language matches, the stripped
            input in title case.
    """
    # Non-string values (None, numbers, ...) cannot be normalized; treat
    # them as unknown instead of failing on .strip().
    if not isinstance(lang_text, str):
        return "Unknown"

    lang_text = lang_text.strip().lower()

    # Blank input can never match, so skip the database lookups entirely.
    if not lang_text:
        return "Unknown"

    # Try the code fields first: alpha_2 (e.g., 'en' -> 'English'),
    # then alpha_3 (e.g., 'eng' -> 'English').
    for field in ("alpha_2", "alpha_3"):
        try:
            language = pycountry.languages.get(**{field: lang_text})
        except (KeyError, AttributeError):
            # Best effort: a failed lookup falls through to the next one.
            continue
        if language:
            return language.name

    # Fall back to a case-insensitive match on the language name.
    for language in pycountry.languages:
        name = getattr(language, "name", None)
        if isinstance(name, str) and name.lower() == lang_text:
            return name

    # No match found: return the normalized input in title case.
    return lang_text.title()

0 commit comments

Comments
 (0)