Skip to content

Commit 1a16dfa

Browse files
committed
Update codebase: improvements to intblocks, downloader, config, and tests
1 parent a0f9b0f commit 1a16dfa

File tree

9 files changed

+249
-12
lines changed

9 files changed

+249
-12
lines changed

CHANGELOG.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
99

1010
## [0.2.0] - 2025-12-20
1111

12-
1312
Initial release of Internacia Python SDK
1413

1514
### Added

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Internacia Python SDK
22

3-
A Python SDK for accessing [internacia-db](https://github.com/commondataio/internacia-db) data with support for countries, international blocks, and fuzzy search across multiple languages.
3+
A Python SDK for accessing [internacia-db](https://github.com/datenoio/internacia-db) data with support for countries, international blocks, and fuzzy search across multiple languages.
44

55
## Features
66

@@ -631,7 +631,7 @@ Contributions are welcome! Please feel free to submit a Pull Request.
631631

632632
## Related Projects
633633

634-
- [internacia-db](https://github.com/commondataio/internacia-db): The data repository
634+
- [internacia-db](https://github.com/datenoio/internacia-db): The data repository
635635
- [internacia-api](https://github.com/commondataio/internacia-api): REST API for internacia-db
636636

637637
## Changelog

dev/docs/REPOSITORY_ANALYSIS.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ if limit is not None:
5151

5252
**Issue**: A hardcoded absolute path is included in the codebase:
5353
```python
54-
Path("/Users/ibegtin/workspace/commondataio/internacia-db/data/datasets/internacia.duckdb")
54+
Path("/Users/ibegtin/workspace/datenoio/internacia-db/data/datasets/internacia.duckdb")
5555
```
5656

5757
**Recommendation**: Remove hardcoded paths. Use environment variables or configuration files instead.

internacia/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
download_database,
3535
get_latest_version,
3636
check_for_updates,
37+
list_releases,
3738
get_cache_dir,
3839
get_cached_database_path,
3940
)
@@ -63,6 +64,7 @@
6364
"download_database",
6465
"get_latest_version",
6566
"check_for_updates",
67+
"list_releases",
6668
"get_cache_dir",
6769
"get_cached_database_path",
6870
]

internacia/config.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,10 +53,22 @@ def get_db_path(cls, explicit_path: Optional[Path] = None) -> Optional[Path]:
5353

5454
# Try cached database (lazy import to avoid circular dependency)
5555
try:
56-
from internacia.downloader import get_cached_database_path # pylint: disable=import-outside-toplevel
56+
from internacia.downloader import get_cached_database_path, get_cache_dir # pylint: disable=import-outside-toplevel
57+
58+
# First check unversioned cache path
5759
cached_path = get_cached_database_path()
5860
if cached_path.exists():
5961
return cached_path
62+
63+
# If not found, check versioned cache directories
64+
cache_dir = get_cache_dir()
65+
if cache_dir.exists():
66+
# Look for database files in versioned subdirectories
67+
for item in cache_dir.iterdir():
68+
if item.is_dir():
69+
db_file = item / "internacia.duckdb"
70+
if db_file.exists():
71+
return db_file
6072
except Exception: # pylint: disable=broad-exception-caught
6173
# If the downloader is unavailable or raises an error, continue
6274
pass

internacia/downloader.py

Lines changed: 155 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
# GitHub API endpoints
1616
GITHUB_API_BASE = "https://api.github.com"
17-
GITHUB_REPO = "commondataio/internacia-db"
17+
GITHUB_REPO = "datenoio/internacia-db"
1818
GITHUB_RELEASES_URL = f"{GITHUB_API_BASE}/repos/{GITHUB_REPO}/releases"
1919
GITHUB_LATEST_RELEASE_URL = f"{GITHUB_RELEASES_URL}/latest"
2020

@@ -83,6 +83,23 @@ def get_latest_version() -> str:
8383
timeout=10,
8484
headers={"Accept": "application/vnd.github.v3+json"},
8585
)
86+
87+
# Handle 404 specifically with a helpful error message
88+
if response.status_code == 404:
89+
error_msg = (
90+
f"Repository or release not found at {GITHUB_REPO}.\n"
91+
"This could mean:\n"
92+
" 1. The repository doesn't exist or has been renamed\n"
93+
" 2. The repository has no releases yet\n"
94+
" 3. The repository is private and requires authentication\n\n"
95+
"Alternative: You can build the database locally by:\n"
96+
" 1. Cloning the internacia-db repository\n"
97+
" 2. Running: python3 scripts/builder.py build --formats duckdb\n"
98+
" 3. Setting INTERNACIA_DB_PATH to point to the built database"
99+
)
100+
logger.error(error_msg)
101+
raise DownloadError(error_msg)
102+
86103
response.raise_for_status()
87104

88105
data = response.json()
@@ -94,6 +111,24 @@ def get_latest_version() -> str:
94111
logger.info("Latest version: %s", version)
95112
return version
96113

114+
except requests.exceptions.HTTPError as e:
115+
if e.response is not None and e.response.status_code == 404:
116+
# Already handled above, but catch here for safety
117+
error_msg = (
118+
f"Repository or release not found at {GITHUB_REPO}.\n"
119+
"This could mean:\n"
120+
" 1. The repository doesn't exist or has been renamed\n"
121+
" 2. The repository has no releases yet\n"
122+
" 3. The repository is private and requires authentication\n\n"
123+
"Alternative: You can build the database locally by:\n"
124+
" 1. Cloning the internacia-db repository\n"
125+
" 2. Running: python3 scripts/builder.py build --formats duckdb\n"
126+
" 3. Setting INTERNACIA_DB_PATH to point to the built database"
127+
)
128+
logger.error(error_msg)
129+
raise DownloadError(error_msg) from e
130+
logger.error("Failed to fetch latest version: %s", str(e))
131+
raise DownloadError("Failed to fetch latest version: %s" % str(e)) from e
97132
except requests.exceptions.RequestException as e:
98133
logger.error("Failed to fetch latest version: %s", str(e))
99134
raise DownloadError("Failed to fetch latest version: %s" % str(e)) from e
@@ -130,6 +165,25 @@ def get_database_info(version: Optional[str] = None) -> dict:
130165
timeout=10,
131166
headers={"Accept": "application/vnd.github.v3+json"},
132167
)
168+
169+
# Handle 404 specifically with a helpful error message
170+
if response.status_code == 404:
171+
error_msg = (
172+
f"Release not found for {GITHUB_REPO}"
173+
+ (f" (version: {version})" if version else "") + ".\n"
174+
"This could mean:\n"
175+
" 1. The repository doesn't exist or has been renamed\n"
176+
" 2. The specified version doesn't exist\n"
177+
" 3. The repository has no releases yet\n"
178+
" 4. The repository is private and requires authentication\n\n"
179+
"Alternative: You can build the database locally by:\n"
180+
" 1. Cloning the internacia-db repository\n"
181+
" 2. Running: python3 scripts/builder.py build --formats duckdb\n"
182+
" 3. Setting INTERNACIA_DB_PATH to point to the built database"
183+
)
184+
logger.error(error_msg)
185+
raise DownloadError(error_msg)
186+
133187
response.raise_for_status()
134188

135189
data = response.json()
@@ -143,7 +197,41 @@ def get_database_info(version: Optional[str] = None) -> dict:
143197
break
144198

145199
if not db_asset:
146-
raise VersionError(f"No database file found in release {version or 'latest'}")
200+
release_tag = data.get("tag_name", version or "latest")
201+
asset_names = [asset.get("name", "") for asset in assets] if assets else []
202+
203+
error_msg = (
204+
f"No database file (.duckdb) found in release {release_tag}.\n"
205+
)
206+
207+
if assets:
208+
error_msg += (
209+
f"Available assets in this release: {', '.join(asset_names) if asset_names else 'none'}\n"
210+
)
211+
else:
212+
error_msg += (
213+
"This release has no assets attached.\n"
214+
)
215+
216+
error_msg += (
217+
"\nThis could mean:\n"
218+
" 1. The database file hasn't been uploaded to this release yet\n"
219+
" 2. The database file is available in a different release\n"
220+
" 3. The database needs to be built from source\n\n"
221+
"Alternatives:\n"
222+
" 1. Check other releases for a database file:\n"
223+
" from internacia import list_releases\n"
224+
" releases = list_releases()\n"
225+
" # Look for releases with has_database=True\n"
226+
" 2. Build the database locally:\n"
227+
" - Clone the internacia-db repository\n"
228+
" - Run: python3 scripts/builder.py build --formats duckdb\n"
229+
" - Set INTERNACIA_DB_PATH to point to the built database\n"
230+
" 3. Use the source data directly from the repository"
231+
)
232+
233+
logger.error(error_msg)
234+
raise VersionError(error_msg)
147235

148236
return {
149237
"version": data.get("tag_name", version or "latest"),
@@ -154,6 +242,26 @@ def get_database_info(version: Optional[str] = None) -> dict:
154242
"published_at": data.get("published_at"),
155243
}
156244

245+
except requests.exceptions.HTTPError as e:
246+
if e.response is not None and e.response.status_code == 404:
247+
# Already handled above, but catch here for safety
248+
error_msg = (
249+
f"Release not found for {GITHUB_REPO}"
250+
+ (f" (version: {version})" if version else "") + ".\n"
251+
"This could mean:\n"
252+
" 1. The repository doesn't exist or has been renamed\n"
253+
" 2. The specified version doesn't exist\n"
254+
" 3. The repository has no releases yet\n"
255+
" 4. The repository is private and requires authentication\n\n"
256+
"Alternative: You can build the database locally by:\n"
257+
" 1. Cloning the internacia-db repository\n"
258+
" 2. Running: python3 scripts/builder.py build --formats duckdb\n"
259+
" 3. Setting INTERNACIA_DB_PATH to point to the built database"
260+
)
261+
logger.error(error_msg)
262+
raise DownloadError(error_msg) from e
263+
logger.error("Failed to fetch database info: %s", str(e))
264+
raise DownloadError(f"Failed to fetch database info: {str(e)}") from e
157265
except requests.exceptions.RequestException as e:
158266
logger.error("Failed to fetch database info: %s", str(e))
159267
raise DownloadError(f"Failed to fetch database info: {str(e)}") from e
@@ -310,6 +418,51 @@ def download_database(
310418
raise DownloadError(f"Unexpected error during download: {str(e)}") from e
311419

312420

421+
def list_releases() -> list:
422+
"""
423+
List available releases (a single un-paginated request, so only the first page GitHub returns) and their database file status.
424+
425+
Returns:
426+
List of dictionaries with release information:
427+
- tag_name: str, release tag
428+
- published_at: str, publication date
429+
- has_database: bool, whether release has a .duckdb file
430+
- asset_names: list, names of all assets in the release
431+
432+
Raises:
433+
DownloadError: If unable to fetch releases
434+
"""
435+
try:
436+
logger.debug("Fetching releases from: %s", GITHUB_RELEASES_URL)
437+
response = requests.get(
438+
GITHUB_RELEASES_URL,
439+
timeout=10,
440+
headers={"Accept": "application/vnd.github.v3+json"},
441+
)
442+
response.raise_for_status()
443+
444+
releases_data = response.json()
445+
releases = []
446+
447+
for release in releases_data:
448+
assets = release.get("assets", [])
449+
asset_names = [asset.get("name", "") for asset in assets]
450+
has_database = any(name.endswith(".duckdb") for name in asset_names)
451+
452+
releases.append({
453+
"tag_name": release.get("tag_name", ""),
454+
"published_at": release.get("published_at", ""),
455+
"has_database": has_database,
456+
"asset_names": asset_names,
457+
})
458+
459+
return releases
460+
461+
except requests.exceptions.RequestException as e:
462+
logger.error("Failed to fetch releases: %s", str(e))
463+
raise DownloadError(f"Failed to fetch releases: {str(e)}") from e
464+
465+
313466
def check_for_updates() -> dict:
314467
"""
315468
Check if a newer database version is available.

internacia/intblocks.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,10 +121,12 @@ def get_by_member(self, country_code: str) -> List[Intblock]:
121121
Returns:
122122
List of block dictionaries
123123
"""
124+
# In DuckDB, UNNESTing a list of structs yields a column named 'unnest' that holds each struct;
125+
# 'member' below is only the table alias, so struct fields are read as unnest.field (not member.field)
124126
query = """
125127
SELECT DISTINCT i.*
126128
FROM intblocks i, UNNEST(i.includes) AS member
127-
WHERE member.id = ?
129+
WHERE unnest.id = ?
128130
"""
129131
results = self._db.execute_query_dict(query, (country_code.upper(),))
130132
return results

0 commit comments

Comments
 (0)