Skip to content

Commit 5558c04

Browse files
committed
fix: coderabbit
1 parent 292d2fa commit 5558c04

File tree

2 files changed

+43
-15
lines changed

2 files changed

+43
-15
lines changed

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ Command-line and Python client for downloading and deploying datasets on DBpedia
2020
- [Delete](#cli-delete)
2121
- [Module Usage](#module-usage)
2222
- [Deploy](#module-deploy)
23-
- [Contributing](#contributing)
23+
- [Development & Contributing](#development--contributing)
2424
- [Linting](#linting)
2525
- [Testing](#testing)
2626

@@ -558,7 +558,7 @@ from databusclient import deploy
558558
deploy(dataset, "mysterious API key")
559559
```
560560
561-
## Development
561+
## Development & Contributing
562562
563563
Install development dependencies yourself or via [Poetry](https://python-poetry.org/):
564564
@@ -570,7 +570,7 @@ poetry install --with dev
570570
571571
The used linter is [Ruff](https://ruff.rs/). Ruff is configured in `pyproject.toml` and is enforced in CI (`.github/workflows/ruff.yml`).
572572
573-
For development, you can run linting locally with `ruff check . ` and optionally auto-format with `ruff format .`.
573+
For development, you can run linting locally with `ruff check .` and optionally auto-format with `ruff format .`.
574574
575575
To ensure compatibility with the `pyproject.toml` configured dependencies, run Ruff via Poetry:
576576

databusclient/api/download.py

Lines changed: 40 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -114,9 +114,12 @@ def _download_file(
114114
file.write(data)
115115
progress_bar.close()
116116

117-
# TODO: could be a problem of github raw / openflaas
117+
# TODO: keep check or remove?
118118
if total_size_in_bytes != 0 and progress_bar.n != total_size_in_bytes:
119-
raise IOError("Downloaded size does not match Content-Length header")
119+
localsize = os.path.getsize(filename)
120+
print(f"\nHeaders: {response.headers}")
121+
print(f"\n[WARNING]: Downloaded size {progress_bar.n} does not match Content-Length header {total_size_in_bytes} ( local file size: {localsize})")
122+
# raise IOError("Downloaded size does not match Content-Length header")
120123

121124

122125
def _download_files(
@@ -407,12 +410,20 @@ def _get_databus_versions_of_artifact(
407410
json_dict = json.loads(json_str)
408411
versions = json_dict.get("databus:hasVersion")
409412

410-
# Single version case {}
413+
if versions is None:
414+
raise ValueError("No 'databus:hasVersion' field in artifact JSON-LD")
415+
411416
if isinstance(versions, dict):
412417
versions = [versions]
413-
# Multiple versions case [{}, {}]
418+
elif not isinstance(versions, list):
419+
raise ValueError(
420+
f"Unexpected type for 'databus:hasVersion': {type(versions).__name__}"
421+
)
422+
423+
version_urls = [
424+
v["@id"] for v in versions if isinstance(v, dict) and "@id" in v
425+
]
414426

415-
version_urls = [v["@id"] for v in versions if "@id" in v]
416427
if not version_urls:
417428
raise ValueError("No versions found in artifact JSON-LD")
418429

@@ -435,13 +446,16 @@ def _get_file_download_urls_from_artifact_jsonld(json_str: str) -> List[str]:
435446
List of all file download URLs in the artifact version.
436447
"""
437448

438-
databusIdUrl = []
449+
databusIdUrl: List[str] = []
450+
439451
json_dict = json.loads(json_str)
440452
graph = json_dict.get("@graph", [])
441453
for node in graph:
442454
if node.get("@type") == "Part":
443-
id = node.get("file")
444-
databusIdUrl.append(id)
455+
file_uri = node.get("file")
456+
if not isinstance(file_uri, str):
457+
continue
458+
databusIdUrl.append(file_uri)
445459
return databusIdUrl
446460

447461

@@ -488,10 +502,24 @@ def _get_databus_artifacts_of_group(json_str: str) -> List[str]:
488502
Returns a list of artifact URLs.
489503
"""
490504
json_dict = json.loads(json_str)
491-
artifacts = json_dict.get("databus:hasArtifact", [])
505+
artifacts = json_dict.get("databus:hasArtifact")
506+
507+
if artifacts is None:
508+
return []
492509

493-
result = []
494-
for item in artifacts:
510+
if isinstance(artifacts, dict):
511+
artifacts_iter = [artifacts]
512+
elif isinstance(artifacts, list):
513+
artifacts_iter = artifacts
514+
else:
515+
raise ValueError(
516+
f"Unexpected type for 'databus:hasArtifact': {type(artifacts).__name__}"
517+
)
518+
519+
result: List[str] = []
520+
for item in artifacts_iter:
521+
if not isinstance(item, dict):
522+
continue
495523
uri = item.get("@id")
496524
if not uri:
497525
continue
@@ -538,7 +566,7 @@ def download(
538566
# Auto-detect sparql endpoint from host if not given
539567
if uri_endpoint is None:
540568
uri_endpoint = f"https://{host}/sparql"
541-
print(f"SPARQL endpoint {endpoint}")
569+
print(f"SPARQL endpoint {uri_endpoint}")
542570

543571
if group == "collections" and artifact is not None:
544572
print(f"Downloading collection: {databusURI}")

0 commit comments

Comments (0)