From 9448228cac7d3d705397e29a2dcf0a0552edaee5 Mon Sep 17 00:00:00 2001
From: DhanashreePetare
Date: Thu, 18 Dec 2025 17:41:57 +0530
Subject: [PATCH 1/5] Restrict Vault token exchange to specific hosts; improve auth errors; add tests (fixes #19)

---
 databusclient/api/download.py |  70 ++++++++++++++++-----
 databusclient/cli.py          |  25 ++++----
 tests/conftest.py             |  30 ++++++++++
 tests/test_download_auth.py   | 105 ++++++++++++++++++++++++++++++++++
 4 files changed, 206 insertions(+), 24 deletions(-)
 create mode 100644 tests/conftest.py
 create mode 100644 tests/test_download_auth.py

diff --git a/databusclient/api/download.py b/databusclient/api/download.py
index df7c53c..9bcb81f 100644
--- a/databusclient/api/download.py
+++ b/databusclient/api/download.py
@@ -1,6 +1,7 @@
 import json
 import os
 from typing import List
+from urllib.parse import urlparse
 
 import requests
 from SPARQLWrapper import JSON, SPARQLWrapper
@@ -12,6 +13,18 @@
 )
 
 
+# Hosts that require Vault token based authentication. Central source of truth.
+VAULT_REQUIRED_HOSTS = {
+    "data.dbpedia.io",
+    "data.dev.dbpedia.link",
+}
+
+
+class DownloadAuthError(Exception):
+    """Raised when an authorization problem occurs during download."""
+
+
+
 def _download_file(
     url,
     localDir,
@@ -52,13 +65,21 @@ def _download_file(
     os.makedirs(dirpath, exist_ok=True)  # Create the necessary directories
     # --- 1. Get redirect URL by requesting HEAD ---
     headers = {}
+
+    # Determine hostname early and fail fast if this host requires Vault token
+    parsed = urlparse(url)
+    host = parsed.hostname
+    if host in VAULT_REQUIRED_HOSTS and not vault_token_file:
+        raise DownloadAuthError(
+            f"Vault token required for host '{host}', but no token was provided. Please use --vault-token."
+        )
+
     # --- 1a. public databus ---
     response = requests.head(url, timeout=30)
 
     # --- 1b. Databus API key required ---
     if response.status_code == 401:
-        # print(f"API key required for {url}")
         if not databus_key:
-            raise ValueError("Databus API key not given for protected download")
+            raise DownloadAuthError("Databus API key not given for protected download")
 
         headers = {"X-API-KEY": databus_key}
         response = requests.head(url, headers=headers, timeout=30)
@@ -81,25 +102,48 @@ def _download_file(
     response = requests.get(
         url, headers=headers, stream=True, allow_redirects=True, timeout=30
     )
-    www = response.headers.get(
-        "WWW-Authenticate", ""
-    )  # Check if authentication is required
+    www = response.headers.get("WWW-Authenticate", "")  # Check if authentication is required
 
-    # --- 3. If redirected to authentication 401 Unauthorized, get Vault token and retry ---
+    # --- 3. Handle authentication responses ---
+    # 3a. Server requests Bearer auth
     if response.status_code == 401 and "bearer" in www.lower():
-        print(f"Authentication required for {url}")
-        if not (vault_token_file):
-            raise ValueError("Vault token file not given for protected download")
+        # If host is not configured for Vault, do not attempt token exchange
+        if host not in VAULT_REQUIRED_HOSTS:
+            raise DownloadAuthError(
+                "Server requests Bearer authentication but this host is not configured for Vault token exchange."
+                " Try providing a databus API key with --databus-key or contact your administrator."
+            )
+
+        # Host requires Vault; ensure token file provided
+        if not vault_token_file:
+            raise DownloadAuthError(
+                f"Vault token required for host '{host}', but no token was provided. Please use --vault-token."
+            )
 
-        # --- 3a. Fetch Vault token ---
-        # TODO: cache token
+        # --- 3b. Fetch Vault token and retry ---
         vault_token = __get_vault_access__(url, vault_token_file, auth_url, client_id)
         headers["Authorization"] = f"Bearer {vault_token}"
-        headers.pop("Accept-Encoding")
+        headers.pop("Accept-Encoding", None)
 
-        # --- 3b. Retry with token ---
+        # Retry with token
         response = requests.get(url, headers=headers, stream=True, timeout=30)
 
+        # Map common auth failures to friendly messages
+        if response.status_code == 401:
+            raise DownloadAuthError("Vault token is invalid or expired. Please generate a new token.")
+        if response.status_code == 403:
+            raise DownloadAuthError("Vault token is valid but has insufficient permissions to access this file.")
+
+    # 3c. Generic forbidden without Bearer challenge
+    if response.status_code == 403:
+        raise DownloadAuthError("Access forbidden: your token or API key does not have permission to download this file.")
+
+    # 3d. Generic unauthorized without Bearer
+    if response.status_code == 401:
+        raise DownloadAuthError(
+            "Unauthorized: access denied. Check your --databus-key or --vault-token settings."
+        )
+
     try:
         response.raise_for_status()  # Raise if still failing
     except requests.exceptions.HTTPError as e:
diff --git a/databusclient/cli.py b/databusclient/cli.py
index 97430f5..069408e 100644
--- a/databusclient/cli.py
+++ b/databusclient/cli.py
@@ -7,7 +7,7 @@
 import databusclient.api.deploy as api_deploy
 from databusclient.api.delete import delete as api_delete
-from databusclient.api.download import download as api_download
+from databusclient.api.download import download as api_download, DownloadAuthError
 from databusclient.extensions import webdav
 
 
@@ -171,16 +171,19 @@ def download(
     """
     Download datasets from databus, optionally using vault access if vault options are provided.
     """
-    api_download(
-        localDir=localdir,
-        endpoint=databus,
-        databusURIs=databusuris,
-        token=vault_token,
-        databus_key=databus_key,
-        all_versions=all_versions,
-        auth_url=authurl,
-        client_id=clientid,
-    )
+    try:
+        api_download(
+            localDir=localdir,
+            endpoint=databus,
+            databusURIs=databusuris,
+            token=vault_token,
+            databus_key=databus_key,
+            all_versions=all_versions,
+            auth_url=authurl,
+            client_id=clientid,
+        )
+    except DownloadAuthError as e:
+        raise click.ClickException(str(e))
 
 
 @app.command()
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..5f4c0a2
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,30 @@
+import sys
+import types
+
+# Provide a lightweight fake SPARQLWrapper module for tests when not installed.
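+# Note: databusclient.api.download does `from SPARQLWrapper import JSON, SPARQLWrapper`
+# at import time, so this stub must be registered in sys.modules before the
+# tests import that module.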
+if "SPARQLWrapper" not in sys.modules: + mod = types.ModuleType("SPARQLWrapper") + mod.JSON = None + + class DummySPARQL: + def __init__(self, *args, **kwargs): + pass + + def setQuery(self, q): + self._q = q + + def setReturnFormat(self, f): + self._fmt = f + + def setCustomHttpHeaders(self, h): + self._headers = h + + def query(self): + class R: + def convert(self): + return {"results": {"bindings": []}} + + return R() + + mod.SPARQLWrapper = DummySPARQL + sys.modules["SPARQLWrapper"] = mod diff --git a/tests/test_download_auth.py b/tests/test_download_auth.py new file mode 100644 index 0000000..ff4776a --- /dev/null +++ b/tests/test_download_auth.py @@ -0,0 +1,105 @@ +import os +from unittest.mock import Mock, patch + +import pytest + +import requests + +import databusclient.api.download as dl + +from databusclient.api.download import VAULT_REQUIRED_HOSTS, DownloadAuthError + + +def make_response(status=200, headers=None, content=b""): + headers = headers or {} + mock = Mock() + mock.status_code = status + mock.headers = headers + mock.content = content + + def iter_content(chunk_size): + if content: + yield content + else: + return + + mock.iter_content = lambda chunk: iter(iter_content(chunk)) + + def raise_for_status(): + if mock.status_code >= 400: + raise requests.exceptions.HTTPError() + + mock.raise_for_status = raise_for_status + return mock + + +def test_vault_host_no_token_raises(): + vault_host = next(iter(VAULT_REQUIRED_HOSTS)) + url = f"https://{vault_host}/some/protected/file.ttl" + + with pytest.raises(DownloadAuthError) as exc: + dl._download_file(url, localDir='.', vault_token_file=None) + + assert "Vault token required" in str(exc.value) + + +def test_non_vault_host_no_token_allows_download(monkeypatch): + url = "https://example.com/public/file.txt" + + resp_head = make_response(status=200, headers={}) + resp_get = make_response(status=200, headers={"content-length": "0"}, content=b"") + + with patch("requests.head", return_value=resp_head), patch( + "requests.get", return_value=resp_get + ): + # should not raise + dl._download_file(url, localDir='.', vault_token_file=None) + + +def test_401_after_token_exchange_reports_invalid_token(monkeypatch): + vault_host = next(iter(VAULT_REQUIRED_HOSTS)) + url = f"https://{vault_host}/protected/file.ttl" + + # initial head and get -> 401 with Bearer + resp_head = make_response(status=200, headers={}) + resp_401 = make_response(status=401, headers={"WWW-Authenticate": "Bearer realm=\"auth\""}) + + # after retry with token -> still 401 + resp_401_retry = make_response(status=401, headers={}) + + # Mock requests.get side effects: first 401 (challenge), then 401 after token + get_side_effects = [resp_401, resp_401_retry] + + # Mock token exchange responses + post_resp_1 = Mock() + post_resp_1.json.return_value = {"access_token": "ACCESS"} + post_resp_2 = Mock() + post_resp_2.json.return_value = {"access_token": "VAULT"} + + with patch("requests.head", return_value=resp_head), patch( + "requests.get", side_effect=get_side_effects + ), patch("requests.post", side_effect=[post_resp_1, post_resp_2]): + # set REFRESH_TOKEN so __get_vault_access__ doesn't try to open a file + monkeypatch.setenv("REFRESH_TOKEN", "x" * 90) + + with pytest.raises(DownloadAuthError) as exc: + dl._download_file(url, localDir='.', vault_token_file="/does/not/matter") + + assert "invalid or expired" in str(exc.value) + + +def test_403_reports_insufficient_permissions(): + vault_host = next(iter(VAULT_REQUIRED_HOSTS)) + url = 
f"https://{vault_host}/protected/file.ttl" + + resp_head = make_response(status=200, headers={}) + resp_403 = make_response(status=403, headers={}) + + with patch("requests.head", return_value=resp_head), patch( + "requests.get", return_value=resp_403 + ): + # provide a token path so early check does not block + with pytest.raises(DownloadAuthError) as exc: + dl._download_file(url, localDir='.', vault_token_file="/some/token/file") + + assert "permission" in str(exc.value) or "forbidden" in str(exc.value) From c92e58fd99f2742873add834cdacaf8776587073 Mon Sep 17 00:00:00 2001 From: DhanashreePetare Date: Thu, 18 Dec 2025 18:10:11 +0530 Subject: [PATCH 2/5] Restrict Vault token exchange to specific hosts; improve auth errors; add tests and docs note (fixes #19) --- README.md | 2 ++ databusclient/api/download.py | 16 ++++++++++++---- tests/test_download_auth.py | 1 - 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index dc9991f..b485008 100644 --- a/README.md +++ b/README.md @@ -164,6 +164,8 @@ docker run --rm -v $(pwd):/data dbpedia/databus-python-client download $DOWNLOAD - If no `--localdir` is provided, the current working directory is used as base directory. The downloaded files will be stored in the working directory in a folder structure according to the Databus layout, i.e. `./$ACCOUNT/$GROUP/$ARTIFACT/$VERSION/`. - `--vault-token` - If the dataset/files to be downloaded require vault authentication, you need to provide a vault token with `--vault-token /path/to/vault-token.dat`. See [Registration (Access Token)](#registration-access-token) for details on how to get a vault token. + + Note: Vault tokens are only required for certain protected Databus hosts (for example: `data.dbpedia.io`, `data.dev.dbpedia.link`). The client now detects those hosts and will fail early with a clear message if a token is required but not provided. Do not pass `--vault-token` for public downloads. - `--databus-key` - If the databus is protected and needs API key authentication, you can provide the API key with `--databus-key YOUR_API_KEY`. diff --git a/databusclient/api/download.py b/databusclient/api/download.py index 9bcb81f..640cc4a 100644 --- a/databusclient/api/download.py +++ b/databusclient/api/download.py @@ -66,7 +66,9 @@ def _download_file( # --- 1. Get redirect URL by requesting HEAD --- headers = {} - # Determine hostname early and fail fast if this host requires Vault token + # Determine hostname early and fail fast if this host requires Vault token. + # This prevents confusing 401/403 errors later and tells the user exactly + # what to do (provide --vault-token). parsed = urlparse(url) host = parsed.hostname if host in VAULT_REQUIRED_HOSTS and not vault_token_file: @@ -105,22 +107,28 @@ def _download_file( www = response.headers.get("WWW-Authenticate", "") # Check if authentication is required # --- 3. Handle authentication responses --- - # 3a. Server requests Bearer auth + # 3a. Server requests Bearer auth. Only attempt token exchange for hosts + # we explicitly consider Vault-protected (VAULT_REQUIRED_HOSTS). This avoids + # sending tokens to unrelated hosts and makes auth behavior predictable. if response.status_code == 401 and "bearer" in www.lower(): - # If host is not configured for Vault, do not attempt token exchange + # If host is not configured for Vault, do not attempt token exchange. if host not in VAULT_REQUIRED_HOSTS: raise DownloadAuthError( "Server requests Bearer authentication but this host is not configured for Vault token exchange." 
" Try providing a databus API key with --databus-key or contact your administrator." ) - # Host requires Vault; ensure token file provided + # Host requires Vault; ensure token file provided. if not vault_token_file: raise DownloadAuthError( f"Vault token required for host '{host}', but no token was provided. Please use --vault-token." ) # --- 3b. Fetch Vault token and retry --- + # Token exchange is potentially sensitive and should only be performed + # for known hosts. __get_vault_access__ handles reading the refresh + # token and exchanging it; errors are translated to DownloadAuthError + # for user-friendly CLI output. vault_token = __get_vault_access__(url, vault_token_file, auth_url, client_id) headers["Authorization"] = f"Bearer {vault_token}" headers.pop("Accept-Encoding", None) diff --git a/tests/test_download_auth.py b/tests/test_download_auth.py index ff4776a..7225e08 100644 --- a/tests/test_download_auth.py +++ b/tests/test_download_auth.py @@ -1,4 +1,3 @@ -import os from unittest.mock import Mock, patch import pytest From 22e4d862d8cf065f2409a0b8e65daf4ef64bf4cc Mon Sep 17 00:00:00 2001 From: DhanashreePetare Date: Thu, 18 Dec 2025 19:00:50 +0530 Subject: [PATCH 3/5] Release 0.15: bump version, add changelog, docstrings(issue #35) --- CHANGELOG.md | 22 ++++++++++++++++ README.md | 6 +++++ databusclient/__init__.py | 13 +++++++++ databusclient/__main__.py | 18 ++++++++++++- databusclient/api/delete.py | 27 +++++++++++++++++++ databusclient/api/deploy.py | 42 ++++++++++++++++++++++++++++++ databusclient/api/utils.py | 24 ++++++++++++----- databusclient/cli.py | 6 ++++- databusclient/extensions/webdav.py | 32 +++++++++++++++++++++++ pyproject.toml | 2 +- 10 files changed, 183 insertions(+), 9 deletions(-) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..1906d99 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,22 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +## [0.15] - 2025-12-18 +- Prepare new PyPI release 0.15 (skipping 0.13/0.14 as requested). +- Improve Vault authentication: host-restricted token exchange and clearer errors. +- Add tests for Vault auth behavior. +- Add docstrings to increase docstring coverage for CI. + +Note: After merging this branch, publish a PyPI release (version 0.15) so +`pip install databusclient` reflects the updated CLI behavior and bug fixes. +# Changelog + +## 0.15 - Prepared release + +- Prepare PyPI release 0.15. +- Restrict Vault token exchange to known hosts and provide clearer auth errors. +- Add tests for Vault auth behavior. +- Documentation: note about Vault-hosts and `--vault-token` usage. + +(See PR and issue tracker for details.) diff --git a/README.md b/README.md index b485008..a4e3b6f 100644 --- a/README.md +++ b/README.md @@ -41,6 +41,12 @@ Before using the client, install it via pip: python3 -m pip install databusclient ``` +Note: the PyPI release was updated and this repository prepares version `0.15`. If you previously installed `databusclient` via `pip` and observe different CLI behavior, upgrade to the latest release: + +```bash +python3 -m pip install --upgrade databusclient==0.15 +``` + You can then use the client in the command line: ```bash diff --git a/databusclient/__init__.py b/databusclient/__init__.py index d15edb6..92fe8b7 100644 --- a/databusclient/__init__.py +++ b/databusclient/__init__.py @@ -1,3 +1,10 @@ +"""Top-level package for the databus Python client. 
+ +This module exposes a small set of convenience functions and the CLI +entrypoint so the package can be used as a library or via +``python -m databusclient``. +""" + from databusclient import cli from databusclient.api.deploy import create_dataset, create_distribution, deploy @@ -5,4 +12,10 @@ def run(): + """Start the Click CLI application. + + This function is used by the ``__main__`` module and the package + entrypoint to invoke the command line interface. + """ + cli.app() diff --git a/databusclient/__main__.py b/databusclient/__main__.py index 8fe6fda..3a50f9a 100644 --- a/databusclient/__main__.py +++ b/databusclient/__main__.py @@ -1,3 +1,19 @@ +"""Module used for ``python -m databusclient`` execution. + +Runs the package's CLI application. +""" + from databusclient import cli -cli.app() + +def main(): + """Invoke the CLI application. + + Kept as a named function for easier testing and clarity. + """ + + cli.app() + + +if __name__ == "__main__": + main() diff --git a/databusclient/api/delete.py b/databusclient/api/delete.py index 2ea8fb4..e96f97b 100644 --- a/databusclient/api/delete.py +++ b/databusclient/api/delete.py @@ -1,3 +1,10 @@ +"""Helpers for deleting Databus resources via the Databus HTTP API. + +This module provides utilities to delete groups, artifacts and versions on a +Databus instance using authenticated HTTP requests. The class `DeleteQueue` +also allows batching of deletions. +""" + import json from typing import List @@ -16,23 +23,43 @@ class DeleteQueue: """ def __init__(self, databus_key: str): + """Create a DeleteQueue bound to a given Databus API key. + + Args: + databus_key: API key used to authenticate deletion requests. + """ self.databus_key = databus_key self.queue: set[str] = set() def add_uri(self, databusURI: str): + """Add a single Databus URI to the deletion queue. + + The URI will be deleted when `execute()` is called. + """ self.queue.add(databusURI) def add_uris(self, databusURIs: List[str]): + """Add multiple Databus URIs to the deletion queue. + + Args: + databusURIs: Iterable of full Databus URIs. + """ for uri in databusURIs: self.queue.add(uri) def is_empty(self) -> bool: + """Return True if the queue is empty.""" return len(self.queue) == 0 def is_not_empty(self) -> bool: + """Return True if the queue contains any URIs.""" return len(self.queue) > 0 def execute(self): + """Execute all queued deletions. + + Each queued URI will be deleted using `_delete_resource`. + """ for uri in self.queue: print(f"[DELETE] {uri}") _delete_resource( diff --git a/databusclient/api/deploy.py b/databusclient/api/deploy.py index ef8ebf5..23c77ea 100644 --- a/databusclient/api/deploy.py +++ b/databusclient/api/deploy.py @@ -1,3 +1,10 @@ +"""Build and publish Databus datasets (JSON-LD) from provided metadata. + +This module exposes helpers to create distribution strings, compute file +information (sha256 and size), construct dataset JSON-LD payloads and +publish them to a Databus instance using the Databus publish API. +""" + import hashlib import json from enum import Enum @@ -25,6 +32,13 @@ class DeployLogLevel(Enum): def _get_content_variants(distribution_str: str) -> Optional[Dict[str, str]]: + """Parse content-variant key/value pairs from a distribution string. + + The CLI supports passing a distribution as ``url|lang=en_type=parsed|...``. + This helper extracts the ``lang``/``type`` style key/value pairs as a + dictionary. 
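+
+    Example (illustrative, assuming the underscore-separated pair syntax shown above):
+        >>> _get_content_variants("https://example.org/data.ttl|lang=en_type=parsed")
+        {'lang': 'en', 'type': 'parsed'}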
+ """ + args = distribution_str.split("|") # cv string is ALWAYS at position 1 after the URL @@ -50,6 +64,12 @@ def _get_content_variants(distribution_str: str) -> Optional[Dict[str, str]]: def _get_filetype_definition( distribution_str: str, ) -> Tuple[Optional[str], Optional[str]]: + """Extract an explicit file format and compression from a distribution string. + + Returns (file_extension, compression) where each may be ``None`` if the + format should be inferred from the URL path. + """ + file_ext = None compression = None @@ -87,6 +107,12 @@ def _get_filetype_definition( def _get_extensions(distribution_str: str) -> Tuple[str, str, str]: + """Return tuple `(extension_part, format_extension, compression)`. + + ``extension_part`` is the textual extension appended to generated + filenames (e.g. ".ttl.gz"). + """ + extension_part = "" format_extension, compression = _get_filetype_definition(distribution_str) @@ -126,6 +152,11 @@ def _get_extensions(distribution_str: str) -> Tuple[str, str, str]: def _get_file_stats(distribution_str: str) -> Tuple[Optional[str], Optional[int]]: + """Parse an optional ``sha256sum:length`` tuple from a distribution string. + + Returns (sha256sum, content_length) or (None, None) when not provided. + """ + metadata_list = distribution_str.split("|")[1:] # check whether there is the shasum:length tuple separated by : if len(metadata_list) == 0 or ":" not in metadata_list[-1]: @@ -146,6 +177,12 @@ def _get_file_stats(distribution_str: str) -> Tuple[Optional[str], Optional[int] def _load_file_stats(url: str) -> Tuple[str, int]: + """Download the file at ``url`` and compute its SHA-256 and length. + + This is used as a fallback when the caller did not supply checksum/size + information in the CLI or metadata file. + """ + resp = requests.get(url, timeout=30) if resp.status_code >= 400: raise requests.exceptions.RequestException(response=resp) @@ -156,6 +193,11 @@ def _load_file_stats(url: str) -> Tuple[str, int]: def get_file_info(distribution_str: str) -> Tuple[Dict[str, str], str, str, str, int]: + """Return parsed file information for a distribution string. + + Returns a tuple `(cvs, format_extension, compression, sha256sum, size)`. + """ + cvs = _get_content_variants(distribution_str) extension_part, format_extension, compression = _get_extensions(distribution_str) diff --git a/databusclient/api/utils.py b/databusclient/api/utils.py index 7e27ff3..25c5300 100644 --- a/databusclient/api/utils.py +++ b/databusclient/api/utils.py @@ -1,3 +1,9 @@ +"""Utility helpers used by the API submodules. + +Contains small parsing helpers and HTTP helpers that are shared by +`download`, `deploy` and `delete` modules. +""" + from typing import Optional, Tuple import requests @@ -24,6 +30,12 @@ def get_databus_id_parts_from_file_url( A tuple containing (host, accountId, groupId, artifactId, versionId, fileId). Each element is a string or None if not present. """ + """Split a Databus URI into its six parts. + + The returned tuple is (host, accountId, groupId, artifactId, versionId, fileId). + Missing parts are returned as ``None``. + """ + uri = uri.removeprefix("https://").removeprefix("http://") parts = uri.strip("/").split("/") parts += [None] * (6 - len(parts)) # pad with None if less than 6 parts @@ -31,16 +43,16 @@ def get_databus_id_parts_from_file_url( def fetch_databus_jsonld(uri: str, databus_key: str | None = None) -> str: - """ - Retrieve JSON-LD representation of a databus resource. + """Fetch the JSON-LD representation of a Databus resource. 
- Parameters: - - uri: The full databus URI - - databus_key: Optional Databus API key for authentication on protected resources + Args: + uri: Full Databus resource URI. + databus_key: Optional API key for protected resources. Returns: - JSON-LD string representation of the databus resource. + The response body as a string containing JSON-LD. """ + headers = {"Accept": "application/ld+json"} if databus_key is not None: headers["X-API-KEY"] = databus_key diff --git a/databusclient/cli.py b/databusclient/cli.py index 069408e..1a345f3 100644 --- a/databusclient/cli.py +++ b/databusclient/cli.py @@ -13,7 +13,11 @@ @click.group() def app(): - """Databus Client CLI""" + """Databus Client CLI. + + Provides `deploy`, `download`, and `delete` commands for interacting + with the DBpedia Databus. + """ pass diff --git a/databusclient/extensions/webdav.py b/databusclient/extensions/webdav.py index c0747f6..7981a49 100644 --- a/databusclient/extensions/webdav.py +++ b/databusclient/extensions/webdav.py @@ -1,3 +1,11 @@ +"""WebDAV/Nextcloud upload helper used by the deploy CLI. + +This module computes SHA-256 checksums and sizes for local files and uses +``rclone`` to copy files to a remote WebDAV/Nextcloud instance. The +`upload_to_webdav` function returns a list of metadata dictionaries suitable +for passing to ``deploy_from_metadata``. +""" + import hashlib import os import posixpath @@ -6,6 +14,14 @@ def compute_sha256_and_length(filepath): + """Compute the SHA-256 hex digest and total byte length of a file. + + Args: + filepath: Path to the file to hash. + + Returns: + Tuple of (sha256_hex, size_in_bytes). + """ sha256 = hashlib.sha256() total_length = 0 with open(filepath, "rb") as f: @@ -19,6 +35,11 @@ def compute_sha256_and_length(filepath): def get_all_files(path): + """Return a list of all files for a path. + + If `path` is a file, returns a single-element list. If it is a directory, + walks the directory recursively and returns absolute file paths. + """ if os.path.isfile(path): return [path] files = [] @@ -31,6 +52,17 @@ def get_all_files(path): def upload_to_webdav( source_paths: list[str], remote_name: str, remote_path: str, webdav_url: str ): + """Upload local files or folders to a configured rclone remote. + + Args: + source_paths: List of files or directories to upload. + remote_name: Name of the rclone remote (e.g., "nextcloud"). + remote_path: Destination path on the remote. + webdav_url: Public WebDAV URL used to construct download URLs. + + Returns: + A list of dicts with keys: ``filename``, ``checksum``, ``size``, ``url``. 
+ """ result = [] for path in source_paths: if not os.path.exists(path): diff --git a/pyproject.toml b/pyproject.toml index 5593c74..92f479b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "databusclient" -version = "0.14" +version = "0.15" description = "A simple client for submitting, downloading, and deleting data on the DBpedia Databus" authors = ["DBpedia Association"] license = "Apache-2.0 License" From 332d0c7caff71cfc3164c0c81d242433c599d254 Mon Sep 17 00:00:00 2001 From: DhanashreePetare Date: Wed, 31 Dec 2025 14:39:02 +0530 Subject: [PATCH 4/5] Prepare PyPI release 0.15 with build artifacts and publishing guide (Issue #35) --- .gitignore | 2 + CHANGELOG.md | 30 ++++++------ RELEASE_NOTES.md | 97 +++++++++++++++++++++++++++++++++++++++ databusclient/__init__.py | 1 + 4 files changed, 116 insertions(+), 14 deletions(-) create mode 100644 RELEASE_NOTES.md diff --git a/.gitignore b/.gitignore index d22cb37..c53d619 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,7 @@ # project-specific tmp/ +vault-token.dat +test-download/ # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/CHANGELOG.md b/CHANGELOG.md index 1906d99..c86fc78 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,21 +2,23 @@ All notable changes to this project will be documented in this file. -## [0.15] - 2025-12-18 -- Prepare new PyPI release 0.15 (skipping 0.13/0.14 as requested). -- Improve Vault authentication: host-restricted token exchange and clearer errors. -- Add tests for Vault auth behavior. -- Add docstrings to increase docstring coverage for CI. +## [0.15] - 2025-12-31 -Note: After merging this branch, publish a PyPI release (version 0.15) so -`pip install databusclient` reflects the updated CLI behavior and bug fixes. -# Changelog +### Added +- Vault authentication improvements with host-restricted token exchange +- Comprehensive tests for Vault authentication behavior +- Enhanced docstrings across all modules for better documentation coverage +- Support for download redirect handling -## 0.15 - Prepared release +### Fixed +- Vault token exchange now restricted to known hosts for improved security +- Clearer authentication error messages +- README instructions now consistent with PyPI release -- Prepare PyPI release 0.15. -- Restrict Vault token exchange to known hosts and provide clearer auth errors. -- Add tests for Vault auth behavior. -- Documentation: note about Vault-hosts and `--vault-token` usage. +### Changed +- Updated CLI usage documentation to reflect current command structure +- Improved error handling in download operations -(See PR and issue tracker for details.) +### Notes +- Version 0.15 skips 0.13 and 0.14 as requested in issue #35 +- This release updates the PyPI package to align with current repository features diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md new file mode 100644 index 0000000..b776ab2 --- /dev/null +++ b/RELEASE_NOTES.md @@ -0,0 +1,97 @@ +# Release Notes for databusclient 0.15 + +## Overview +This release addresses issue #35 by providing a new PyPI package (version 0.15) to ensure `pip install databusclient` provides the latest CLI features and bug fixes. 
+
+## Version
+**0.15** (skipping 0.13 and 0.14 as requested)
+
+## What's New
+
+### Features & Improvements
+- **Vault Authentication Enhancement**: Host-restricted token exchange for improved security
+- **Better Error Messages**: Clearer authentication error messages for easier debugging
+- **Download Redirect Handling**: Improved handling of redirects during file downloads
+- **Comprehensive Documentation**: Enhanced docstrings across all modules
+
+### Bug Fixes
+- Fixed Vault token exchange to only work with known hosts
+- Improved error handling in download operations
+- Aligned README with current CLI behavior
+
+### Testing
+- Added comprehensive tests for Vault authentication
+- Improved test coverage overall
+
+## Installation
+
+After this release is published to PyPI, users can install or upgrade with:
+
+```bash
+pip install databusclient==0.15
+# or to upgrade
+pip install --upgrade databusclient
+```
+
+## Build Artifacts
+
+The following distribution files have been created and validated:
+- `databusclient-0.15-py3-none-any.whl` (wheel format)
+- `databusclient-0.15.tar.gz` (source distribution)
+
+Both files have passed `twine check` validation.
+
+## Publishing Instructions
+
+### Prerequisites
+1. PyPI account with maintainer access to the `databusclient` package
+2. PyPI API token configured
+
+### Steps to Publish
+
+1. **Verify the build artifacts** (already done):
+   ```bash
+   poetry build
+   twine check dist/*
+   ```
+
+2. **Upload to TestPyPI** (recommended first):
+   ```bash
+   twine upload --repository testpypi dist/*
+   ```
+   Then test installation:
+   ```bash
+   pip install --index-url https://test.pypi.org/simple/ databusclient==0.15
+   ```
+
+3. **Upload to PyPI**:
+   ```bash
+   twine upload dist/*
+   ```
+
+4. **Create a Git tag**:
+   ```bash
+   git tag -a v0.15 -m "Release version 0.15"
+   git push origin v0.15
+   ```
+
+5. **Create a GitHub Release**:
+   - Go to GitHub repository → Releases → Draft a new release
+   - Choose tag `v0.15`
+   - Release title: `databusclient 0.15`
+   - Copy content from CHANGELOG.md
+   - Attach the dist files as release assets
+
+## Verification
+
+After publishing, verify the release:
+```bash
+pip install --upgrade databusclient==0.15
+databusclient --version
+databusclient --help
+```
+
+## Notes
+- This release resolves issue #35
+- The PyPI package will now be consistent with the repository's CLI documentation
+- Version numbers 0.13 and 0.14 were intentionally skipped as requested
diff --git a/databusclient/__init__.py b/databusclient/__init__.py
index 92fe8b7..7b2c625 100644
--- a/databusclient/__init__.py
+++ b/databusclient/__init__.py
@@ -8,6 +8,7 @@
 from databusclient import cli
 from databusclient.api.deploy import create_dataset, create_distribution, deploy
 
+__version__ = "0.15"
 __all__ = ["create_dataset", "deploy", "create_distribution"]
 
 
From 1b6fa2766a114e6b7a58974482a080fff5c3587f Mon Sep 17 00:00:00 2001
From: DhanashreePetare
Date: Wed, 7 Jan 2026 00:01:52 +0530
Subject: [PATCH 5/5] Convert all docstrings to Google-style format

---
 databusclient/api/delete.py   |  88 +++++++--------
 databusclient/api/download.py | 199 ++++++++++++++++------------------
 databusclient/api/utils.py    |  12 +-
 3 files changed, 140 insertions(+), 159 deletions(-)

diff --git a/databusclient/api/delete.py b/databusclient/api/delete.py
index c5e9e85..edfb95c 100644
--- a/databusclient/api/delete.py
+++ b/databusclient/api/delete.py
@@ -68,16 +68,15 @@ def execute(self):
 
 
 def _confirm_delete(databusURI: str) -> str:
-    """
-    Confirm deletion of a Databus resource with the user.
+    """Confirm deletion of a Databus resource with the user.
 
-    Parameters:
-    - databusURI: The full databus URI of the resource to delete
+    Args:
+        databusURI: The full databus URI of the resource to delete.
 
     Returns:
-    - "confirm" if the user confirms deletion
-    - "skip" if the user chooses to skip deletion
-    - "cancel" if the user chooses to cancel the entire deletion process
+        "confirm" if the user confirms deletion.
+        "skip" if the user chooses to skip deletion.
+        "cancel" if the user chooses to cancel the entire deletion process.
     """
     print(f"Are you sure you want to delete: {databusURI}?")
     print(
@@ -108,18 +107,17 @@ def _delete_resource(
     force: bool = False,
     queue: DeleteQueue = None,
 ):
-    """
-    Delete a single Databus resource (version, artifact, group).
+    """Delete a single Databus resource (version, artifact, group).
 
     Equivalent to:
     curl -X DELETE "<databusURI>" -H "accept: */*" -H "X-API-KEY: <databus_key>"
 
-    Parameters:
-    - databusURI: The full databus URI of the resource to delete
-    - databus_key: Databus API key to authenticate the deletion request
-    - dry_run: If True, do not perform the deletion but only print what would be deleted
-    - force: If True, skip confirmation prompt and proceed with deletion
-    - queue: If queue is provided, add the URI to the queue instead of deleting immediately
+    Args:
+        databusURI: The full databus URI of the resource to delete.
+        databus_key: Databus API key to authenticate the deletion request.
+        dry_run: If True, do not perform the deletion but only print what would be deleted.
+        force: If True, skip confirmation prompt and proceed with deletion.
+        queue: If queue is provided, add the URI to the queue instead of deleting immediately.
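+
+    Example (illustrative URI; dry_run=True only prints what would be deleted):
+        >>> _delete_resource("https://databus.example.org/account/group/artifact/1.0.0",
+        ...                  databus_key="MY_API_KEY", dry_run=True)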
""" # Confirm the deletion request, skip the request or cancel deletion process @@ -161,15 +159,14 @@ def _delete_list( force: bool = False, queue: DeleteQueue = None, ): - """ - Delete a list of Databus resources. - - Parameters: - - databusURIs: List of full databus URIs of the resources to delete - - databus_key: Databus API key to authenticate the deletion requests - - dry_run: If True, do not perform the deletion but only print what would be deleted - - force: If True, skip confirmation prompt and proceed with deletion - - queue: If queue is provided, add the URIs to the queue instead of deleting immediately + """Delete a list of Databus resources. + + Args: + databusURIs: List of full databus URIs of the resources to delete. + databus_key: Databus API key to authenticate the deletion requests. + dry_run: If True, do not perform the deletion but only print what would be deleted. + force: If True, skip confirmation prompt and proceed with deletion. + queue: If queue is provided, add the URIs to the queue instead of deleting immediately. """ for databusURI in databusURIs: _delete_resource( @@ -184,18 +181,17 @@ def _delete_artifact( force: bool = False, queue: DeleteQueue = None, ): - """ - Delete an artifact and all its versions. + """Delete an artifact and all its versions. This function first retrieves all versions of the artifact and then deletes them one by one. Finally, it deletes the artifact itself. - Parameters: - - databusURI: The full databus URI of the artifact to delete - - databus_key: Databus API key to authenticate the deletion requests - - dry_run: If True, do not perform the deletion but only print what would be deleted - - force: If True, skip confirmation prompt and proceed with deletion - - queue: If queue is provided, add the URI to the queue instead of deleting immediately + Args: + databusURI: The full databus URI of the artifact to delete. + databus_key: Databus API key to authenticate the deletion requests. + dry_run: If True, do not perform the deletion but only print what would be deleted. + force: If True, skip confirmation prompt and proceed with deletion. + queue: If queue is provided, add the URI to the queue instead of deleting immediately. """ artifact_body = fetch_databus_jsonld(databusURI, databus_key) @@ -231,18 +227,17 @@ def _delete_group( force: bool = False, queue: DeleteQueue = None, ): - """ - Delete a group and all its artifacts and versions. + """Delete a group and all its artifacts and versions. This function first retrieves all artifacts of the group, then deletes each artifact (which in turn deletes its versions). Finally, it deletes the group itself. - Parameters: - - databusURI: The full databus URI of the group to delete - - databus_key: Databus API key to authenticate the deletion requests - - dry_run: If True, do not perform the deletion but only print what would be deleted - - force: If True, skip confirmation prompt and proceed with deletion - - queue: If queue is provided, add the URI to the queue instead of deleting immediately + Args: + databusURI: The full databus URI of the group to delete. + databus_key: Databus API key to authenticate the deletion requests. + dry_run: If True, do not perform the deletion but only print what would be deleted. + force: If True, skip confirmation prompt and proceed with deletion. + queue: If queue is provided, add the URI to the queue instead of deleting immediately. 
""" group_body = fetch_databus_jsonld(databusURI, databus_key) @@ -269,17 +264,16 @@ def _delete_group( def delete(databusURIs: List[str], databus_key: str, dry_run: bool, force: bool): - """ - Delete a dataset from the databus. + """Delete a dataset from the databus. Delete a group, artifact, or version identified by the given databus URI. Will recursively delete all data associated with the dataset. - Parameters: - - databusURIs: List of full databus URIs of the resources to delete - - databus_key: Databus API key to authenticate the deletion requests - - dry_run: If True, will only print what would be deleted without performing actual deletions - - force: If True, skip confirmation prompt and proceed with deletion + Args: + databusURIs: List of full databus URIs of the resources to delete. + databus_key: Databus API key to authenticate the deletion requests. + dry_run: If True, will only print what would be deleted without performing actual deletions. + force: If True, skip confirmation prompt and proceed with deletion. """ queue = DeleteQueue(databus_key) diff --git a/databusclient/api/download.py b/databusclient/api/download.py index ac55faa..f045ce2 100644 --- a/databusclient/api/download.py +++ b/databusclient/api/download.py @@ -33,16 +33,15 @@ def _download_file( auth_url=None, client_id=None, ) -> None: - """ - Download a file from the internet with a progress bar using tqdm. - - Parameters: - - url: the URL of the file to download - - localDir: Local directory to download file to. If None, the databus folder structure is created in the current working directory. - - vault_token_file: Path to Vault refresh token file - - databus_key: Databus API key for protected downloads - - auth_url: Keycloak token endpoint URL - - client_id: Client ID for token exchange + """Download a file from the internet with a progress bar using tqdm. + + Args: + url: The URL of the file to download. + localDir: Local directory to download file to. If None, the databus folder structure is created in the current working directory. + vault_token_file: Path to Vault refresh token file. + databus_key: Databus API key for protected downloads. + auth_url: Keycloak token endpoint URL. + client_id: Client ID for token exchange. """ if localDir is None: _host, account, group, artifact, version, file = ( @@ -192,16 +191,15 @@ def _download_files( auth_url: str = None, client_id: str = None, ) -> None: - """ - Download multiple files from the databus. - - Parameters: - - urls: List of file download URLs - - localDir: Local directory to download files to. If None, the databus folder structure is created in the current working directory. - - vault_token_file: Path to Vault refresh token file - - databus_key: Databus API key for protected downloads - - auth_url: Keycloak token endpoint URL - - client_id: Client ID for token exchange + """Download multiple files from the databus. + + Args: + urls: List of file download URLs. + localDir: Local directory to download files to. If None, the databus folder structure is created in the current working directory. + vault_token_file: Path to Vault refresh token file. + databus_key: Databus API key for protected downloads. + auth_url: Keycloak token endpoint URL. + client_id: Client ID for token exchange. """ for url in urls: _download_file( @@ -215,15 +213,14 @@ def _download_files( def _get_sparql_query_of_collection(uri: str, databus_key: str | None = None) -> str: - """ - Get SPARQL query of collection members from databus collection URI. 
+ """Get SPARQL query of collection members from databus collection URI. - Parameters: - - uri: The full databus collection URI - - databus_key: Optional Databus API key for authentication on protected resources + Args: + uri: The full databus collection URI. + databus_key: Optional Databus API key for authentication on protected resources. Returns: - SPARQL query string to get download URLs of all files in the collection. + SPARQL query string to get download URLs of all files in the collection. """ headers = {"Accept": "text/sparql"} if databus_key is not None: @@ -235,16 +232,15 @@ def _get_sparql_query_of_collection(uri: str, databus_key: str | None = None) -> def _query_sparql_endpoint(endpoint_url, query, databus_key=None) -> dict: - """ - Query a SPARQL endpoint and return results in JSON format. + """Query a SPARQL endpoint and return results in JSON format. - Parameters: - - endpoint_url: the URL of the SPARQL endpoint - - query: the SPARQL query string - - databus_key: Optional API key for authentication + Args: + endpoint_url: The URL of the SPARQL endpoint. + query: The SPARQL query string. + databus_key: Optional API key for authentication. Returns: - - Dictionary containing the query results + Dictionary containing the query results. """ sparql = SPARQLWrapper(endpoint_url) sparql.method = "POST" @@ -259,16 +255,15 @@ def _query_sparql_endpoint(endpoint_url, query, databus_key=None) -> dict: def _get_file_download_urls_from_sparql_query( endpoint_url, query, databus_key=None ) -> List[str]: - """ - Execute a SPARQL query to get databus file download URLs. + """Execute a SPARQL query to get databus file download URLs. - Parameters: - - endpoint_url: the URL of the SPARQL endpoint - - query: the SPARQL query string - - databus_key: Optional API key for authentication + Args: + endpoint_url: The URL of the SPARQL endpoint. + query: The SPARQL query string. + databus_key: Optional API key for authentication. Returns: - - List of file download URLs + List of file download URLs. """ result_dict = _query_sparql_endpoint(endpoint_url, query, databus_key=databus_key) @@ -359,17 +354,16 @@ def _download_collection( auth_url: str = None, client_id: str = None, ) -> None: - """ - Download all files in a databus collection. - - Parameters: - - uri: The full databus collection URI - - endpoint: the databus SPARQL endpoint URL - - localDir: Local directory to download files to. If None, the databus folder structure is created in the current working directory. - - vault_token: Path to Vault refresh token file for protected downloads - - databus_key: Databus API key for protected downloads - - auth_url: Keycloak token endpoint URL - - client_id: Client ID for token exchange + """Download all files in a databus collection. + + Args: + uri: The full databus collection URI. + endpoint: The databus SPARQL endpoint URL. + localDir: Local directory to download files to. If None, the databus folder structure is created in the current working directory. + vault_token: Path to Vault refresh token file for protected downloads. + databus_key: Databus API key for protected downloads. + auth_url: Keycloak token endpoint URL. + client_id: Client ID for token exchange. """ query = _get_sparql_query_of_collection(uri, databus_key=databus_key) file_urls = _get_file_download_urls_from_sparql_query( @@ -393,16 +387,15 @@ def _download_version( auth_url: str = None, client_id: str = None, ) -> None: - """ - Download all files in a databus artifact version. 
- - Parameters: - - uri: The full databus artifact version URI - - localDir: Local directory to download files to. If None, the databus folder structure is created in the current working directory. - - vault_token_file: Path to Vault refresh token file for protected downloads - - databus_key: Databus API key for protected downloads - - auth_url: Keycloak token endpoint URL - - client_id: Client ID for token exchange + """Download all files in a databus artifact version. + + Args: + uri: The full databus artifact version URI. + localDir: Local directory to download files to. If None, the databus folder structure is created in the current working directory. + vault_token_file: Path to Vault refresh token file for protected downloads. + databus_key: Databus API key for protected downloads. + auth_url: Keycloak token endpoint URL. + client_id: Client ID for token exchange. """ json_str = fetch_databus_jsonld(uri, databus_key=databus_key) file_urls = _get_file_download_urls_from_artifact_jsonld(json_str) @@ -425,17 +418,16 @@ def _download_artifact( auth_url: str = None, client_id: str = None, ) -> None: - """ - Download files in a databus artifact. - - Parameters: - - uri: The full databus artifact URI - - localDir: Local directory to download files to. If None, the databus folder structure is created in the current working directory. - - all_versions: If True, download all versions of the artifact; otherwise, only download the latest version - - vault_token_file: Path to Vault refresh token file for protected downloads - - databus_key: Databus API key for protected downloads - - auth_url: Keycloak token endpoint URL - - client_id: Client ID for token exchange + """Download files in a databus artifact. + + Args: + uri: The full databus artifact URI. + localDir: Local directory to download files to. If None, the databus folder structure is created in the current working directory. + all_versions: If True, download all versions of the artifact; otherwise, only download the latest version. + vault_token_file: Path to Vault refresh token file for protected downloads. + databus_key: Databus API key for protected downloads. + auth_url: Keycloak token endpoint URL. + client_id: Client ID for token exchange. """ json_str = fetch_databus_jsonld(uri, databus_key=databus_key) versions = _get_databus_versions_of_artifact(json_str, all_versions=all_versions) @@ -458,16 +450,15 @@ def _download_artifact( def _get_databus_versions_of_artifact( json_str: str, all_versions: bool ) -> str | List[str]: - """ - Parse the JSON-LD of a databus artifact to extract URLs of its versions. + """Parse the JSON-LD of a databus artifact to extract URLs of its versions. - Parameters: - - json_str: JSON-LD string of the databus artifact - - all_versions: If True, return all version URLs; otherwise, return only the latest version URL + Args: + json_str: JSON-LD string of the databus artifact. + all_versions: If True, return all version URLs; otherwise, return only the latest version URL. Returns: - - If all_versions is True: List of all version URLs - - If all_versions is False: URL of the latest version + If all_versions is True: List of all version URLs. + If all_versions is False: URL of the latest version. """ json_dict = json.loads(json_str) versions = json_dict.get("databus:hasVersion") @@ -495,15 +486,15 @@ def _get_databus_versions_of_artifact( def _get_file_download_urls_from_artifact_jsonld(json_str: str) -> List[str]: - """ - Parse the JSON-LD of a databus artifact version to extract download URLs. 
+    """Parse the JSON-LD of a databus artifact version to extract download URLs.
+
     Don't get downloadURLs directly from the JSON-LD, but follow the "file" links to count access to databus accurately.
 
-    Parameters:
-    - json_str: JSON-LD string of the databus artifact version
+    Args:
+        json_str: JSON-LD string of the databus artifact version.
 
     Returns:
-    List of all file download URLs in the artifact version.
+        List of all file download URLs in the artifact version.
     """
     databusIdUrl: List[str] = []
 
@@ -528,17 +519,16 @@ def _download_group(
     auth_url: str = None,
     client_id: str = None,
 ) -> None:
-    """
-    Download files in a databus group.
-
-    Parameters:
-    - uri: The full databus group URI
-    - localDir: Local directory to download files to. If None, the databus folder structure is created in the current working directory.
-    - all_versions: If True, download all versions of each artifact in the group; otherwise, only download the latest version
-    - vault_token_file: Path to Vault refresh token file for protected downloads
-    - databus_key: Databus API key for protected downloads
-    - auth_url: Keycloak token endpoint URL
-    - client_id: Client ID for token exchange
+    """Download files in a databus group.
+
+    Args:
+        uri: The full databus group URI.
+        localDir: Local directory to download files to. If None, the databus folder structure is created in the current working directory.
+        all_versions: If True, download all versions of each artifact in the group; otherwise, only download the latest version.
+        vault_token_file: Path to Vault refresh token file for protected downloads.
+        databus_key: Databus API key for protected downloads.
+        auth_url: Keycloak token endpoint URL.
+        client_id: Client ID for token exchange.
     """
     json_str = fetch_databus_jsonld(uri, databus_key=databus_key)
     artifacts = _get_databus_artifacts_of_group(json_str)
@@ -599,19 +589,18 @@ def download(
     auth_url="https://auth.dbpedia.org/realms/dbpedia/protocol/openid-connect/token",
     client_id="vault-token-exchange",
 ) -> None:
-    """
-    Download datasets from databus.
+    """Download datasets from databus.
 
     Download of files, versions, artifacts, groups or databus collections via their databus URIs or user-defined SPARQL queries that return file download URLs.
 
-    Parameters:
-    - localDir: Local directory to download datasets to. If None, the databus folder structure is created in the current working directory.
-    - endpoint: the databus endpoint URL. If None, inferred from databusURI. Required for user-defined SPARQL queries.
-    - databusURIs: databus identifiers to specify datasets to download.
-    - token: Path to Vault refresh token file for protected downloads
-    - databus_key: Databus API key for protected downloads
-    - auth_url: Keycloak token endpoint URL. Default is "https://auth.dbpedia.org/realms/dbpedia/protocol/openid-connect/token".
-    - client_id: Client ID for token exchange. Default is "vault-token-exchange".
+    Args:
+        localDir: Local directory to download datasets to. If None, the databus folder structure is created in the current working directory.
+        endpoint: The databus endpoint URL. If None, inferred from databusURI. Required for user-defined SPARQL queries.
+        databusURIs: Databus identifiers to specify datasets to download.
+        token: Path to Vault refresh token file for protected downloads.
+        databus_key: Databus API key for protected downloads.
+        auth_url: Keycloak token endpoint URL. Default is "https://auth.dbpedia.org/realms/dbpedia/protocol/openid-connect/token".
+        client_id: Client ID for token exchange. Default is "vault-token-exchange".
     """
     for databusURI in databusURIs:
         host, account, group, artifact, version, file = (
diff --git a/databusclient/api/utils.py b/databusclient/api/utils.py
index 25c5300..948268c 100644
--- a/databusclient/api/utils.py
+++ b/databusclient/api/utils.py
@@ -19,16 +19,14 @@ def get_databus_id_parts_from_file_url(
     Optional[str],
     Optional[str],
 ]:
-    """
-    Extract databus ID parts from a given databus URI.
+    """Extract databus ID parts from a given databus URI.
 
-    Parameters:
-    - uri: The full databus URI of the form
-      "http(s)://host/accountId/groupId/artifactId/versionId/fileId"
+    Args:
+        uri: The full databus URI of the form "http(s)://host/accountId/groupId/artifactId/versionId/fileId".
 
     Returns:
-    A tuple containing (host, accountId, groupId, artifactId, versionId, fileId).
-    Each element is a string or None if not present.
+        A tuple containing (host, accountId, groupId, artifactId, versionId, fileId).
+        Each element is a string or None if not present.
     """
     """Split a Databus URI into its six parts.