Skip to content

Commit 62026e5

Browse files
committed
feat: when no local dir is specified, create it dynamically in the current working directory, mirroring the Databus hierarchy (account/group/artifact/version)
1 parent f28943e commit 62026e5

File tree

2 files changed

+41
-21
lines changed

2 files changed

+41
-21
lines changed

databusclient/cli.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,8 @@ def deploy(
3737
@app.command()
3838
def download(
3939
databusuris: List[str] = typer.Argument(..., help="any kind of these: databus identifier, databus collection identifier, query file"),
40-
localDir: str = typer.Option("./tmp", help="local databus folder"),
41-
databus: str = typer.Option(None, help="databus URL"),
40+
localDir: str = typer.Option(None , help="local databus folder"), # if not given, databus folder structure is created in current working directory
41+
databus: str = typer.Option(None, help="databus URL"), # if not given, inferred on first databusuri (e.g. https://databus.dbpedia.org/sparql)
4242
vault_token_file: str = typer.Option(None, help="Path to Vault refresh token file"),
4343
auth_url: str = typer.Option("https://auth.dbpedia.org/realms/dbpedia/protocol/openid-connect/token", help="Keycloak token endpoint URL"),
4444
client_id: str = typer.Option("vault-token-exchange", help="Client ID for token exchange")

databusclient/client.py

Lines changed: 39 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -413,26 +413,27 @@ def __download_file__(url, filename, vault_token_file=None, auth_url=None, clien
413413
print("Download file: "+url)
414414
os.makedirs(os.path.dirname(filename), exist_ok=True) # Create the necessary directories
415415

416-
# --- 1. Try without token ---
417-
response = requests.get(url, stream=True, allow_redirects=False)
418-
# print("Response code:", response.status_code)
419-
# print(f"Status code: {response.status_code}")
420-
# print(f"Headers: {response.headers}")
416+
# --- 1. Get redirect URL by requesting HEAD ---
417+
response = requests.head(url, stream=True)
421418
url = response.headers.get("Location") # update URL to the final one after redirects
419+
# print(f"Status code: {response.status_code} \nResponse code: {response.status_code}\nHeaders: {response.headers}")
422420
print("URL after redirects:", url)
423-
# print(f"Full response: {response}")
424-
# exit(0)
425421

426-
if (response.status_code == 401 or "WWW-Authenticate" in response.headers or url.startswith("https://data.dbpedia.io/databus.dbpedia.org")):
422+
# --- 2. Try direct GET ---
423+
response = requests.get(url, stream=True, allow_redirects=False) # no redirects here, we want to see if auth is required
424+
www = response.headers.get('WWW-Authenticate', '') # get WWW-Authenticate header if present to check for Bearer auth
425+
# print(f"Status code: {response.status_code} \nResponse code: {response.status_code}\nHeaders: {response.headers}")
426+
427+
if (response.status_code == 401 or "bearer" in www.lower()):
427428
print(f"Authentication required for {url}")
428429
if not (vault_token_file):
429430
raise RuntimeError("Authentication required but no vault_token provided")
430431

431-
# --- 2. Fetch Vault token ---
432+
# --- 3. Fetch Vault token ---
432433
vault_token = __get_vault_access__(url, vault_token_file, auth_url, client_id)
433434
headers = {"Authorization": f"Bearer {vault_token}"}
434435

435-
# --- 3. Retry with token ---
436+
# --- 4. Retry with token ---
436437
response = requests.get(url, headers=headers, stream=True)
437438

438439
response.raise_for_status() # Raise if still failing
@@ -567,11 +568,23 @@ def __download_list__(urls: List[str],
567568
auth_url: str = None,
568569
client_id: str = None) -> None:
569570
for url in urls:
571+
if localDir is None:
572+
host, account, group, artifact, version, file = __get_databus_id_parts__(url)
573+
localDir = os.path.join(os.getcwd(), account, group, artifact, version if version is not None else "latest")
574+
print(f"Local directory not given, using {localDir}")
575+
570576
file = url.split("/")[-1]
571577
filename = os.path.join(localDir, file)
572578
__download_file__(url=url, filename=filename, vault_token_file=vault_token_file, auth_url=auth_url, client_id=client_id)
573579

574580

581+
def __get_databus_id_parts__(uri: str) -> Tuple[Optional[str], Optional[str], Optional[str], Optional[str], Optional[str], Optional[str]]:
    """Split a Databus URI into its six positional identifier parts.

    The parts are, in order: host, account, group, artifact, version, file.
    A leading ``https://`` or ``http://`` is removed before splitting on "/".

    Args:
        uri: Databus identifier, e.g.
            ``https://databus.dbpedia.org/<account>/<group>/<artifact>/<version>``.

    Returns:
        A 6-tuple ``(host, account, group, artifact, version, file)``. Parts
        that are not present in ``uri`` are ``None``; path segments beyond the
        sixth are ignored.
    """
    # Only the http(s) scheme is removed; any other prefix stays part of the
    # first segment.
    path = uri.removeprefix("https://").removeprefix("http://")

    # Drop empty segments (leading/trailing slashes, accidental "//", or an
    # empty URI) so that a missing part is always reported as None and never
    # as an empty string that callers would mistake for a real identifier.
    parts = [segment for segment in path.split("/") if segment]

    # Pad to six entries; a negative multiplier yields an empty list, so
    # longer inputs are left untouched here and truncated below.
    parts += [None] * (6 - len(parts))
    return tuple(parts[:6])
586+
587+
575588
def download(
576589
localDir: str,
577590
endpoint: str,
@@ -591,15 +604,14 @@ def download(
591604
client_id: Client ID for token exchange
592605
"""
593606

594-
# Auto-detect sparql endpoint from first databusURI if not given -> no need to specify endpoint (--databus)
595-
if endpoint is None:
596-
host = databusURIs[0].split("/")[2]
597-
endpoint = f"https://{host}/sparql"
598-
print(f"SPARQL endpoint {endpoint}")
607+
for databusURI in databusURIs:
608+
host, account, group, artifact, version, file = __get_databus_id_parts__(databusURI)
599609

600-
databusVersionPattern = re.compile(r"^https://(databus\.dbpedia\.org|databus\.dev\.dbpedia\.link)/[^/]+/[^/]+/[^/]+/[^/]+/?$")
610+
# Auto-detect sparql endpoint from databusURI if not given -> no need to specify endpoint (--databus)
611+
if endpoint is None:
612+
endpoint = f"https://{host}/sparql"
613+
print(f"SPARQL endpoint {endpoint}")
601614

602-
for databusURI in databusURIs:
603615
# dataID or databus collection
604616
if databusURI.startswith("http://") or databusURI.startswith("https://"):
605617
# databus collection
@@ -608,12 +620,20 @@ def download(
608620
res = __handle_databus_file_query__(endpoint, query)
609621
__download_list__(res, localDir)
610622
# databus artifact version // https://(databus.dbpedia.org|databus.dev.dbpedia.link)/$ACCOUNT/$GROUP/$ARTIFACT/$VERSION
611-
elif databusVersionPattern.match(databusURI):
623+
elif file is not None:
624+
print("fileId not supported yet") # TODO
625+
elif version is not None:
612626
json_str = __handle_databus_artifact_version__(databusURI)
613627
res = __handle_databus_file_json__(json_str)
614628
__download_list__(res, localDir, vault_token_file=vault_token_file, auth_url=auth_url, client_id=client_id)
629+
elif artifact is not None:
630+
print("artifactId not supported yet") # TODO
631+
elif group is not None:
632+
print("groupId not supported yet") # TODO
633+
elif account is not None:
634+
print("accountId not supported yet") # TODO
615635
else:
616-
print("dataId not supported yet") # TODO add support for other DatabusIds here (artifact, group, etc.)
636+
print("dataId not supported yet") # TODO add support for other DatabusIds
617637
# query in local file
618638
elif databusURI.startswith("file://"):
619639
print("query in file not supported yet")

0 commit comments

Comments
 (0)