
Commit df17a7c

refactored upload_and_deploy function
1 parent 7651c31 commit df17a7c

File tree

databusclient/cli.py
databusclient/client.py

2 files changed: +104 -91 lines changed

databusclient/cli.py

Lines changed: 51 additions & 91 deletions
@@ -1,6 +1,5 @@
 #!/usr/bin/env python3
 import json
-import os
 
 import click
 from typing import List
@@ -41,50 +40,50 @@ def deploy(version_id, title, abstract, description, license_url, apikey, distri
 
 
 @app.command()
-@click.argument("databusuris", nargs=-1, required=True)
-@click.option("--localdir", help="Local databus folder (if not given, databus folder structure is created in current working directory)")
-@click.option("--databus", help="Databus URL (if not given, inferred from databusuri, e.g. https://databus.dbpedia.org/sparql)")
-@click.option("--token", help="Path to Vault refresh token file")
-@click.option("--authurl", default="https://auth.dbpedia.org/realms/dbpedia/protocol/openid-connect/token", show_default=True, help="Keycloak token endpoint URL")
-@click.option("--clientid", default="vault-token-exchange", show_default=True, help="Client ID for token exchange")
-def download(databusuris: List[str], localdir, databus, token, authurl, clientid):
+@click.option(
+    "--metadata", "metadata_file",
+    required=True,
+    type=click.Path(exists=True),
+    help="Path to metadata JSON file",
+)
+@click.option(
+    "--version-id", "version_id",
+    required=True,
+    help="Target databus version/dataset identifier of the form "
+         "<https://databus.dbpedia.org/$ACCOUNT/$GROUP/$ARTIFACT/$VERSION>",
+)
+@click.option("--title", required=True, help="Dataset title")
+@click.option("--abstract", required=True, help="Dataset abstract max 200 chars")
+@click.option("--description", required=True, help="Dataset description")
+@click.option("--license", "license_url", required=True, help="License (see dalicc.net)")
+@click.option("--apikey", required=True, help="API key")
+def deploy_with_metadata(metadata_file, version_id, title, abstract, description, license_url, apikey):
     """
-    Download datasets from databus, optionally using vault access if vault options are provided.
+    Deploy to DBpedia Databus using metadata json file.
     """
-    client.download(
-        localDir=localdir,
-        endpoint=databus,
-        databusURIs=databusuris,
-        token=token,
-        auth_url=authurl,
-        client_id=clientid,
-    )
+
+    with open(metadata_file, 'r') as f:
+        metadata = json.load(f)
+
+    client.deploy_from_metadata(metadata, version_id, title, abstract, description, license_url, apikey)
 
 
 @app.command()
 @click.option(
     "--webdav-url", "webdav_url",
+    required=True,
     help="WebDAV URL (e.g., https://cloud.example.com/remote.php/webdav)",
 )
 @click.option(
     "--remote",
+    required=True,
     help="rclone remote name (e.g., 'nextcloud')",
 )
 @click.option(
     "--path",
+    required=True,
     help="Remote path on Nextcloud (e.g., 'datasets/mydataset')",
 )
-@click.option(
-    "--no-upload", "no_upload",
-    is_flag=True,
-    help="Skip file upload and use existing metadata",
-)
-@click.option(
-    "--metadata",
-    type=click.Path(exists=True),
-    help="Path to metadata JSON file (required if --no-upload is used)",
-)
-
 @click.option(
     "--version-id", "version_id",
     required=True,
@@ -96,80 +95,41 @@ def download(databusuris: List[str], localdir, databus, token, authurl, clientid
 @click.option("--description", required=True, help="Dataset description")
 @click.option("--license", "license_url", required=True, help="License (see dalicc.net)")
 @click.option("--apikey", required=True, help="API key")
-
 @click.argument(
     "files",
     nargs=-1,
     type=click.Path(exists=True),
 )
-def upload_and_deploy(webdav_url, remote, path, no_upload, metadata, version_id, title, abstract, description, license_url, apikey, files: List[str]):
+def upload_and_deploy(webdav_url, remote, path, version_id, title, abstract, description, license_url, apikey,
+                      files: List[str]):
     """
     Upload files to Nextcloud and deploy to DBpedia Databus.
     """
 
-    if no_upload:
-        if not metadata:
-            raise click.ClickException("--metadata is required when using --no-upload")
-        if not os.path.isfile(metadata):
-            raise click.ClickException(f"Error: Metadata file not found: {metadata}")
-        with open(metadata, 'r') as f:
-            metadata = json.load(f)
-    else:
-        if not (webdav_url and remote and path):
-            raise click.ClickException("Error: --webdav-url, --remote, and --path are required unless --no-upload is used")
-
-        click.echo(f"Uploading data to nextcloud: {remote}")
-        metadata = upload.upload_to_nextcloud(files, remote, path, webdav_url)
-
-
-    click.echo(f"Creating {len(metadata)} distributions")
-    distributions = []
-    counter = 0
-    for entry in metadata:
-        filename = entry["filename"]
-        checksum = entry["checksum"]
-        size = entry["size"]
-        url = entry["url"]
-        # Expect a SHA-256 hex digest (64 chars). Reject others.
-        if not isinstance(checksum, str) or len(checksum) != 64:
-            raise ValueError(f"Invalid checksum for {filename}: expected SHA-256 hex (64 chars), got '{checksum}'")
-        parts = filename.split(".")
-        if len(parts) == 1:
-            file_format = "none"
-            compression = "none"
-        elif len(parts) == 2:
-            file_format = parts[-1]
-            compression = "none"
-        else:
-            file_format = parts[-2]
-            compression = parts[-1]
-
-        distributions.append(
-            client.create_distribution(
-                url=url,
-                cvs={"count": f"{counter}"},
-                file_format=file_format,
-                compression=compression,
-                sha256_length_tuple=(checksum, size)
-            )
-        )
-        counter += 1
-
-    dataset = client.create_dataset(
-        version_id=version_id,
-        title=title,
-        abstract=abstract,
-        description=description,
-        license_url=license_url,
-        distributions=distributions
-    )
-
-    click.echo(f"Deploying dataset version: {version_id}")
+    click.echo(f"Uploading data to nextcloud: {remote}")
+    metadata = upload.upload_to_nextcloud(files, remote, path, webdav_url)
+    client.deploy_from_metadata(metadata, version_id, title, abstract, description, license_url, apikey)
 
-    deploy(dataset, apikey)
-    metadata_string = ",\n".join([entry[-1] for entry in metadata])
 
-    click.echo(f"Successfully deployed\n{metadata_string}\nto databus {version_id}")
+@app.command()
+@click.argument("databusuris", nargs=-1, required=True)
+@click.option("--localdir", help="Local databus folder (if not given, databus folder structure is created in current working directory)")
+@click.option("--databus", help="Databus URL (if not given, inferred from databusuri, e.g. https://databus.dbpedia.org/sparql)")
+@click.option("--token", help="Path to Vault refresh token file")
+@click.option("--authurl", default="https://auth.dbpedia.org/realms/dbpedia/protocol/openid-connect/token", show_default=True, help="Keycloak token endpoint URL")
+@click.option("--clientid", default="vault-token-exchange", show_default=True, help="Client ID for token exchange")
+def download(databusuris: List[str], localdir, databus, token, authurl, clientid):
+    """
+    Download datasets from databus, optionally using vault access if vault options are provided.
+    """
+    client.download(
+        localDir=localdir,
+        endpoint=databus,
+        databusURIs=databusuris,
+        token=token,
+        auth_url=authurl,
+        client_id=clientid,
+    )
 
 
 if __name__ == "__main__":
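
In short, the old --no-upload branch of upload_and_deploy has become its own deploy_with_metadata command, and both commands now delegate the deploy logic to client.deploy_from_metadata. A minimal invocation sketch using click's test runner; the dashed command name assumes click's default underscore-to-dash naming, and every option value below is illustrative rather than taken from this commit:

from click.testing import CliRunner

from databusclient.cli import app

runner = CliRunner()

# Deploy from an existing metadata JSON file (all values hypothetical).
result = runner.invoke(app, [
    "deploy-with-metadata",
    "--metadata", "metadata.json",
    "--version-id", "https://databus.dbpedia.org/account/group/artifact/2024.01.01",
    "--title", "Example dataset",
    "--abstract", "A short abstract of at most 200 characters.",
    "--description", "A longer description of the dataset.",
    "--license", "https://dalicc.net/licenselibrary/Apache-2.0",
    "--apikey", "YOUR_API_KEY",
])
print(result.output)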

databusclient/client.py

Lines changed: 53 additions & 0 deletions
@@ -205,6 +205,40 @@ def create_distribution(
 
     return f"{url}|{meta_string}"
 
+def create_distributions_from_metadata(metadata):
+    distributions = []
+    counter = 0
+    for entry in metadata:
+        filename = entry["filename"]
+        checksum = entry["checksum"]
+        size = entry["size"]
+        url = entry["url"]
+        # Expect a SHA-256 hex digest (64 chars). Reject others.
+        if not isinstance(checksum, str) or len(checksum) != 64:
+            raise ValueError(f"Invalid checksum for {filename}: expected SHA-256 hex (64 chars), got '{checksum}'")
+        parts = filename.split(".")
+        if len(parts) == 1:
+            file_format = "none"
+            compression = "none"
+        elif len(parts) == 2:
+            file_format = parts[-1]
+            compression = "none"
+        else:
+            file_format = parts[-2]
+            compression = parts[-1]
+
+        distributions.append(
+            create_distribution(
+                url=url,
+                cvs={"count": f"{counter}"},
+                file_format=file_format,
+                compression=compression,
+                sha256_length_tuple=(checksum, size)
+            )
+        )
+        counter += 1
+    return distributions
+
 
 def create_dataset(
     version_id: str,
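
For reference, each metadata entry consumed by create_distributions_from_metadata above is a dict carrying filename, checksum (a 64-character SHA-256 hex digest), size, and url, with format and compression inferred from the filename's dot-separated suffixes. A sketch with made-up values:

# Hypothetical entries in the shape this function expects; the inline
# comments show what the filename-splitting logic derives from each name.
metadata = [
    {
        "filename": "labels.ttl.gz",  # -> file_format="ttl", compression="gz"
        "checksum": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
        "size": 1048576,
        "url": "https://cloud.example.com/remote.php/webdav/datasets/mydataset/labels.ttl.gz",
    },
    {
        "filename": "README",  # no dot -> file_format="none", compression="none"
        "checksum": "2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824",
        "size": 512,
        "url": "https://cloud.example.com/remote.php/webdav/datasets/mydataset/README",
    },
]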
@@ -393,6 +427,25 @@ def deploy(
     print(resp.text)
 
 
+def deploy_from_metadata(metadata, version_id, title, abstract, description, license_url, apikey):
+    distributions = create_distributions_from_metadata(metadata)
+
+    dataset = create_dataset(
+        version_id=version_id,
+        title=title,
+        abstract=abstract,
+        description=description,
+        license_url=license_url,
+        distributions=distributions
+    )
+
+    print(f"Deploying dataset version: {version_id}")
+    deploy(dataset, apikey)
+
+    metadata_string = ",\n".join([entry["url"] for entry in metadata])
+    print(f"Successfully deployed\n{metadata_string}\nto databus {version_id}")
+
+
 def __download_file__(url, filename, vault_token_file=None, auth_url=None, client_id=None) -> None:
     """
     Download a file from the internet with a progress bar using tqdm.
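
Because the deploy flow now lives in client.deploy_from_metadata, it can also be driven as a plain library call, without the CLI. A hedged sketch, reusing a metadata list shaped like the one above; the import path assumes the package layout shown in this commit, and all argument values are illustrative:

from databusclient import client

client.deploy_from_metadata(
    metadata,  # list of dicts with filename, checksum, size, url keys
    version_id="https://databus.dbpedia.org/account/group/artifact/2024.01.01",
    title="Example dataset",
    abstract="A short abstract of at most 200 characters.",
    description="A longer description of the dataset.",
    license_url="https://dalicc.net/licenselibrary/Apache-2.0",
    apikey="YOUR_API_KEY",
)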
