Skip to content

Commit 3f73738

Browse files
committed
added deploy script with uploading to given rclone remote
1 parent 058e5cf commit 3f73738

File tree

7 files changed

+236
-9
lines changed

7 files changed

+236
-9
lines changed

README.md

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,27 @@
55
python3 -m pip install databusclient
66
```
77

8+
## Upload to Nextcloud and Deploy to Databus
9+
Add your Databus API key to a `.env` file as `API_KEY=<your-api-key>`; the deploy script reads it from there.
10+
11+
The script uploads every given file, and every file inside the given folders, to the given rclone remote.
12+
It then registers the uploaded files on the Databus.
13+
### Example Call
14+
```bash
15+
cd databusclient
16+
17+
python deploy.py \
18+
--remote scads-nextcloud \
19+
--path test \
20+
--version-id https://databus.dbpedia.org/gg46ixav/test_group/test_artifact/2023-07-03 \
21+
--title "Test Dataset" \
22+
--abstract "This is a short abstract of the test dataset." \
23+
--description "This dataset was uploaded for testing the Nextcloud → Databus deployment pipeline." \
24+
--license https://dalicc.net/licenselibrary/Apache-2.0 \
25+
/home/theo/Work/SCADS.AI/Projects/CSVTest/newtestoutputfolder \
26+
/home/theo/Work/SCADS.AI/Projects/CSVTest/output.csv.bz2
27+
28+
```
829
## CLI Usage
930
```bash
1031
databusclient --help

databusclient/client.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -342,7 +342,7 @@ def append_to_dataset_graph_if_existent(add_key: str, add_value: str):
342342
graphs.append(dataset_graph)
343343

344344
dataset = {
345-
"@context": "https://downloads.dbpedia.org/databus/context.jsonld",
345+
"@context": "https://databus.dbpedia.org/res/context.jsonld",
346346
"@graph": graphs,
347347
}
348348
return dataset

databusclient/deploy.py

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
import os
2+
import sys
3+
import argparse
4+
5+
from databusclient import create_distribution, create_dataset, deploy
6+
from dotenv import load_dotenv
7+
8+
from nextcloudclient.upload import upload_to_nextcloud
9+
10+
11+
# Extensions that denote a compression layer rather than a file format.
_COMPRESSION_EXTENSIONS = frozenset(
    {"bz2", "gz", "xz", "zip", "zst", "lz4", "lzma", "7z", "br"}
)


def _split_format_and_compression(filename):
    """Infer ``(file_format, compression)`` from the extensions of *filename*.

    The last extension counts as compression only when it is a known
    compression suffix; otherwise it is the file format and compression is
    ``"none"``. A missing component is reported as the string ``"none"``.
    """
    extensions = filename.split(".")[1:]
    if not extensions:
        return "none", "none"

    last = extensions[-1]
    if last.lower() in _COMPRESSION_EXTENSIONS:
        # e.g. "data.csv.bz2" -> ("csv", "bz2"); "data.gz" -> ("none", "gz")
        file_format = extensions[-2] if len(extensions) > 1 else "none"
        return file_format, last

    # e.g. "report.final.csv" -> ("csv", "none"), not ("final", "csv")
    return last, "none"


def deploy_to_databus(
    metadata,
    version_id,
    title,
    abstract,
    description,
    license_url
):
    """Register already-uploaded files as one dataset version on the Databus.

    Args:
        metadata: Sequence of ``(filename, checksum, size, url)`` tuples, one
            per file. It is iterated twice, so it must not be a one-shot
            iterator.
        version_id: Databus version URI the dataset is deployed under.
        title: Dataset title.
        abstract: Short abstract of the dataset.
        description: Detailed description of the dataset.
        license_url: License URL of the dataset.

    Raises:
        ValueError: If ``API_KEY`` is not set in the environment / ``.env``.
    """
    load_dotenv()
    api_key = os.getenv("API_KEY")
    if not api_key:
        raise ValueError("API_KEY not found in .env")

    distributions = []
    for counter, (filename, checksum, size, url) in enumerate(metadata):
        file_format, compression = _split_format_and_compression(filename)

        distributions.append(
            create_distribution(
                url=url,
                # The running index is used as the content variant so each
                # file gets a distinct "count" value.
                cvs={"count": f"{counter}"},
                file_format=file_format,
                compression=compression,
                sha256_length_tuple=(checksum, size)
            )
        )

    dataset = create_dataset(
        version_id=version_id,
        title=title,
        abstract=abstract,
        description=description,
        license_url=license_url,
        distributions=distributions
    )

    deploy(dataset, api_key)

    # The last tuple element is the download URL of each file.
    metadata_string = ",\n".join([entry[-1] for entry in metadata])
    print(f"Successfully deployed\n{metadata_string}\nto databus {version_id}")
64+
65+
def parse_args():
    """Parse the command line of the upload-and-deploy script.

    Returns:
        argparse.Namespace with ``files`` (one or more local paths) and the
        required options ``remote``, ``path``, ``version_id``, ``title``,
        ``abstract``, ``description`` and ``license``.
    """
    cli = argparse.ArgumentParser(description="Upload files to Nextcloud and deploy to DBpedia Databus.")

    cli.add_argument("files", nargs="+", help="Path(s) to file(s) or folder(s) to upload")

    # Every option is mandatory; declare them from one table, in help order.
    required_options = (
        ("--remote", "rclone remote name (e.g., 'nextcloud')"),
        ("--path", "Remote path on Nextcloud (e.g., 'datasets/mydataset')"),
        ("--version-id", "Databus version URI"),
        ("--title", "Title of the dataset"),
        ("--abstract", "Short abstract of the dataset"),
        ("--description", "Detailed description of the dataset"),
        ("--license", "License URL (e.g., https://dalicc.net/licenselibrary/Apache-2.0)"),
    )
    for flag, help_text in required_options:
        cli.add_argument(flag, required=True, help=help_text)

    return cli.parse_args()
78+
79+
if __name__ == '__main__':
    # Script entry point: parse the CLI, upload the files, then register the
    # uploaded files on the Databus.
    cli_args = parse_args()

    uploaded = upload_to_nextcloud(cli_args.files, cli_args.remote, cli_args.path)

    dataset_fields = {
        "version_id": cli_args.version_id,
        "title": cli_args.title,
        "abstract": cli_args.abstract,
        "description": cli_args.description,
        "license_url": cli_args.license,
    }
    deploy_to_databus(uploaded, **dataset_fields)

nextcloudclient/upload.py

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
import hashlib
2+
import os
3+
import subprocess
4+
import posixpath
5+
6+
BASE_URL = "https://cloud.scadsai.uni-leipzig.de"
7+
8+
def compute_sha256_and_length(filepath):
    """Hash a file with SHA-256 and count its bytes in a single pass.

    Args:
        filepath: Path of the file to read (opened in binary mode).

    Returns:
        Tuple ``(hex_digest, byte_count)``.
    """
    digest = hashlib.sha256()
    byte_count = 0
    with open(filepath, 'rb') as stream:
        # read() returns b"" at end of file, which ends the iteration.
        for block in iter(lambda: stream.read(4096), b""):
            digest.update(block)
            byte_count += len(block)
    return digest.hexdigest(), byte_count
19+
20+
def get_all_files(path):
    """Return the files under *path* in a deterministic order.

    Args:
        path: A file or directory path.

    Returns:
        ``[path]`` when *path* is a regular file; otherwise every file found
        by walking *path* recursively, sorted by name within each directory.
        A path that is neither a file nor a directory yields an empty list.
    """
    if os.path.isfile(path):
        return [path]

    files = []
    for root, dirnames, filenames in os.walk(path):
        # os.walk yields entries in arbitrary filesystem order; sorting
        # dirnames in place makes it descend in sorted order too, so repeated
        # runs list the same files in the same order.
        dirnames.sort()
        files.extend(os.path.join(root, name) for name in sorted(filenames))
    return files
28+
29+
def upload_to_nextcloud(source_paths: list, remote_name: str, remote_path: str):
    """Upload files and folders through rclone and describe what was uploaded.

    Each entry of *source_paths* is copied to
    ``<remote_name>:<remote_path>/<basename>`` with ``rclone copyto`` (file)
    or ``rclone copy`` (directory). Paths that do not exist are reported and
    skipped. When rclone exits with an error, the error is printed and the
    files of that path are left out of the result, so callers never receive
    URLs for files that were not uploaded.

    Args:
        source_paths: List of local file or directory paths.
        remote_name: Name of the rclone remote.
        remote_path: Target path on the remote.

    Returns:
        List of ``(filename, sha256_hex, size_in_bytes, url)`` tuples, one per
        successfully uploaded file. ``url`` is built from ``BASE_URL`` and the
        percent-encoded WebDAV path of the file.
    """
    from urllib.parse import quote

    result = []
    for path in source_paths:
        if not os.path.exists(path):
            print(f"Path not found: {path}")
            continue

        abs_path = os.path.abspath(path)
        basename = os.path.basename(abs_path)
        is_dir = os.path.isdir(abs_path)

        # Collect the metadata of this path separately so it can be dropped
        # if the upload fails.
        uploaded = []
        for file in get_all_files(abs_path):
            checksum, size = compute_sha256_and_length(file)

            if is_dir:
                # Remote paths always use "/" regardless of the local OS.
                rel_file = os.path.relpath(file, abs_path).replace(os.sep, "/")
                remote_webdav_path = posixpath.join(remote_path, basename, rel_file)
            else:
                remote_webdav_path = posixpath.join(remote_path, basename)

            # Percent-encode spaces and other reserved characters; "/" is kept.
            url = f"{BASE_URL}/remote.php/webdav/{quote(remote_webdav_path)}"
            uploaded.append((os.path.basename(file), checksum, size, url))

        destination = f"{remote_name}:{remote_path}/{basename}"
        # "copy" copies a directory's contents into the destination directory;
        # "copyto" copies a single file to the exact destination name.
        rclone_verb = "copy" if is_dir else "copyto"
        command = ["rclone", rclone_verb, abs_path, destination, "--progress"]

        print(f"Upload: {path} -> {destination}")
        try:
            subprocess.run(command, check=True)
        except subprocess.CalledProcessError as e:
            print(f"❌ Error uploading {path}: {e}\n")
            continue

        print("✅ Uploaded successfully.\n")
        result.extend(uploaded)

    return result

0 commit comments

Comments
 (0)