Skip to content

Commit 0455c22

Browse files
committed
upload via rclone is now possible for every rclone remote (only tested for nextcloud)
1 parent 4649e64 commit 0455c22

File tree

5 files changed

+182
-63
lines changed

5 files changed

+182
-63
lines changed

README.md

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ Usage: databusclient deploy [OPTIONS] [DISTRIBUTIONS]...
171171
172172
- Metadata-based deployment
173173
174-
- Upload & deploy via Nextcloud
174+
- Upload & deploy via Rclone
175175
176176
Arguments:
177177
DISTRIBUTIONS... Depending on mode:
@@ -192,10 +192,8 @@ Options:
192192
--license TEXT License (see dalicc.net) [required]
193193
--apikey TEXT API key [required]
194194
--metadata PATH Path to metadata JSON file (for metadata mode)
195-
--webdav-url TEXT WebDAV URL (e.g.,
196-
https://cloud.example.com/remote.php/webdav)
197-
--remote TEXT rclone remote name (e.g., 'nextcloud')
198-
--path TEXT Remote path on Nextcloud (e.g., 'datasets/mydataset')
195+
--remote TEXT rclone remote name (e.g., 'my-nextcloud')
196+
--path TEXT Remote path on Rclone Remote (e.g., 'datasets/mydataset')
199197
--help Show this message and exit.
200198
201199
```
@@ -251,20 +249,19 @@ Metadata file structure (file_format and compression are optional):
251249
```
252250

253251

254-
##### Mode 3: Upload & Deploy via Nextcloud
252+
##### Mode 3: Upload & Deploy via Rclone
255253

256-
Upload local files or folders to a WebDAV/Nextcloud instance and automatically deploy to DBpedia Databus.
254+
Upload local files or folders to an rclone remote and automatically deploy them to the DBpedia Databus.
257255
rclone must be installed, and the remote passed via `--remote` must already be configured (e.g. with `rclone config`).
258256

259257
```bash
260258
databusclient deploy \
261-
--webdav-url https://cloud.example.com/remote.php/webdav \
262-
--remote nextcloud \
259+
--remote my-nextcloud \
263260
--path datasets/mydataset \
264261
--version-id https://databus.org/user/dataset/version/1.0 \
265262
--title "Test Dataset" \
266263
--abstract "Short abstract of dataset" \
267-
--description "This dataset was uploaded for testing the Nextcloud → Databus pipeline." \
264+
--description "This dataset was uploaded for testing the Rclone → Databus pipeline." \
268265
--license https://dalicc.net/licenselibrary/Apache-2.0 \
269266
--apikey "API-KEY" \
270267
./localfile1.ttl \

databusclient/cli.py

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -29,28 +29,27 @@ def app():
2929

3030
@click.option("--metadata", "metadata_file", type=click.Path(exists=True),
3131
help="Path to metadata JSON file (for metadata mode)")
32-
@click.option("--webdav-url", "webdav_url", help="WebDAV URL (e.g., https://cloud.example.com/remote.php/webdav)")
33-
@click.option("--remote", help="rclone remote name (e.g., 'nextcloud')")
34-
@click.option("--path", help="Remote path on Nextcloud (e.g., 'datasets/mydataset')")
32+
@click.option("--remote", help="rclone remote name (e.g., 'my-nextcloud')")
33+
@click.option("--path", help="Remote path on Rclone Remote (e.g., 'datasets/mydataset')")
3534

3635
@click.argument("distributions", nargs=-1)
3736
def deploy(version_id, title, abstract, description, license_url, apikey,
38-
metadata_file, webdav_url, remote, path, distributions: List[str]):
37+
metadata_file, remote, path, distributions: List[str]):
3938
"""
4039
Flexible deploy to Databus command supporting three modes:\n
4140
- Classic deploy (distributions as arguments)\n
4241
- Metadata-based deploy (--metadata <file>)\n
43-
- Upload & deploy via Nextcloud (--webdav-url, --remote, --path)
42+
- Upload & deploy via Rclone (--remote, --path)
4443
"""
4544

4645
# Sanity checks for conflicting options
47-
if metadata_file and any([distributions, webdav_url, remote, path]):
48-
raise click.UsageError("Invalid combination: when using --metadata, do not provide --webdav-url, --remote, --path, or distributions.")
49-
if any([webdav_url, remote, path]) and not all([webdav_url, remote, path]):
50-
raise click.UsageError("Invalid combination: when using WebDAV/Nextcloud mode, please provide --webdav-url, --remote, and --path together.")
46+
if metadata_file and any([distributions, remote, path]):
47+
raise click.UsageError("Invalid combination: when using --metadata, do not provide --remote, --path, or distributions.")
48+
if any([remote, path]) and not all([remote, path]):
49+
raise click.UsageError("Invalid combination: when using Rclone mode, please provide --remote, and --path together.")
5150

5251
# === Mode 1: Classic Deploy ===
53-
if distributions and not (metadata_file or webdav_url or remote or path):
52+
if distributions and not (metadata_file or remote or path):
5453
click.echo("[MODE] Classic deploy with distributions")
5554
click.echo(f"Deploying dataset version: {version_id}")
5655

@@ -66,27 +65,27 @@ def deploy(version_id, title, abstract, description, license_url, apikey,
6665
client.deploy_from_metadata(metadata, version_id, title, abstract, description, license_url, apikey)
6766
return
6867

69-
# === Mode 3: Upload & Deploy (Nextcloud) ===
70-
if webdav_url and remote and path:
68+
# === Mode 3: Upload & Deploy (Rclone) ===
69+
if remote and path:
7170
if not distributions:
72-
raise click.UsageError("Please provide files to upload when using WebDAV/Nextcloud mode.")
71+
raise click.UsageError("Please provide files to upload when using Rclone mode.")
7372

7473
#Check that all given paths exist and are files or directories.#
7574
invalid = [f for f in distributions if not os.path.exists(f)]
7675
if invalid:
7776
raise click.UsageError(f"The following input files or folders do not exist: {', '.join(invalid)}")
7877

79-
click.echo("[MODE] Upload & Deploy to DBpedia Databus via Nextcloud")
78+
click.echo("[MODE] Upload & Deploy to DBpedia Databus via Rclone")
8079
click.echo(f"→ Uploading to: {remote}:{path}")
81-
metadata = upload.upload_to_nextcloud(distributions, remote, path, webdav_url)
80+
metadata = upload.upload_with_rclone(distributions, remote, path)
8281
client.deploy_from_metadata(metadata, version_id, title, abstract, description, license_url, apikey)
8382
return
8483

8584
raise click.UsageError(
8685
"No valid input provided. Please use one of the following modes:\n"
8786
" - Classic deploy: pass distributions as arguments\n"
8887
" - Metadata deploy: use --metadata <file>\n"
89-
" - Upload & deploy: use --webdav-url, --remote, --path, and file arguments"
88+
" - Upload & deploy: use --remote, --path, and file arguments"
9089
)
9190

9291

Lines changed: 72 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
import hashlib
22
import os
3-
import subprocess
43
import posixpath
5-
from urllib.parse import urljoin, quote
4+
import re
5+
import subprocess
6+
from typing import List, Dict, Union
7+
8+
from rclone_python import rclone
69

710

811
def compute_sha256_and_length(filepath):
@@ -17,6 +20,7 @@ def compute_sha256_and_length(filepath):
1720
total_length += len(chunk)
1821
return sha256.hexdigest(), total_length
1922

23+
2024
def get_all_files(path):
2125
if os.path.isfile(path):
2226
return [path]
@@ -26,7 +30,23 @@ def get_all_files(path):
2630
files.append(os.path.join(root, name))
2731
return files
2832

29-
def upload_to_nextcloud(source_paths: list[str], remote_name: str, remote_path: str, webdav_url: str):
33+
34+
def upload_with_rclone(source_paths: list[str], remote_name: str, remote_path: str) -> List[Dict[str, Union[str, int]]]:
35+
"""
36+
Upload files or directories to any rclone remote using rclone-python.
37+
38+
Args:
39+
source_paths: List of local paths to upload.
40+
remote_name: Name of the rclone remote (e.g., 'mydrive').
41+
remote_path: Remote path inside the remote (e.g., 'backup/').
42+
43+
Returns:
44+
List of dictionaries with keys: filename, checksum, size, url.
45+
"""
46+
47+
if not rclone.is_installed():
48+
raise RuntimeError("rclone not found. Please install rclone.")
49+
3050
result = []
3151
for path in source_paths:
3252
if not os.path.exists(path):
@@ -37,46 +57,62 @@ def upload_to_nextcloud(source_paths: list[str], remote_name: str, remote_path:
3757
basename = os.path.basename(abs_path)
3858
files = get_all_files(abs_path)
3959

40-
tmp_results = []
41-
4260
for file in files:
4361
checksum,size = compute_sha256_and_length(file)
4462

4563
if os.path.isdir(path):
4664
rel_file = os.path.relpath(file, abs_path)
47-
# Normalize to POSIX for WebDAV/URLs
65+
# Normalize to POSIX
4866
rel_file = rel_file.replace(os.sep, "/")
49-
remote_webdav_path = posixpath.join(remote_path, basename, rel_file)
67+
dest_subpath = f"{remote_path.rstrip('/')}/{basename}/{rel_file}"
5068
else:
51-
remote_webdav_path = posixpath.join(remote_path, os.path.basename(file))
69+
dest_subpath = f"{remote_path.rstrip('/')}/{os.path.basename(file)}"
5270

53-
# Preserve scheme/host and percent-encode path segments
54-
url = urljoin(webdav_url.rstrip("/") + "/", quote(remote_webdav_path.lstrip("/"), safe="/"))
71+
destination = f"{remote_name}:{dest_subpath}"
5572

56-
filename = os.path.basename(file)
57-
tmp_results.append({
58-
"filename": filename,
59-
"checksum": checksum,
60-
"size": size,
61-
"url": url,
62-
})
73+
# Upload File
74+
try:
75+
if os.path.isdir(path):
76+
rclone.copy(abs_path, f"{remote_name}:{remote_path.rstrip('/')}/{basename}", args=["--progress"])
77+
else:
78+
rclone.copyto(abs_path, destination, args=["--progress"])
6379

64-
dest_subpath = posixpath.join(remote_path.lstrip("/"), basename)
65-
if os.path.isdir(path):
66-
destination = f"{remote_name}:{dest_subpath}"
67-
command = ["rclone", "copy", abs_path, destination, "--progress"]
68-
else:
69-
destination = f"{remote_name}:{dest_subpath}"
70-
command = ["rclone", "copyto", abs_path, destination, "--progress"]
71-
72-
print(f"Upload: {path}{destination}")
73-
try:
74-
subprocess.run(command, check=True)
75-
result.extend(tmp_results)
76-
print("✅ Uploaded successfully.\n")
77-
except subprocess.CalledProcessError as e:
78-
print(f"❌ Error uploading {path}: {e}\n")
79-
except FileNotFoundError:
80-
print("❌ rclone not found on PATH. Install rclone and retry.")
81-
82-
return result
80+
# Get URL
81+
try:
82+
url = rclone.link(destination).strip()
83+
if not url:
84+
url = None
85+
except Exception:
86+
url = None
87+
88+
if not url:
89+
try:
90+
proc = subprocess.run(["rclone", "config", "show", remote_name],
91+
capture_output=True, text=True, check=True)
92+
out = proc.stdout
93+
m = re.search(r"(?m)^\s*url\s*=\s*(.+)$", out)
94+
if m:
95+
remote_url = m.group(1).strip()
96+
url = posixpath.join(remote_url, dest_subpath)
97+
else:
98+
url = None
99+
except Exception as e:
100+
print(f"Cannot resolve remote URL: {e}")
101+
102+
if url:
103+
result.append({
104+
"filename": os.path.basename(file),
105+
"checksum": checksum,
106+
"size": size,
107+
"url": url,
108+
})
109+
110+
print(f"✅ Uploaded {file}{destination}")
111+
print(f" Cloud URL: {url}\n")
112+
else:
113+
print(" No cloud URL available for this remote.\n")
114+
115+
except Exception as e:
116+
print(f"❌ Error uploading {file}: {e}\n")
117+
118+
return result

poetry.lock

Lines changed: 87 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ requests = "^2.28.1"
1313
tqdm = "^4.42.1"
1414
SPARQLWrapper = "^2.0.0"
1515
rdflib = "^7.2.1"
16+
rclone-python = "^0.1.23"
1617

1718
[tool.poetry.group.dev.dependencies]
1819
black = "^22.6.0"

0 commit comments

Comments
 (0)