Skip to content

Commit b887de2

Browse files
tanmaykm and mortenpi authored
feat: support minio credentials for datasets (#107)
* feat: support minio credentials for datasets

  Handle minio credentials for dataset operations.

  fixes: JuliaComputing/JuliaHub#20486

* update changelog

* Document MinIO backend requirements for datasets

  Add information about MinIO backend requirements for JuliaHub datasets.

---------

Co-authored-by: Morten Piibeleht <[email protected]>
1 parent 1fce24d commit b887de2

File tree

3 files changed

+31
-5
lines changed

3 files changed

+31
-5
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
44

55
## Unreleased
66

7+
### Added
8+
9+
* Support for JuliaHub instances using MinIO as the dataset storage backend.
10+
711
### Fixed
812

913
* The `.message` property _should_ always be present in the log messages, but there are a few versions out there where it is sometimes omitted due to a backend bug. We default to an empty string in those cases. ([#111])

docs/src/reference/datasets.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,10 @@ A JuliaHub dataset can have zero or more versions.
3838
A newly created dataset _usually_ has at least one version, but it may have zero versions if, for example, the upload did not finish.
3939
The versions are indexed with a linear list of integers starting from `1`.
4040

41+
## MinIO backend
42+
43+
JuliaHub instances with the MinIO backend for data storage require at least JuliaHub.jl v0.1.6 for dataset uploads and downloads.
44+
4145
## Reference
4246

4347
```@docs

src/datasets.jl

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -801,7 +801,7 @@ end
801801
function _upload_dataset(upload_config, local_path; progress::Bool)
802802
type = upload_config["upload_type"]
803803
vendor = upload_config["vendor"]
804-
if type != "S3" || vendor != "aws"
804+
if type != "S3" || !(vendor in ("aws", "minio"))
805805
throw(JuliaHubError("Unknown upload type ($type) or vendor ($vendor)"))
806806
end
807807
mktemp() do rclone_conf_path, rclone_conf_io
@@ -868,19 +868,29 @@ function _write_rclone_config(
868868
access_key_id::AbstractString,
869869
secret_access_key::AbstractString,
870870
session_token::AbstractString,
871+
provider::AbstractString="AWS",
872+
endpoint::AbstractString="",
871873
)
874+
if lowercase(provider) == "aws"
875+
provider = "AWS"
876+
elseif lowercase(provider) == "minio"
877+
provider = "Minio"
878+
else
879+
throw(JuliaHubError("Unknown storage backend $(provider)"))
880+
end
881+
872882
write(
873883
io,
874884
"""
875885
[juliahub_remote]
876886
type = s3
877-
provider = AWS
887+
provider = $provider
878888
env_auth = false
879889
access_key_id = $access_key_id
880890
secret_access_key = $secret_access_key
881891
session_token = $session_token
882892
region = $region
883-
endpoint =
893+
endpoint = $endpoint
884894
location_constraint = $region
885895
acl = private
886896
server_side_encryption =
@@ -894,7 +904,11 @@ function _write_rclone_config(io::IO, upload_config::Dict)
894904
access_key_id = upload_config["credentials"]["access_key_id"]
895905
secret_access_key = upload_config["credentials"]["secret_access_key"]
896906
session_token = upload_config["credentials"]["session_token"]
897-
_write_rclone_config(io; region, access_key_id, secret_access_key, session_token)
907+
provider = upload_config["vendor"]
908+
endpoint = get(upload_config["credentials"], "endpoint_url", "")
909+
_write_rclone_config(
910+
io; region, access_key_id, secret_access_key, session_token, provider, endpoint
911+
)
898912
end
899913

900914
function _get_dataset_credentials(auth::Authentication, dataset::Dataset)
@@ -988,9 +1002,11 @@ function download_dataset(
9881002
throw(InvalidRequestError("Dataset '$(dataset.name)' does not have version 'v$version'"))
9891003

9901004
credentials = Mocking.@mock _get_dataset_credentials(auth, dataset)
991-
credentials["vendor"] == "aws" ||
1005+
provider = credentials["vendor"]
1006+
provider in ("aws", "minio") ||
9921007
throw(JuliaHubError("Unknown 'vendor': $(credentials["vendor"])"))
9931008
credentials = credentials["credentials"]
1009+
endpoint = get(credentials, "endpoint_url", "")
9941010

9951011
bucket = dataset._storage.bucket
9961012
prefix = dataset._storage.prefix
@@ -1019,6 +1035,8 @@ function download_dataset(
10191035
access_key_id=credentials["access_key_id"],
10201036
secret_access_key=credentials["secret_access_key"],
10211037
session_token=credentials["session_token"],
1038+
provider=provider,
1039+
endpoint=endpoint,
10221040
)
10231041
close(rclone_conf_io)
10241042

0 commit comments

Comments
 (0)