From 0f0bfe3b0d038924160cf0091e8d1396405561d0 Mon Sep 17 00:00:00 2001 From: tan Date: Mon, 27 Oct 2025 10:16:53 +0530 Subject: [PATCH 1/3] feat: support minio credentials for datasets Handle minio credentials for dataset operations. fixes: https://github.com/JuliaComputing/JuliaHub/issues/20486 --- src/datasets.jl | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/src/datasets.jl b/src/datasets.jl index 8a42010a9..d0ca2f0b5 100644 --- a/src/datasets.jl +++ b/src/datasets.jl @@ -801,7 +801,7 @@ end function _upload_dataset(upload_config, local_path; progress::Bool) type = upload_config["upload_type"] vendor = upload_config["vendor"] - if type != "S3" || vendor != "aws" + if type != "S3" || !(vendor in ("aws", "minio")) throw(JuliaHubError("Unknown upload type ($type) or vendor ($vendor)")) end mktemp() do rclone_conf_path, rclone_conf_io @@ -868,19 +868,29 @@ function _write_rclone_config( access_key_id::AbstractString, secret_access_key::AbstractString, session_token::AbstractString, + provider::AbstractString="AWS", + endpoint::AbstractString="", ) + if lowercase(provider) == "aws" + provider = "AWS" + elseif lowercase(provider) == "minio" + provider = "Minio" + else + throw(JuliaHubError("Unknown storage backend $(provider)")) + end + write( io, """ [juliahub_remote] type = s3 -provider = AWS +provider = $provider env_auth = false access_key_id = $access_key_id secret_access_key = $secret_access_key session_token = $session_token region = $region -endpoint = +endpoint = $endpoint location_constraint = $region acl = private server_side_encryption = @@ -894,7 +904,11 @@ function _write_rclone_config(io::IO, upload_config::Dict) access_key_id = upload_config["credentials"]["access_key_id"] secret_access_key = upload_config["credentials"]["secret_access_key"] session_token = upload_config["credentials"]["session_token"] - _write_rclone_config(io; region, access_key_id, secret_access_key, session_token) + provider = upload_config["vendor"] + endpoint = get(upload_config["credentials"], "endpoint_url", "") + _write_rclone_config( + io; region, access_key_id, secret_access_key, session_token, provider, endpoint + ) end function _get_dataset_credentials(auth::Authentication, dataset::Dataset) @@ -988,9 +1002,11 @@ function download_dataset( throw(InvalidRequestError("Dataset '$(dataset.name)' does not have version 'v$version'")) credentials = Mocking.@mock _get_dataset_credentials(auth, dataset) - credentials["vendor"] == "aws" || + provider = credentials["vendor"] + provider in ("aws", "minio") || throw(JuliaHubError("Unknown 'vendor': $(credentials["vendor"])")) credentials = credentials["credentials"] + endpoint = get(credentials, "endpoint_url", "") bucket = dataset._storage.bucket prefix = dataset._storage.prefix @@ -1019,6 +1035,8 @@ function download_dataset( access_key_id=credentials["access_key_id"], secret_access_key=credentials["secret_access_key"], session_token=credentials["session_token"], + provider=provider, + endpoint=endpoint, ) close(rclone_conf_io) From 702a89cd2609f9ad7495ac34d57ad6d4d47ae9d0 Mon Sep 17 00:00:00 2001 From: tan Date: Fri, 31 Oct 2025 08:11:31 +0530 Subject: [PATCH 2/3] update changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0fea68941..3a0b58def 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), ## Unreleased +### Added + +* Support for JuliaHub instances using MinIO as datasets storage backend. + ### Fixed * The `.message` property _should_ always be present in the log messages, but there are a few versions out there where it is sometimes omitted due to a backend bug. We default to an empty string in those cases. ([#111]) From bda3a91b281a65c3a1e5c4e97cb533ba1ced73a3 Mon Sep 17 00:00:00 2001 From: Morten Piibeleht Date: Mon, 3 Nov 2025 20:03:51 +1300 Subject: [PATCH 3/3] Document MinIO backend requirements for datasets Add information about MinIO backend requirements for JuliaHub datasets. --- docs/src/reference/datasets.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/src/reference/datasets.md b/docs/src/reference/datasets.md index 7dc55a414..c01c15277 100644 --- a/docs/src/reference/datasets.md +++ b/docs/src/reference/datasets.md @@ -38,6 +38,10 @@ A JuliaHub dataset can have zero or more versions. A newly created dataset _usually_ has at least one version, but it may have zero versions if, for example, the upload did not finish. The versions are indexed with a linear list of integers starting from `1`. +## MinIO backend + +JuliaHub instances with the MinIO backend for data storage require at least JuliaHub.jl v0.1.6 for dataset uploads and downloads. + ## Reference ```@docs