Skip to content

Commit 592c1dc

Browse files
authored
Merge pull request #89 from ChevronETC/copyapi
use Azure copy api
2 parents 5ab726f + e1e86a7 commit 592c1dc

File tree

4 files changed

+226
-19
lines changed

4 files changed

+226
-19
lines changed

.github/workflows/ci.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ jobs:
3737
run: |
3838
az group create -l southcentralus -n "AzureBackupRG-azstorage-${{ matrix.os }}-${{ matrix.version }}-${{ github.run_id }}"
3939
az storage account create --min-tls-version TLS1_2 -n "s${{ steps.uuid.outputs.uuid }}" -g "AzureBackupRG-azstorage-${{ matrix.os }}-${{ matrix.version }}-${{ github.run_id }}" -l southcentralus
40+
az storage account create --min-tls-version TLS1_2 -n "s${{ steps.uuid.outputs.uuid }}2" -g "AzureBackupRG-azstorage-${{ matrix.os }}-${{ matrix.version }}-${{ github.run_id }}" -l southcentralus
4041
- uses: julia-actions/cache@v1
4142
- uses: julia-actions/julia-buildpkg@v1
4243
- uses: julia-actions/julia-runtest@v1
@@ -45,6 +46,7 @@ jobs:
4546
CLIENT_SECRET: ${{ secrets.CLIENT_SECRET }}
4647
TENANT: ${{ secrets.TENANT_ID }}
4748
STORAGE_ACCOUNT: "s${{ steps.uuid.outputs.uuid }}"
49+
STORAGE_ACCOUNT_TOO: "s${{ steps.uuid.outputs.uuid }}2"
4850
- uses: julia-actions/julia-processcoverage@v1
4951
- uses: codecov/codecov-action@v5
5052
with:

Project.toml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
name = "AzStorage"
22
uuid = "c6697862-1611-5eae-9ef8-48803c85c8d6"
3-
version = "2.7.2"
3+
version = "2.7.3"
44

55
[deps]
66
AbstractStorage = "14dbef02-f468-5f15-853e-5ec8dee7b899"
@@ -12,6 +12,7 @@ DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
1212
HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3"
1313
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
1414
ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
15+
SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce"
1516
Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
1617
Sockets = "6462fe0b-24de-5631-8697-dd941f90decc"
1718
XML = "72c71f33-b9b6-44de-8c94-c961784809e2"
@@ -20,8 +21,10 @@ XML = "72c71f33-b9b6-44de-8c94-c961784809e2"
2021
AbstractStorage = "^1.3"
2122
AzSessions = "2"
2223
AzStorage_jll = "0.9"
24+
Base64 = "1"
2325
DelimitedFiles = "1"
2426
HTTP = "1"
2527
ProgressMeter = "1"
28+
SHA = "0.7"
2629
XML = "0.3"
2730
julia = "^1.6"

src/AzStorage.jl

Lines changed: 194 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
module AzStorage
22

3-
using AbstractStorage, AzSessions, AzStorage_jll, Base64, Dates, DelimitedFiles, XML, HTTP, Printf, ProgressMeter, Serialization, Sockets
3+
using AbstractStorage, AzSessions, AzStorage_jll, Base64, Dates, DelimitedFiles, XML, HTTP, Printf, ProgressMeter, SHA, Serialization, Sockets
44

55
# https://docs.microsoft.com/en-us/rest/api/storageservices/common-rest-api-error-codes
66
# https://learn.microsoft.com/en-us/azure/azure-resource-manager/management/request-limits-and-throttling
@@ -854,9 +854,196 @@ function Base.cp(inc::AzContainer, inb::AbstractString, out::AbstractString; buf
854854
close(io)
855855
end
856856

857-
function Base.cp(inc::AzContainer, inb::AbstractString, outc::AzContainer, outb::AbstractString)
858-
bytes = read!(inc, inb, Vector{UInt8}(undef, filesize(inc, inb)))
859-
write(outc, outb, bytes)
857+
#=
858+
If the source and destination storage accounts for a blob copy are different, then the Azure storage API does not allow us
859+
to use OAuth2/RBAC directly for the source blob. But, we can use a user delegation SAS token which is built in the following
860+
two methods: 'get_user_delegation_key' and 'generate_user_delegation_sas'.
861+
=#
862+
"""
    get_user_delegation_key(c::AzContainer; start=now(UTC), expiry=now(UTC)+Hour(1))

Request a user delegation key from the blob service endpoint of `c.storageaccount`.

A `POST` request carrying a `KeyInfo` XML document with the formatted `start` and
`expiry` timestamps is sent using the bearer token of `c.session`.  The XML response is
scanned for a `UserDelegationKey` element, and the text of its `SignedOid`, `SignedTid`,
`SignedStart`, `SignedExpiry`, `SignedService`, `SignedVersion` and `Value` children is
collected.

Returns a `Dict{String,String}` keyed by those element names; only the elements that are
present in the response appear in the dictionary.
"""
function get_user_delegation_key(c::AzContainer; start=now(UTC), expiry=now(UTC)+Hour(1))
    timestamp_format = "yyyy-mm-ddTHH:MM:SSZ"
    start_str = Dates.format(start, timestamp_format)
    expiry_str = Dates.format(expiry, timestamp_format)

    url = "https://$(c.storageaccount).blob.core.windows.net/?restype=service&comp=userdelegationkey"
    request_body = """
        <?xml version="1.0" encoding="utf-8"?>
        <KeyInfo>
        <Start>$start_str</Start>
        <Expiry>$expiry_str</Expiry>
        </KeyInfo>
        """

    # The header list is kept inside the retried expression so that the bearer token is
    # looked up as part of that expression, exactly as before.
    response = @retry c.nretry HTTP.request(
        "POST",
        url,
        [
            "Authorization" => "Bearer $(token(c.session))",
            "x-ms-version" => API_VERSION,
            "Content-Type" => "application/xml"
        ],
        request_body;
        retry = false,
        verbose = c.verbose,
        connect_timeout = c.connect_timeout,
        readtimeout = c.read_timeout)

    document = XML.parse(String(response.body), LazyNode)

    wanted = ("SignedOid", "SignedTid", "SignedStart", "SignedExpiry", "SignedService", "SignedVersion", "Value")
    delegation_key = Dict{String,String}()
    for node in children(document)
        tag(node) == "UserDelegationKey" || continue
        for field in children(node)
            name = tag(field)
            name in wanted || continue
            # the element's text is held by its first child node
            delegation_key[string(name)] = value(first(children(field)))
        end
    end

    return delegation_key
end
900+
901+
"""
    generate_user_delegation_sas(c::AzContainer, b::AbstractString; permissions="r", start=now(UTC), expiry=now(UTC)+Hour(1))

Build a user delegation SAS query string for blob `b` in container `c`.

A user delegation key is obtained with `get_user_delegation_key(c; start, expiry)`.  A
newline-separated string-to-sign is assembled from the permissions, the formatted `start`
and `expiry` timestamps, the canonicalized blob resource, the delegation key fields and a
number of fields that are left empty.  That string is signed with HMAC-SHA256 using the
base64-decoded `Value` of the delegation key, and the signature is base64-encoded and
URI-escaped.

Returns the SAS token as a `&`-separated query string (without a leading `?`) that ends
with the `sig` parameter.
"""
function generate_user_delegation_sas(c::AzContainer, b::AbstractString; permissions="r", start=now(UTC), expiry=now(UTC)+Hour(1))
    key_info = get_user_delegation_key(c; start, expiry)

    timestamp_format = "yyyy-mm-ddTHH:MM:SSZ"
    signed_start = Dates.format(start, timestamp_format)
    signed_expiry = Dates.format(expiry, timestamp_format)
    resource_path = "/blob/$(c.storageaccount)/$(c.containername)/$(addprefix(c,b))"

    key_oid = key_info["SignedOid"]
    key_tid = key_info["SignedTid"]
    key_start = key_info["SignedStart"]
    key_expiry = key_info["SignedExpiry"]
    key_service = key_info["SignedService"]
    key_version = key_info["SignedVersion"]

    ip_range = ""
    protocol = "https"
    resource_kind = "b"

    # One entry per line of the string-to-sign; the order is significant and empty
    # entries still contribute their separating newline.
    fields = [
        permissions,     # signedPermissions
        signed_start,    # signedStart
        signed_expiry,   # signedExpiry
        resource_path,   # canonicalizedResource
        key_oid,         # signedKeyObjectId
        key_tid,         # signedKeyTenantId
        key_start,       # signedKeyStart
        key_expiry,      # signedKeyExpiry
        key_service,     # signedKeyService
        key_version,     # signedKeyVersion
        "",              # signedAuthorizedUserObjectId
        "",              # signedUnauthorizedUserObjectId
        "",              # signedCorrelationId
        "",              # (empty line)
        "",              # (empty line)
        ip_range,        # signedIP
        protocol,        # signedProtocol
        API_VERSION,     # signedVersion
        resource_kind,   # signedResource
        "",              # signedSnapshotTime
        "",              # signedEncryptionScope
        "",              # rscc
        "",              # rscd
        "",              # rsce
        "",              # rscl
        ""               # rsct
    ]
    string_to_sign = join(fields, "\n")

    # sign the string using the delegation key
    key = base64decode(key_info["Value"])
    message = collect(codeunits(string_to_sign))
    signature = HTTP.escapeuri(base64encode(hmac_sha256(key, message)))

    # sas token
    params = [
        "sp=$permissions",
        "st=$signed_start",
        "se=$signed_expiry",
        "skoid=$key_oid",
        "sktid=$key_tid",
        "skt=$key_start",
        "ske=$key_expiry",
        "sks=$key_service",
        "skv=$key_version"
    ]
    isempty(ip_range) || push!(params, "sip=$ip_range")
    push!(params, "spr=$protocol", "sv=$API_VERSION", "sr=$resource_kind", "sig=$signature")

    return join(params, "&")
end
978+
979+
"""
    status(c::AzContainer, b::AbstractString)

Issue a `HEAD` request for blob `b` in container `c` and report the copy-related response
headers.

Returns a `Dict` with the keys `"status"`, `"progress"` and `"reason"`, holding the values
of the `x-ms-copy-status`, `x-ms-copy-progress` and `x-ms-copy-status-description`
response headers respectively.
"""
function status(c::AzContainer, b::AbstractString)
    blob_url = "https://$(c.storageaccount).blob.core.windows.net/$(c.containername)/$(addprefix(c,b))"

    response = @retry c.nretry HTTP.request(
        "HEAD",
        blob_url,
        [
            "Authorization" => "Bearer $(token(c.session))",
            "x-ms-version" => API_VERSION
        ];
        retry = false,
        verbose = c.verbose,
        connect_timeout = c.connect_timeout,
        readtimeout = c.read_timeout
    )

    return Dict(
        "status" => HTTP.header(response, "x-ms-copy-status"),
        "progress" => HTTP.header(response, "x-ms-copy-progress"),
        "reason" => HTTP.header(response, "x-ms-copy-status-description")
    )
end
999+
1000+
"""
    cp(inc::AzContainer, inb::AbstractString, outc::AzContainer, outb::AbstractString; showprogress=false, async=false)

Copy blob `inb` of container `inc` to blob `outb` of container `outc` by sending a `PUT`
request with an `x-ms-copy-source` header to the destination storage account.

When the source and destination storage accounts differ, a read-only user delegation SAS
token, valid for one hour, is generated for the source blob and appended to the source URL.

# Keywords
- `showprogress=false`: while waiting, print the reported copy progress when the copy is pending.
- `async=false`: when `true`, return right after the copy request without waiting for completion.

When `async` is `false` and the copy request is answered with status 202, the destination
blob is polled once per second with `status` until the copy status is `"success"`.  An
error is thrown if the copy status becomes `"aborted"` or `"failed"`.  A failure to fetch
the status is logged as a warning and the poll is retried.

Returns `nothing`.
"""
function Base.cp(inc::AzContainer, inb::AbstractString, outc::AzContainer, outb::AbstractString; showprogress=false, async=false)
    source_url = "https://$(inc.storageaccount).blob.core.windows.net/$(inc.containername)/$(addprefix(inc,inb))"

    if inc.storageaccount != outc.storageaccount
        sas = generate_user_delegation_sas(inc, inb; permissions="r", start=now(UTC), expiry=now(UTC)+Hour(1))
        source_url *= "?$sas"
    end

    headers = [
        "Authorization" => "Bearer $(token(outc.session))",
        "x-ms-version" => API_VERSION,
        "x-ms-copy-source" => source_url
    ]

    r_copy = @retry inc.nretry HTTP.request(
        "PUT",
        "https://$(outc.storageaccount).blob.core.windows.net/$(outc.containername)/$(addprefix(outc,outb))",
        headers;
        retry = false,
        verbose = inc.verbose,
        connect_timeout = inc.connect_timeout,
        readtimeout = inc.read_timeout
    )

    if !async && r_copy.status == 202
        while true
            local stat
            try
                stat = status(outc, outb)
            catch
                @warn "unable to get copy status for blob copy, retrying..."
                stat = Dict("status"=>"unknown")
            end

            copy_state = stat["status"]
            if copy_state == "success"
                break
            elseif copy_state == "aborted" || copy_state == "failed"
                # Both states are terminal; without handling "failed" this loop would
                # never exit for a copy that the service gave up on.
                error("blob copy $copy_state, dest=$(outc.storageaccount): $(outc.containername)/$(addprefix(outc,outb)), reason=$(stat["reason"])")
            elseif copy_state == "pending" && showprogress
                print("copy progress: $(stat["progress"])\r")
            end
            sleep(1)
        end
    end

    nothing
end
8611048

8621049
"""
@@ -1193,19 +1380,8 @@ function Base.cp(src::AzContainer, dst::AzContainer)
11931380
mkpath(dst)
11941381

11951382
blobs = readdir(src)
1196-
for blob in blobs
1197-
@retry dst.nretry HTTP.request(
1198-
"PUT",
1199-
"https://$(dst.storageaccount).blob.core.windows.net/$(dst.containername)/$(addprefix(dst,blob))",
1200-
[
1201-
"Authorization" => "Bearer $(token(dst.session))",
1202-
"x-ms-version" => API_VERSION,
1203-
"x-ms-copy-source" => "https://$(src.storageaccount).blob.core.windows.net/$(src.containername)/$(addprefix(src,blob))"
1204-
],
1205-
retry = false,
1206-
verbose = src.verbose,
1207-
connect_timeout = src.connect_timeout,
1208-
readtimeout = src.read_timeout)
1383+
@sync for blob in blobs
1384+
@async cp(src, blob, dst, blob)
12091385
end
12101386
nothing
12111387
end
@@ -1240,6 +1416,6 @@ Note that the information stored is global, and not specific to any one given IO
12401416
"""
12411417
getperf_counters() = @ccall libAzStorage.getperf_counters()::PerfCounters
12421418

1243-
export AzContainer, containers, readdlm, writedlm
1419+
export AzContainer, containers, readdlm, status, writedlm
12441420

12451421
end

test/runtests.jl

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,9 @@ end
4848
storageaccount = ENV["STORAGE_ACCOUNT"]
4949
@info "storageaccount=$storageaccount"
5050

51+
storageaccount_too = get(ENV, "STORAGE_ACCOUNT_TOO", "")
52+
@info "storageaccount_too=$storageaccount_too"
53+
5154
for container in containers(;storageaccount=storageaccount,session=session)
5255
rm(AzContainer(container;storageaccount=storageaccount,session=session))
5356
end
@@ -545,9 +548,32 @@ end
545548
write(c, "foo.txt", "Hello world")
546549
cp(c, "foo.txt", c, "bar.txt")
547550
@test read(c, "bar.txt", String) == "Hello world"
551+
cp(c, "foo.txt", c, "baz.txt"; async=true)
552+
timeout = 30
553+
tic = time()
554+
while status(c, "baz.txt")["status"] != "success"
555+
if time() - tic > timeout
556+
error("failed async copy")
557+
end
558+
sleep(1)
559+
end
560+
s = status(c, "baz.txt")
548561
rm(c)
549562
end
550563

564+
@testset "Container, copy blob to blob, different storage accounts" begin
    # STORAGE_ACCOUNT_TOO is optional (storageaccount_too falls back to ""), so the
    # cross-account copy is only exercised when a second storage account was provided.
    if isempty(storageaccount_too)
        @warn "STORAGE_ACCOUNT_TOO is not set, skipping the cross storage account copy test"
        @test_skip read(c2, "bar.txt", String) == "Hello world"
    else
        r = uuid4()
        c1 = AzContainer("foo-$r-o", storageaccount=storageaccount, session=session, nthreads=2, nretry=10)
        c1 = robust_mkpath(c1)
        try
            write(c1, "foo.txt", "Hello world")
            c2 = AzContainer("foo-$r-o", storageaccount=storageaccount_too, session=session, nthreads=2, nretry=10)
            c2 = robust_mkpath(c2)
            try
                cp(c1, "foo.txt", c2, "bar.txt")
                @test read(c2, "bar.txt", String) == "Hello world"
            finally
                # remove the destination container even when the copy or the check fails
                rm(c2)
            end
        finally
            # remove the source container even when the copy or the check fails
            rm(c1)
        end
    end
end
576+
551577
@testset "Object, copy blob to local file" begin
552578
r = uuid4()
553579
c = AzContainer("foo-$r-o", storageaccount=storageaccount, session=session, nthreads=2, nretry=10)

0 commit comments

Comments
 (0)