From ed3f2119c8bb2d40dfee23ca6e831b113b96f8e6 Mon Sep 17 00:00:00 2001 From: Maximilian Staib Date: Fri, 3 Jan 2025 12:48:40 +0100 Subject: [PATCH 1/6] Update CDS retrieval to new API --- Project.toml | 3 +-- src/CDSAPI.jl | 36 ++++++++++++++++++------------------ test/py2ju.jl | 18 +++++++++--------- test/retrieve.jl | 11 ++++------- 4 files changed, 32 insertions(+), 36 deletions(-) diff --git a/Project.toml b/Project.toml index 1c28601..f3e96f8 100644 --- a/Project.toml +++ b/Project.toml @@ -1,10 +1,9 @@ name = "CDSAPI" uuid = "8a7b9de3-9c00-473e-88b4-7eccd7ef2fea" authors = ["Micky Yun Chan and contributors"] -version = "1.0.1" +version = "2.0.0" [deps] -Base64 = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3" JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" diff --git a/src/CDSAPI.jl b/src/CDSAPI.jl index e7a6e2d..827eb17 100644 --- a/src/CDSAPI.jl +++ b/src/CDSAPI.jl @@ -2,7 +2,6 @@ module CDSAPI using HTTP using JSON -using Base64 """ retrieve(name, params, filename; max_sleep = 120.) @@ -14,45 +13,46 @@ directory as `filename`. The client periodically requests the status of the retrieve request. `max_sleep` is the maximum time (in seconds) between the status updates. """ -function retrieve(name, params, filename; max_sleep = 120.) 
+function retrieve(name, params, filename; max_sleep=120.0) creds = Dict() - open(joinpath(homedir(),".cdsapirc")) do f + open(joinpath(homedir(), ".cdsapirc")) do f for line in readlines(f) - key, val = strip.(split(line,':', limit=2)) + key, val = strip.(split(line, ':', limit=2)) creds[key] = val end end - apikey = string("Basic ", base64encode(creds["key"])) response = HTTP.request( "POST", - creds["url"] * "/resources/$name", - ["Authorization" => apikey], - body=JSON.json(params), + creds["url"] * "/retrieve/v1/processes/$name/execute/", + ["PRIVATE-TOKEN" => creds["key"]], + body=JSON.json(Dict("inputs" => params)), verbose=1) resp_dict = JSON.parse(String(response.body)) - data = Dict("state" => "queued") - sleep_seconds = 1. + data = Dict("status" => "queued") + sleep_seconds = 1.0 - while data["state"] != "completed" - data = HTTP.request("GET", creds["url"] * "/tasks/" * string(resp_dict["request_id"]), ["Authorization" => apikey]) + while data["status"] != "successful" + data = HTTP.request("GET", creds["url"] * "/retrieve/v1/jobs/" * string(resp_dict["jobID"]), ["PRIVATE-TOKEN" => creds["key"]]) data = JSON.parse(String(data.body)) - println("request queue status ", data["state"]) + println("request queue status ", data["status"]) - if data["state"] == "failed" + if data["status"] == "failed" error("Request to dataset $name failed. 
Check " * "https://cds.climate.copernicus.eu/cdsapp#!/yourrequests " * "for more information (after login).") end - sleep_seconds = min(1.5 * sleep_seconds,max_sleep) - if data["state"] != "completed" + sleep_seconds = min(1.5 * sleep_seconds, max_sleep) + if data["status"] != "successful" sleep(sleep_seconds) end end - HTTP.download(data["location"], filename) + response = HTTP.request("GET", creds["url"] * "/retrieve/v1/jobs/" * string(resp_dict["jobID"]) * "/results/", ["PRIVATE-TOKEN" => creds["key"]]) + body = JSON.parse(String(response.body)) + HTTP.download(body["asset"]["value"]["href"], filename) return data end @@ -88,7 +88,7 @@ function py2ju(dictstr) # if there's no pair after the last comma if findnext(":", dictstr_cpy, lastcomma_pos) == nothing # remove the comma - dictstr_cpy = dictstr_cpy[firstindex(dictstr_cpy):(lastcomma_pos - 1)] * dictstr_cpy[(lastcomma_pos + 1):lastindex(dictstr_cpy)] + dictstr_cpy = dictstr_cpy[firstindex(dictstr_cpy):(lastcomma_pos-1)] * dictstr_cpy[(lastcomma_pos+1):lastindex(dictstr_cpy)] end # removes trailing comma from a list diff --git a/test/py2ju.jl b/test/py2ju.jl index db8e10f..a8b987c 100644 --- a/test/py2ju.jl +++ b/test/py2ju.jl @@ -1,6 +1,6 @@ @testset "Py2Ju" begin pydict_str = """{ - 'format': 'grib', + 'data_format': 'grib', 'product_type': 'monthly_averaged_reanalysis', 'variable': 'divergence', 'pressure_level': '1', @@ -12,14 +12,14 @@ ], 'time': '00:00', }""" - julia_dict = Dict("format"=> "grib", - "month" => "06", - "time" => "00:00", - "year" => "2020", - "pressure_level" => "1", - "area" => Any[90, -180, -90, 180], - "product_type" => "monthly_averaged_reanalysis", - "variable" => "divergence") + julia_dict = Dict("data_format" => "grib", + "month" => "06", + "time" => "00:00", + "year" => "2020", + "pressure_level" => "1", + "area" => Any[90, -180, -90, 180], + "product_type" => "monthly_averaged_reanalysis", + "variable" => "divergence") py2ju_result = CDSAPI.py2ju(pydict_str) @test 
typeof(py2ju_result) <: Dict diff --git a/test/retrieve.jl b/test/retrieve.jl index 606eab2..4f9d19d 100644 --- a/test/retrieve.jl +++ b/test/retrieve.jl @@ -1,11 +1,11 @@ @testset "Retrieve" begin - datadir = joinpath(@__DIR__,"data") + datadir = joinpath(@__DIR__, "data") @testset "ERA5 monthly preasure data" begin filepath = joinpath(datadir, "era5.grib") response = CDSAPI.retrieve("reanalysis-era5-pressure-levels-monthly-means", CDSAPI.py2ju("""{ - 'format': 'grib', + 'data_format': 'grib', 'product_type': 'monthly_averaged_reanalysis', 'variable': 'divergence', 'pressure_level': '1', @@ -20,7 +20,6 @@ filepath) @test typeof(response) <: Dict - @test response["content_type"] == "application/x-grib" @test isfile(filepath) GribFile(filepath) do datafile @@ -43,12 +42,11 @@ 'emissions_scenario': 'rcp_2_6', 'period': '2071_2100', 'return_period': '100', - 'format': 'zip', + 'data_format': 'zip', }"""), filepath) @test typeof(response) <: Dict - @test response["content_type"] == "application/zip" @test isfile(filepath) # extract contents @@ -76,12 +74,11 @@ 'time_aggregation': '1_year_average', 'vertical_level': '0_m', 'bias_correction': 'bias_adjustment_based_on_gamma_distribution', - 'format': 'tgz', + 'data_format': 'tgz', }"""), filepath) @test typeof(response) <: Dict - @test response["content_type"] == "application/gzip" @test isfile(filepath) # extract contents From 6c01aa27c2eadd829b0516f0a9730c46c3050710 Mon Sep 17 00:00:00 2001 From: Maximilian Staib Date: Tue, 7 Jan 2025 10:30:14 +0100 Subject: [PATCH 2/6] Fix retrieve tests with new datasets --- test/retrieve.jl | 60 ++++++++++++++++++++++++++---------------------- 1 file changed, 33 insertions(+), 27 deletions(-) diff --git a/test/retrieve.jl b/test/retrieve.jl index 4f9d19d..a14ac02 100644 --- a/test/retrieve.jl +++ b/test/retrieve.jl @@ -32,16 +32,17 @@ rm(filepath) end - @testset "Europe water quantity data" begin - filepath = joinpath(datadir, "ewq.zip") - response = 
CDSAPI.retrieve("sis-water-quantity-swicca", + @testset "Sea ice type data" begin + filepath = joinpath(datadir, "sea_ice_type.zip") + response = CDSAPI.retrieve("satellite-sea-ice-edge-type", CDSAPI.py2ju("""{ - 'variable': 'river_flow', - 'time_aggregation': 'annual_maximum', - 'horizontal_aggregation': 'catchments', - 'emissions_scenario': 'rcp_2_6', - 'period': '2071_2100', - 'return_period': '100', + 'variable': 'sea_ice_type', + 'region': 'northern_hemisphere', + 'cdr_type': 'cdr', + 'year': '1979', + 'month': '01', + 'day': '02', + 'version': '3_0', 'data_format': 'zip', }"""), filepath) @@ -57,23 +58,24 @@ close(zip_reader) # test file contents - @test ncgetatt(ewq_file, "Global", "time_coverage_start") == "20710101" - @test ncgetatt(ewq_file, "Global", "time_coverage_end") == "21001231" - @test ncgetatt(ewq_file, "Global", "invar_experiment_name") == "rcp26" + @test ncgetatt(ewq_file, "Global", "time_coverage_start") == "19790102T000000Z" + @test ncgetatt(ewq_file, "Global", "time_coverage_end") == "19790103T000000Z" # cleanup rm(filepath) rm(ewq_file) end - @testset "European energy sector cimate" begin - filepath = joinpath(datadir, "ees.tar.gz") - response = CDSAPI.retrieve("sis-european-energy-sector", + @testset "Surface air relative humidity" begin + filepath = joinpath(datadir, "ecc.tar.gz") + response = CDSAPI.retrieve("ecv-for-climate-change", CDSAPI.py2ju("""{ - 'variable': 'precipitation', - 'time_aggregation': '1_year_average', - 'vertical_level': '0_m', - 'bias_correction': 'bias_adjustment_based_on_gamma_distribution', + 'variable': 'surface_air_relative_humidity', + 'origin': 'era5', + 'product_type': 'monthly_mean', + 'time_aggregation': '1_month_mean', + 'year': '2014', + 'month': '01', 'data_format': 'tgz', }"""), filepath) @@ -82,18 +84,22 @@ @test isfile(filepath) # extract contents - gzip_io = GZip.open(filepath) - eesfile_dir = Tar.extract(gzip_io, joinpath(datadir, "ees")) - ees_file = joinpath(eesfile_dir, readdir(eesfile_dir)[1]) - 
close(gzip_io) + ecc_dir = joinpath(datadir, "ecc") + mkdir(ecc_dir) + run(`tar -xzvf $filepath -C $ecc_dir`) + ecc_file = joinpath(ecc_dir, readdir(ecc_dir)[1]) # test file contents - @test ncgetatt(ees_file, "Global", "frequency") == "year" - @test ncgetatt(ees_file, "tp", "long_name") == "precip total" + GribFile(ecc_file) do f + data = Message(f) + @test data["date"] == 20140101 + @test data["typeOfLevel"] == "surface" + @test data["name"] == "Relative humidity" + end # cleanup rm(filepath) - rm(ees_file) - rm(eesfile_dir) + rm(ecc_file) + rm(ecc_dir) end end From ae21210e14d8fc38ebd3c24df151135dca2b333b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BAlio=20Hoffimann?= Date: Tue, 7 Jan 2025 07:20:03 -0300 Subject: [PATCH 3/6] Rename max_sleep --> wait and adjust tests --- src/CDSAPI.jl | 33 +++++++++++++----------- test/retrieve.jl | 65 ------------------------------------------------ 2 files changed, 19 insertions(+), 79 deletions(-) diff --git a/src/CDSAPI.jl b/src/CDSAPI.jl index 827eb17..05fcbe6 100644 --- a/src/CDSAPI.jl +++ b/src/CDSAPI.jl @@ -4,16 +4,16 @@ using HTTP using JSON """ - retrieve(name, params, filename; max_sleep = 120.) + retrieve(name, params, filename; wait=1.0) Retrieves data for `name` from the Climate Data Store with the specified `params` and stores it in the current directory as `filename`. The client periodically requests the status of the retrieve request. -`max_sleep` is the maximum time (in seconds) between the status updates. +`wait` is the maximum time (in seconds) between status updates. 
""" -function retrieve(name, params, filename; max_sleep=120.0) +function retrieve(name, params, filename; wait=1.0) creds = Dict() open(joinpath(homedir(), ".cdsapirc")) do f for line in readlines(f) @@ -28,31 +28,36 @@ function retrieve(name, params, filename; max_sleep=120.0) ["PRIVATE-TOKEN" => creds["key"]], body=JSON.json(Dict("inputs" => params)), verbose=1) - - resp_dict = JSON.parse(String(response.body)) + body = JSON.parse(String(response.body)) data = Dict("status" => "queued") - sleep_seconds = 1.0 while data["status"] != "successful" - data = HTTP.request("GET", creds["url"] * "/retrieve/v1/jobs/" * string(resp_dict["jobID"]), ["PRIVATE-TOKEN" => creds["key"]]) + data = HTTP.request("GET", creds["url"] * "/retrieve/v1/jobs/" * string(body["jobID"]), ["PRIVATE-TOKEN" => creds["key"]]) data = JSON.parse(String(data.body)) - println("request queue status ", data["status"]) + @info "request status" data["status"] if data["status"] == "failed" - error("Request to dataset $name failed. Check " * - "https://cds.climate.copernicus.eu/cdsapp#!/yourrequests " * - "for more information (after login).") + throw(ErrorException(""" + Request to dataset $name failed. + Check https://cds.climate.copernicus.eu/requests + for more information (after login). 
+ """ + )) end - sleep_seconds = min(1.5 * sleep_seconds, max_sleep) if data["status"] != "successful" - sleep(sleep_seconds) + sleep(wait) end end - response = HTTP.request("GET", creds["url"] * "/retrieve/v1/jobs/" * string(resp_dict["jobID"]) * "/results/", ["PRIVATE-TOKEN" => creds["key"]]) + response = HTTP.request( + "GET", + creds["url"] * "/retrieve/v1/jobs/" * string(body["jobID"]) * "/results/", + ["PRIVATE-TOKEN" => creds["key"]] + ) body = JSON.parse(String(response.body)) HTTP.download(body["asset"]["value"]["href"], filename) + return data end diff --git a/test/retrieve.jl b/test/retrieve.jl index 4f9d19d..fe67dfb 100644 --- a/test/retrieve.jl +++ b/test/retrieve.jl @@ -31,69 +31,4 @@ end rm(filepath) end - - @testset "Europe water quantity data" begin - filepath = joinpath(datadir, "ewq.zip") - response = CDSAPI.retrieve("sis-water-quantity-swicca", - CDSAPI.py2ju("""{ - 'variable': 'river_flow', - 'time_aggregation': 'annual_maximum', - 'horizontal_aggregation': 'catchments', - 'emissions_scenario': 'rcp_2_6', - 'period': '2071_2100', - 'return_period': '100', - 'data_format': 'zip', - }"""), - filepath) - - @test typeof(response) <: Dict - @test isfile(filepath) - - # extract contents - zip_reader = ZipFile.Reader(filepath) - ewq_fileio = zip_reader.files[1] - ewq_file = joinpath(datadir, ewq_fileio.name) - write(ewq_file, read(ewq_fileio)) - close(zip_reader) - - # test file contents - @test ncgetatt(ewq_file, "Global", "time_coverage_start") == "20710101" - @test ncgetatt(ewq_file, "Global", "time_coverage_end") == "21001231" - @test ncgetatt(ewq_file, "Global", "invar_experiment_name") == "rcp26" - - # cleanup - rm(filepath) - rm(ewq_file) - end - - @testset "European energy sector cimate" begin - filepath = joinpath(datadir, "ees.tar.gz") - response = CDSAPI.retrieve("sis-european-energy-sector", - CDSAPI.py2ju("""{ - 'variable': 'precipitation', - 'time_aggregation': '1_year_average', - 'vertical_level': '0_m', - 'bias_correction': 
'bias_adjustment_based_on_gamma_distribution', - 'data_format': 'tgz', - }"""), - filepath) - - @test typeof(response) <: Dict - @test isfile(filepath) - - # extract contents - gzip_io = GZip.open(filepath) - eesfile_dir = Tar.extract(gzip_io, joinpath(datadir, "ees")) - ees_file = joinpath(eesfile_dir, readdir(eesfile_dir)[1]) - close(gzip_io) - - # test file contents - @test ncgetatt(ees_file, "Global", "frequency") == "year" - @test ncgetatt(ees_file, "tp", "long_name") == "precip total" - - # cleanup - rm(filepath) - rm(ees_file) - rm(eesfile_dir) - end end From 0cb98f0e314e6a996b43ffe2114f1c1da15c7ba2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BAlio=20Hoffimann?= Date: Tue, 7 Jan 2025 07:36:15 -0300 Subject: [PATCH 4/6] Minor adjustments --- src/CDSAPI.jl | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/CDSAPI.jl b/src/CDSAPI.jl index 05fcbe6..e700e39 100644 --- a/src/CDSAPI.jl +++ b/src/CDSAPI.jl @@ -22,17 +22,19 @@ function retrieve(name, params, filename; wait=1.0) end end - response = HTTP.request( - "POST", - creds["url"] * "/retrieve/v1/processes/$name/execute/", + response = HTTP.request("POST", + creds["url"] * "/retrieve/v1/processes/$name/execute", ["PRIVATE-TOKEN" => creds["key"]], - body=JSON.json(Dict("inputs" => params)), - verbose=1) + body=JSON.json(Dict("inputs" => params)) + ) body = JSON.parse(String(response.body)) data = Dict("status" => "queued") while data["status"] != "successful" - data = HTTP.request("GET", creds["url"] * "/retrieve/v1/jobs/" * string(body["jobID"]), ["PRIVATE-TOKEN" => creds["key"]]) + data = HTTP.request("GET", + creds["url"] * "/retrieve/v1/jobs/" * string(body["jobID"]), + ["PRIVATE-TOKEN" => creds["key"]] + ) data = JSON.parse(String(data.body)) @info "request status" data["status"] @@ -50,9 +52,8 @@ function retrieve(name, params, filename; wait=1.0) end end - response = HTTP.request( - "GET", - creds["url"] * "/retrieve/v1/jobs/" * string(body["jobID"]) * 
"/results/", + response = HTTP.request("GET", + creds["url"] * "/retrieve/v1/jobs/" * string(body["jobID"]) * "/results", ["PRIVATE-TOKEN" => creds["key"]] ) body = JSON.parse(String(response.body)) From 6fcb6ffa5b9190b35863e651b41d6e0b26d3dd1c Mon Sep 17 00:00:00 2001 From: Maximilian Staib Date: Tue, 7 Jan 2025 12:08:40 +0100 Subject: [PATCH 5/6] Remove tar download test --- Project.toml | 2 -- test/retrieve.jl | 37 ------------------------------------- test/runtests.jl | 2 +- 3 files changed, 1 insertion(+), 40 deletions(-) diff --git a/Project.toml b/Project.toml index f3e96f8..7d02845 100644 --- a/Project.toml +++ b/Project.toml @@ -14,9 +14,7 @@ julia = "1.3" [extras] GRIB = "b16dfd50-4035-11e9-28d4-9dfe17e6779b" -GZip = "92fee26a-97fe-5a0c-ad85-20a5f3185b63" NetCDF = "30363a11-5582-574a-97bb-aa9a979735b9" -Tar = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" ZipFile = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea" diff --git a/test/retrieve.jl b/test/retrieve.jl index a14ac02..3f866b7 100644 --- a/test/retrieve.jl +++ b/test/retrieve.jl @@ -65,41 +65,4 @@ rm(filepath) rm(ewq_file) end - - @testset "Surface air relative humidity" begin - filepath = joinpath(datadir, "ecc.tar.gz") - response = CDSAPI.retrieve("ecv-for-climate-change", - CDSAPI.py2ju("""{ - 'variable': 'surface_air_relative_humidity', - 'origin': 'era5', - 'product_type': 'monthly_mean', - 'time_aggregation': '1_month_mean', - 'year': '2014', - 'month': '01', - 'data_format': 'tgz', - }"""), - filepath) - - @test typeof(response) <: Dict - @test isfile(filepath) - - # extract contents - ecc_dir = joinpath(datadir, "ecc") - mkdir(ecc_dir) - run(`tar -xzvf $filepath -C $ecc_dir`) - ecc_file = joinpath(ecc_dir, readdir(ecc_dir)[1]) - - # test file contents - GribFile(ecc_file) do f - data = Message(f) - @test data["date"] == 20140101 - @test data["typeOfLevel"] == "surface" - @test data["name"] == "Relative humidity" - end - - # cleanup - rm(filepath) - rm(ecc_file) - 
rm(ecc_dir) - end end diff --git a/test/runtests.jl b/test/runtests.jl index 9f55593..610582e 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,5 +1,5 @@ using CDSAPI -using ZipFile, GZip, Tar +using ZipFile using GRIB, NetCDF using Test From f73470ee3ce6b3abed32f54c7f6cd90e8e4b06a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BAlio=20Hoffimann?= Date: Tue, 7 Jan 2025 08:24:29 -0300 Subject: [PATCH 6/6] Fix test target --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 7d02845..57d1bc8 100644 --- a/Project.toml +++ b/Project.toml @@ -19,4 +19,4 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" ZipFile = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea" [targets] -test = ["Test", "GRIB", "ZipFile", "NetCDF", "Tar", "GZip"] +test = ["Test", "GRIB", "ZipFile", "NetCDF"]