Skip to content
47 changes: 42 additions & 5 deletions src/DataWrangling/Copernicus/Copernicus.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module Copernicus

export GLORYSStatic, GLORYSDaily, GLORYSMonthly
export GLORYSStatic, GLORYSDaily, GLORYSMonthly, GLORYSBGCDaily, GLORYSBGCMonthly

using NCDatasets
using Printf
Expand Down Expand Up @@ -40,18 +40,26 @@ default_download_directory(::CopernicusDataset) = download_Copernicus_cache
# Field-less marker types for the GLORYS datasets. Each subtypes
# `CopernicusDataset`; dataset-specific behavior is attached through methods
# that dispatch on these types.
struct GLORYSStatic <: CopernicusDataset end
struct GLORYSDaily <: CopernicusDataset end
struct GLORYSMonthly <: CopernicusDataset end
# Biogeochemistry (BGC) counterparts of the daily and monthly datasets.
struct GLORYSBGCDaily <: CopernicusDataset end
struct GLORYSBGCMonthly <: CopernicusDataset end

# Name of each GLORYS dataset as a string; the string equals the type name.
function dataset_name(::GLORYSStatic)
    return "GLORYSStatic"
end

function dataset_name(::GLORYSDaily)
    return "GLORYSDaily"
end

function dataset_name(::GLORYSMonthly)
    return "GLORYSMonthly"
end

function dataset_name(::GLORYSBGCDaily)
    return "GLORYSBGCDaily"
end

function dataset_name(::GLORYSBGCMonthly)
    return "GLORYSBGCMonthly"
end

# Dates covered by each dataset. The second argument (the variable) is accepted
# but not used: the returned dates depend only on the dataset type.
# The static dataset has no time axis and reports a single `nothing` entry.
all_dates(::GLORYSStatic, var)     = [nothing]
all_dates(::GLORYSDaily, var)      = DateTime("1993-01-01"):Day(1):DateTime("2021-06-30")
all_dates(::GLORYSMonthly, var)    = DateTime("1993-01-01"):Month(1):DateTime("2024-12-01")
all_dates(::GLORYSBGCDaily, var)   = DateTime("1993-01-01"):Day(1):DateTime("2022-12-30")
# A stepped range ends on the last whole step that does not pass the stop
# value, so with a one-month step this range ends on 2022-11-01.
all_dates(::GLORYSBGCMonthly, var) = DateTime("1993-01-01"):Month(1):DateTime("2022-11-30")

# Identifier string that `copernicusmarine` uses for each GLORYS dataset type.
function copernicusmarine_dataset_id(::GLORYSStatic)
    return "cmems_mod_glo_phy_my_0.083deg_static"
end

function copernicusmarine_dataset_id(::GLORYSDaily)
    return "cmems_mod_glo_phy_my_0.083deg_P1D-m"
end

function copernicusmarine_dataset_id(::GLORYSMonthly)
    return "cmems_mod_glo_phy_my_0.083deg_P1M-m"
end

function copernicusmarine_dataset_id(::GLORYSBGCDaily)
    return "cmems_mod_glo_bgc_my_0.25deg_P1D-m"
end

function copernicusmarine_dataset_id(::GLORYSBGCMonthly)
    return "cmems_mod_glo_bgc_my_0.25deg_P1M-m"
end
# :static => "cmems_mod_glo_phy_my_0.083deg_static",

# Field-less marker type for another Copernicus dataset; like the GLORYS types,
# it subtypes `CopernicusDataset` and exists for dispatch.
struct CMEMSHourlyAnalysis <: CopernicusDataset end
Expand All @@ -60,12 +68,20 @@ copernicusmarine_dataset_id(::CMEMSHourlyAnalysis) = "cmems_mod_glo_phy_anfc_0.0
# Aliases for metadata whose dataset parameter is any `CopernicusDataset`.
# `CopernicusMetadata{D}` leaves the second `Metadata` type parameter `D` free;
# `CopernicusMetadatum` is the corresponding restriction of `Metadatum`.
CopernicusMetadata{D} = Metadata{<:CopernicusDataset, D}
CopernicusMetadatum = Metadatum{<:CopernicusDataset}

Base.size(::CopernicusMetadatum) = (4320, 2040, 50, 1)
# Hard-coded size tuple reported for each dataset type. The three physics
# datasets share one size and the two BGC datasets share another, so each
# group is covered by a single Union-typed method.
Base.size(::Union{GLORYSStatic, GLORYSDaily, GLORYSMonthly}) = (4320, 2040, 50, 1)
Base.size(::Union{GLORYSBGCDaily, GLORYSBGCMonthly}) = (1440, 680, 75, 1)
# Every Copernicus dataset reports a reversed vertical axis.
function reversed_vertical_axis(::CopernicusDataset)
    return true
end

available_variables(::CopernicusDataset) = copernicus_dataset_variable_names
# Table of variables (generic name => dataset variable name) offered by each
# dataset type. The three physics datasets share a single table.
available_variables(::Union{GLORYSStatic, GLORYSDaily, GLORYSMonthly}) =
    copernicus_physics_dataset_variable_names

available_variables(::GLORYSBGCDaily) =
    copernicus_bgc_daily_dataset_variable_names

# NOTE(review): this relies on a module-level
# `copernicus_bgc_monthly_dataset_variable_names`; confirm that it is defined.
available_variables(::GLORYSBGCMonthly) =
    copernicus_bgc_monthly_dataset_variable_names
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How could we have `available_variables` return a different list of variables based on the dataset type if all variables are in one dictionary?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah that's why I did them separately, especially because the BGC variables are non-overlapping

Copy link
Member

@glwagner glwagner Sep 8, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suggest using a single "master" copernicus_bgc_monthly_dataset_variable_names, and then writing out the available variable names explicitly in these functions. That way we have a single readable reference for all of the variables that can be downloaded from copernicus. It might become important as the number of variables grows.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, I reverted to a 'master' dict `copernicus_dataset_variable_names` and wrote out the variables explicitly in the `available_variables` function. Is that what you were thinking?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@glwagner does this look ok now?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, thank you! I can eliminate the repeated code by creating a more organized type hierarchy. Do you mind if I commit to your branch?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Go for it!


copernicus_dataset_variable_names = Dict(
copernicus_physics_dataset_variable_names = Dict(
:temperature => "thetao",
:depth => "deptho",
:salinity => "so",
Expand All @@ -78,12 +94,33 @@ copernicus_dataset_variable_names = Dict(
:free_surface => "zos",
)

# Lookup table for the daily BGC dataset: generic variable name (Symbol) =>
# variable name used inside the dataset (String).
copernicus_bgc_daily_dataset_variable_names = Dict{Symbol, String}(
    :total_chlorophyll  => "chl",
    :primary_production => "nppv",
    :nitrate            => "no3",
    :phosphate          => "po4",
    :dissolved_silicate => "si",
    :dissolved_oxygen   => "o2",
)

# Superset of the daily BGC table: every entry of
# `copernicus_bgc_daily_dataset_variable_names` plus four additional variables.
copernicus_bgc_daily_dataset_variable_names_extended = Dict(
    # Splatting a Dict yields its `key => value` pairs, which seed the new Dict.
    copernicus_bgc_daily_dataset_variable_names...,
    :dissolved_iron => "fe",
    :ph => "ph",
    # NOTE(review): verify the casing of "spCO2" against the product's variable list.
    :surface_co2 => "spCO2",
    :total_phytoplankton => "phyc",
)

# The `GLORYSBGCMonthly` methods of `available_variables` and
# `dataset_variable_name` look up `copernicus_bgc_monthly_dataset_variable_names`,
# but no definition of that name is visible in this file, so those methods would
# fail with an `UndefVarError` when called. Bind the name here.
# NOTE(review): assumes the extended table is the one intended for the monthly
# BGC dataset — confirm.
copernicus_bgc_monthly_dataset_variable_names = copernicus_bgc_daily_dataset_variable_names_extended

# String form of the start / end of a date selection.
# For a vector of dates the first (start) or last (end) element is used;
# any other value is treated as a single date and serves as both endpoints.
start_date_str(dates::AbstractVector) = string(first(dates))
start_date_str(date) = string(date)

end_date_str(dates::AbstractVector) = string(last(dates))
end_date_str(date) = string(date)

dataset_variable_name(metadata::CopernicusMetadata) = copernicus_dataset_variable_names[metadata.name]
# Translate the generic variable name stored in `metadata.name` into the name
# used inside the corresponding dataset, via that dataset's lookup table.
#
# These methods take the metadata object (not the bare dataset): the lookup key
# is `metadata.name`, and the dataset marker types have no fields. Dispatch
# selects the table through the dataset type parameter of `Metadata`.
# A name missing from the table raises a `KeyError`.
dataset_variable_name(metadata::Metadata{<:GLORYSStatic}) =
    copernicus_physics_dataset_variable_names[metadata.name]

dataset_variable_name(metadata::Metadata{<:GLORYSDaily}) =
    copernicus_physics_dataset_variable_names[metadata.name]

dataset_variable_name(metadata::Metadata{<:GLORYSMonthly}) =
    copernicus_physics_dataset_variable_names[metadata.name]

dataset_variable_name(metadata::Metadata{<:GLORYSBGCDaily}) =
    copernicus_bgc_daily_dataset_variable_names[metadata.name]

# NOTE(review): this relies on a module-level
# `copernicus_bgc_monthly_dataset_variable_names`; confirm that it is defined.
dataset_variable_name(metadata::Metadata{<:GLORYSBGCMonthly}) =
    copernicus_bgc_monthly_dataset_variable_names[metadata.name]

# With no bounding box, both returned strings are the placeholder "_nothing".
function bbox_strs(::Nothing)
    placeholder = "_nothing"
    return (placeholder, placeholder)
end

Expand Down
14 changes: 13 additions & 1 deletion test/test_copernicus_downloading.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,25 @@ using PythonCall
using CondaPkg

@testset "Downloading Copernicus data" begin
    bounding_box = ClimaOcean.DataWrangling.BoundingBox(longitude=(200, 202), latitude=(35, 37))

    # Each case pairs a dataset with the variables to download from it.
    # Physics is processed first, then biogeochemistry.
    download_cases = (
        (ClimaOcean.DataWrangling.Copernicus.GLORYSDaily(),
         (:temperature, :salinity, :u_velocity, :v_velocity)),
        (ClimaOcean.DataWrangling.Copernicus.GLORYSBGCDaily(),
         (:nitrate, :phosphate, :dissolved_silicate)),
    )

    for (dataset, variables) in download_cases
        for variable in variables
            metadatum = Metadatum(variable; dataset, bounding_box)
            filepath = ClimaOcean.DataWrangling.metadata_path(metadatum)

            # Remove any existing file at the metadata path before downloading.
            if isfile(filepath)
                rm(filepath; force=true)
            end

            ClimaOcean.DataWrangling.download_dataset(metadatum)
        end
    end
end
Loading