|
| 1 | +module ClimaOceanCopernicusClimateDataStoreExt |
| 2 | + |
| 3 | +using ClimaOcean |
| 4 | +using CopernicusClimateDataStore |
| 5 | + |
| 6 | +using Oceananigans |
| 7 | +using Oceananigans.DistributedComputations: @root |
| 8 | + |
| 9 | +using Dates |
| 10 | +using ClimaOcean.DataWrangling.ERA5: ERA5Metadata, ERA5Metadatum, ERA5_dataset_variable_names |
| 11 | + |
| 12 | +import ClimaOcean.DataWrangling: download_dataset |
| 13 | + |
"""
    download_dataset(metadata::ERA5Metadata; kwargs...)

Download the ERA5 file for every entry of `metadata` and return the resulting
file paths as a `Vector{String}`, in iteration order.

All keyword arguments are forwarded unchanged to the single-entry
`download_dataset` method.
"""
function download_dataset(metadata::ERA5Metadata; kwargs...)
    return String[download_dataset(entry; kwargs...) for entry in metadata]
end
| 26 | + |
"""
    download_dataset(meta::ERA5Metadatum; skip_existing=true, threads=1, additional_kw...)

Download ERA5 data for a single date/time using the CopernicusClimateDataStore package
and return the path of the target file.

The download is performed using `era5cli` through the CopernicusClimateDataStore package.
The target path is `joinpath(meta.dir, metadata_filename(meta))`; the first file reported
by `CopernicusClimateDataStore.hourly` is moved to that path when its name differs.

# Keyword Arguments
- `skip_existing`: Skip download if the file already exists (default: `true`).
  Its negation is also forwarded to `hourly` as `overwrite`.
- `threads`: Forwarded to `CopernicusClimateDataStore.hourly` as `threads` (default: `1`).
- Additional keyword arguments are passed to `CopernicusClimateDataStore.hourly`.

# Environment Setup
Before downloading, you must:
1. Create an account at https://cds.climate.copernicus.eu/
2. Accept the Terms of Use for the ERA5 dataset on the dataset page
3. Set up your API credentials in `~/.cdsapirc`

See https://cds.climate.copernicus.eu/how-to-api for details.
"""
function download_dataset(meta::ERA5Metadatum;
                          skip_existing = true,
                          threads = 1,
                          additional_kw...)

    output_directory = meta.dir
    output_filename = ClimaOcean.DataWrangling.metadata_filename(meta)
    output_path = joinpath(output_directory, output_filename)

    # Skip if file already exists
    if skip_existing && isfile(output_path)
        return output_path
    end

    # Ensure output directory exists
    mkpath(output_directory)

    # Get the ERA5 variable name
    variable_name = ERA5_dataset_variable_names[meta.name]

    # A metadatum carries a single date/time in its `dates` field
    date = meta.dates

    # Build area constraint from bounding box (`nothing` when there is none)
    area = build_era5_area(meta.bounding_box)

    # Build output prefix (filename without extension)
    output_prefix = first(splitext(output_filename))

    # Perform the download using era5cli via CopernicusClimateDataStore
    @root begin
        downloaded_files = CopernicusClimateDataStore.hourly(;
            variables = variable_name,
            startyear = Dates.year(date),
            months = Dates.month(date),
            days = Dates.day(date),
            hours = Dates.hour(date),
            area = area,
            format = "netcdf",
            outputprefix = output_prefix,
            overwrite = !skip_existing,
            threads = threads,
            splitmonths = false,
            directory = output_directory,
            additional_kw...
        )

        # era5cli generates its own filename suffix, so rename to our expected name
        if !isempty(downloaded_files)
            downloaded_file = first(downloaded_files)
            if downloaded_file != output_path && isfile(downloaded_file)
                mv(downloaded_file, output_path; force=true)
            end
        end

        # Surface a failed or empty download instead of silently handing back
        # a path that does not exist.
        if !isfile(output_path)
            @warn "ERA5 download did not produce the expected file" output_path
        end
    end

    return output_path
end
| 108 | + |
| 109 | +##### |
| 110 | +##### Area/bounding box utilities |
| 111 | +##### |
| 112 | + |
# A `nothing` bounding box maps to a `nothing` area.
function build_era5_area(::Nothing)
    return nothing
end
| 114 | + |
# Short alias for the bounding-box type used for dispatch below.
const BBOX = ClimaOcean.DataWrangling.BoundingBox

"""
    build_era5_area(bbox::BBOX)

Convert a bounding box into the `area` value handed to
`CopernicusClimateDataStore.hourly`.

Returns `nothing` when either `bbox.longitude` or `bbox.latitude` is `nothing`.
Otherwise returns the named tuple `(lat = (lat_min, lat_max), lon = (lon_min, lon_max))`,
built from the first and second elements of `bbox.latitude` and `bbox.longitude`.
"""
function build_era5_area(bbox::BBOX)
    # BoundingBox has longitude = (west, east), latitude = (south, north)
    lon = bbox.longitude
    lat = bbox.latitude

    if isnothing(lon) || isnothing(lat)
        return nothing
    end

    lon_min, lon_max = lon[1], lon[2] # west, east
    lat_min, lat_max = lat[1], lat[2] # south, north

    # NOTE(review): era5cli itself takes the area as (lat_max, lon_min, lat_min, lon_max);
    # this returns a NamedTuple of (min, max) pairs instead — presumably
    # CopernicusClimateDataStore.hourly does the reordering. Confirm against that package.
    return (lat = (lat_min, lat_max), lon = (lon_min, lon_max))
end
| 136 | + |
| 137 | +end # module ClimaOceanCopernicusClimateDataStoreExt |
| 138 | + |
0 commit comments