diff --git a/Project.toml b/Project.toml index e5e71d2d..5146c83d 100644 --- a/Project.toml +++ b/Project.toml @@ -6,21 +6,40 @@ version = "0.24.2" [deps] CFTime = "179af706-886a-5703-950a-314cd64e0468" +CircularArrays = "7a955b69-7140-5f4e-a0ed-f168c5e2e749" DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" DimensionalData = "0703355e-b756-11e9-17c0-8b28908087d0" +Extremes = "fe3fe864-1b39-11e9-20b8-1f96fa57382d" Interpolations = "a98d9a8b-a2ab-59e6-89dd-64a1c18fca59" LombScargle = "fc60dff9-86e7-5f2f-a8a0-edeadbb75bd9" LongMemory = "f5f8e4a8-cb56-40ea-a13e-090cc212d358" MarSwitching = "b2b8a7f1-da70-4e5f-beaa-e2774ec39c2f" NetCDF = "30363a11-5582-574a-97bb-aa9a979735b9" Polynomials = "f27b6e38-b328-58d1-80ce-0feddd5e7a45" +Shapefile = "8e980c4a-a4fe-5da2-b3a7-4b4b0353a2f4" StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" YAXArrays = "c21b50f5-aa40-41ea-b809-c0f5e47bfa5c" Zarr = "0a941bbe-ad1d-11e8-39d9-ab76183a1d99" [compat] +CFTime = "0.1" +CircularArrays = "1.3" +DataFrames = "1.1" +DimensionalData = "0.27, 0.28, 0.29" +Extremes = "1" +Interpolations = "0.13, 0.14, 0.15" +LombScargle = "1" +LongMemory = "0.1.2" +MarSwitching = "0.2" +NetCDF = "0.10, 0.11, 0.12" +Polynomials = "4" +Shapefile = "0.10, 0.11, 0.12, 0.13" +StableRNGs = "1" +Statistics = "1.11" +YAXArrays = "0.6" +Zarr = "0.9" julia = "1.10" [extras] diff --git a/src/ClimateTools.jl b/src/ClimateTools.jl index 4b860c7b..82445629 100644 --- a/src/ClimateTools.jl +++ b/src/ClimateTools.jl @@ -1,8 +1,10 @@ module ClimateTools +using CircularArrays using CFTime using Dates using DimensionalData +using Extremes using LombScargle using LongMemory using Statistics @@ -13,26 +15,37 @@ using Polynomials using Interpolations using DataFrames using MarSwitching +using Shapefile include("aggregate.jl") include("autocorrelation.jl") include("biascorrect.jl") include("climatology.jl") include("ensembles.jl") -#include("functions.jl") +include("functions.jl") +include("gev.jl") include("markov.jl") include("power.jl") include("plotting.jl") include("processERA5.jl") +include("regrid.jl") +include("spatial.jl") +include("statistics.jl") include("utils.jl") export daily_fct, climato_tp, subsample, dates_builder_yearmonth, dates_builder_yearmonth_hardcode, dates_builder_yearmonthday, dates_builder_yearmonthday_hardcode, diff, cumsum, yearly_clim +export ERA5Land_dailysum +export m2mm export yearly_clim +export quantiles +export regrid_cube export qqmap, qqmap_bulk export ensemble_fct export autocorrelation +export hurst export MSModel +export rlevels_cube end diff --git a/src/EFI.jl b/src/EFI.jl new file mode 100644 index 00000000..888dc834 --- /dev/null +++ b/src/EFI.jl @@ -0,0 +1,25 @@ +function estimate_prob(xout, xin1, xin2; da_cqt_quantile) + + # v_cqt = convert.(Float64,collect(da_cqt[31,31,:].data)) # the grid where we want to evaluate the interpolated value (ex. coords variable below) + # v_eqt = collect(da_eqt[:,31,31].data) # the x-coordinate of the initial data + + # v_cqt_quantile = collect(da_cqt.quantile); # The y-coordinates of the data points, same length as `xp`. + + itp = linear_interpolation(Interpolations.deduplicate_knots!(xin2), da_cqt_quantile, extrapolation_bc=Flat()); + xout .= @. itp(xin1) + +end + + +function estimate_prob(da_cqt::YAXArray, da_eq::YAXArray) + + Quantiles = collect(da_cqt.quantile) + + # Dimensions + indims_da_cqt = InDims("quantile") + indims_da_eqt = InDims("Quantiles") + outdims = OutDims(Dim{:Probability}(Quantiles)) + + return mapCube(estimate_prob, (da_cqt, da_eq), indims=(indims_da_cqt, indims_da_eqt), outdims=outdims, da_cqt_quantile=Quantiles) + +end \ No newline at end of file diff --git a/src/aggregate.jl b/src/aggregate.jl index 36a6d35b..e34dd2f3 100644 --- a/src/aggregate.jl +++ b/src/aggregate.jl @@ -63,9 +63,9 @@ function daily_max_mean_min(cube::YAXArray, kwargs...) index_in_cube = [findall(==(i), time_index) for i in unique(time_index)] # Dimensions - indims = InDims("Ti") + indims = InDims("time") # outdims = OutDims(RangeAxis("time", dates_builder_yearmonthday(new_dates)), CategoricalAxis("stats",["max","mean","min"])) - outdims = OutDims(Dim{:Ti}(dates_builder_yearmonthday(new_dates)), CategoricalAxis("stats",["max","mean","min"])) + outdims = OutDims(Dim{:time}(dates_builder_yearmonthday(new_dates)), CategoricalAxis("stats",["max","mean","min"])) mapCube(daily_max_mean_min, cube, indims=indims, outdims=outdims, index_list=index_in_cube) end @@ -87,7 +87,7 @@ A new YAXArray cube with the computed percentiles along the "time", "longitude", function percentiles(cube::YAXArray, quantiles=[0.1, 0.5, 0.9];lonname="longitude", latname="latitude") indims = InDims("number") - outdims = OutDims("Ti", lonname, latname) + outdims = OutDims("time", lonname, latname) mapCube(percentiles, cube, indims=indims, outdims=outdims, quantiles=quantiles) end @@ -124,8 +124,8 @@ Compute the difference between consecutive time steps in the given `cube`. function diff(cube::YAXArray) - indims = InDims("Ti") - outdims = OutDims("Ti") + indims = InDims("time") + outdims = OutDims("time") mapCube(diff, cube, indims=indims, outdims=outdims) end @@ -161,8 +161,8 @@ A new YAXArray cube with the cumulative sum computed along the "time" dimension. """ function cumsum(cube::YAXArray, kwargs...) - indims = InDims("Ti") - outdims = OutDims("Ti") + indims = InDims("time") + outdims = OutDims("time") mapCube(cumsum, cube, indims=indims, outdims=outdims) end @@ -212,14 +212,14 @@ A new `YAXArray` with aggregated data on a daily basis. """ function daily_fct(cube::YAXArray; fct::Function=mean, shifthour=0, kwargs...) - time_to_index = cube.Ti .+ Dates.Hour(shifthour) + time_to_index = cube.time .+ Dates.Hour(shifthour) time_index = yearmonthday.(time_to_index) new_dates = unique(time_index) index_in_cube = [findall(==(i), time_index) for i in unique(time_index)] # Dimensions - indims = InDims("Ti") - outdims = OutDims(Dim{:Ti}(dates_builder_yearmonthday(new_dates))) + indims = InDims("time") + outdims = OutDims(Dim{:time}(dates_builder_yearmonthday(new_dates))) mapCube(daily_fct, cube, indims=indims, outdims=outdims, fct=fct, index_list=index_in_cube) end @@ -254,14 +254,14 @@ A new YAXArray containing the yearly climatology. """ function yearly_clim(cube::YAXArray; fct::Function=mean, kwargs...) - time_to_index = cube.Ti; + time_to_index = cube.time; time_index = year.(time_to_index); new_dates = unique(time_index); index_in_cube = [findall(==(i), time_index) for i in unique(time_index)] # Dimensions - indims = InDims("Ti") - outdims = OutDims(Dim{:Ti}(dates_builder_yearmonthday_hardcode(new_dates, imois=7, iday=1))) + indims = InDims("time") + outdims = OutDims(Dim{:time}(dates_builder_yearmonthday_hardcode(new_dates, imois=7, iday=1))) mapCube(yearly_clim, cube, fct=fct, indims=indims, outdims=outdims, index_list=index_in_cube) end @@ -295,8 +295,8 @@ function daily_max(cube::YAXArray, kwargs...) index_in_cube = [findall(==(i), time_index) for i in unique(time_index)] # Dimensions - indims = InDims("Ti") - outdims = OutDims(Dim{:Ti}(dates_builder_yearmonthday(new_dates))) + indims = InDims("time") + outdims = OutDims(Dim{:time}(dates_builder_yearmonthday(new_dates))) mapCube(daily_max, cube, indims=indims, outdims=outdims, index_list=index_in_cube) end diff --git a/src/analysis.jl b/src/analysis.jl deleted file mode 100644 index 0c0df926..00000000 --- a/src/analysis.jl +++ /dev/null @@ -1,103 +0,0 @@ -""" - findmax(C::ClimGrid; skipnan::Bool=false) - -Return the maximum element of ClimGrid C and its index. If there are multiple maximal elements, then the first one will be returned. If any data element is NaN, this element is returned. The result is in line with max. As climate data can often have NaN values (irregular polygons, missing weather data), the option to skip NaNs is provided as a keyword argument. - -""" -function Base.findmax(C::ClimGrid; skipnan::Bool=false) - # Get data - data = C[1].data - - if skipnan - idx = findall(data .== NaNMath.maximum(data))[1] - val = data[idx] - - else - idx = argmax(data) - val = data[idx] - end - - return val, idx - -end - -""" - findmin(C::ClimGrid; skipnan::Bool=false) - -Return the minimum element of ClimGrid C and its index. If there are multiple minimal elements, then the first one will be returned. If any data element is NaN, this element is returned. The result is in line with min. As climate data can often have NaN values (irregular polygons, missing weather data), the option to skip NaNs is provided as a keyword argument. - -""" -function Base.findmin(C::ClimGrid; skipnan::Bool=false) - # Get data - data = C[1].data - - if skipnan - idx = findall(data .== NaNMath.minimum(data))[1] - val = data[idx] - - else - idx = argmin(data) - val = data[idx] - end - - return val, idx - -end - -# """ -# dashboard(C::ClimGrid) -# -# This function returns the summary of ClimGrid C, such as maps of mean, maximum and minimum values in ClimGrid C. The annual cycle, the histogram and estimated probability density function and time series. -# """ -# function dashboard(C::ClimGrid; poly = ([])) -# -# # MAPS, WHILE NICE TO LOOK AT, ARE ALREADY COVERED BY THE FUNCTION "mapclimgrid". Should focus here on annual cycles, and other diagnostics such as annualmax and annualmin -# # # Mean, maximum and minimum values for each grid points -# # mean_map = mean(annualmean(C)[1].data, 1)[1, :, :] -# # FD = AxisArray(mean_map, Axis{:lon}(C[1][Axis{:lon}][:]), Axis{:lat}(C[1][Axis{:lat}][:])) -# # meanmap = ClimGrid(FD, model = C.model, experiment = C.experiment, run = C.run, filename = C.filename, dataunits = C.dataunits, latunits = C.latunits, lonunits = C.lonunits, variable = string(C.variable, " - temporal mean"), typeofvar = C.typeofvar, typeofcal = C.typeofcal) -# # -# # max_map = maximum(annualmax(C)[1].data, 1)[1, :, :] -# # FD = AxisArray(max_map, Axis{:lon}(C[1][Axis{:lon}][:]), Axis{:lat}(C[1][Axis{:lat}][:])) -# # maxmap = ClimGrid(FD, model = C.model, experiment = C.experiment, run = C.run, filename = C.filename, dataunits = C.dataunits, latunits = C.latunits, lonunits = C.lonunits, variable = string(C.variable, " - temporal maximum"), typeofvar = C.typeofvar, typeofcal = C.typeofcal) -# # -# # min_map = minimum(annualmin(C)[1].data, 1)[1, :, :] -# # FD = AxisArray(min_map, Axis{:lon}(C[1][Axis{:lon}][:]), Axis{:lat}(C[1][Axis{:lat}][:])) -# # minmap = ClimGrid(FD, model = C.model, experiment = C.experiment, run = C.run, filename = C.filename, dataunits = C.dataunits, latunits = C.latunits, lonunits = C.lonunits, variable = string(C.variable, " - temporal minimum"), typeofvar = C.typeofvar, typeofcal = C.typeofcal) -# -# # Spatial-temporal histogram and PDF -# -# # Annual cycles -# -# -# # Timeseries (annual mean, max and min) -# mean_ts = mean(annualmean(C)[1].data,2:3)[:, 1, 1] -# max_ts = mean(annualmax(C)[1].data,2:3)[:, 1, 1] -# min_ts = mean(annualmin(C)[1].data,2:3)[:, 1, 1] -# -# -# # Annual cycle -# -# -# # Plot everything -# fig = figure("pyplot_subplot_column",figsize=(10,10)) -# subplot(231) # Create the 1st axis of a 3x1 array of axes -# title("Mean") -# subplot(232) # Create the 2nd axis of a 3x1 arrax of axes -# ax = gca() # Get the handle of the current axis -# #ax[:set_yscale]("log") # Set the y axis to a logarithmic scale -# grid("on") -# ylabel("Log Scale") -# title("Minimum") -# subplot(233) # Create the 3rd axis of a 3x1 array of axes -# ax = gca() -# ax[:set_xscale]("log") # Set the x axis to a logarithmic scale -# #xlabel("Log Scale") -# title("Maximum") -# fig[:canvas][:draw]() # Update the figure -# suptitle("3x1 Subplot") -# -# return true -# -# -# end diff --git a/src/autocorrelation.jl b/src/autocorrelation.jl index 25236aa4..df083bff 100644 --- a/src/autocorrelation.jl +++ b/src/autocorrelation.jl @@ -19,9 +19,48 @@ The autocorrelation of the input dataset `ds` along the time dimension. function autocorrelation(ds; lags=30) # Dimensions - indims = InDims("Ti") + indims = InDims("time") outdims = OutDims(Dim{:lags}(collect(1:lags))) return mapCube(ClimateTools.autocorrelation, ds, indims=indims, outdims=outdims, lags=lags, nthreads=Threads.nthreads()) + +end + +function hurst(xout, xin; k::Int=20) + d = LongMemory.ClassicEstimators.rescaled_range_est(Array(xin), k=k) + xout .= d + 0.5 + # xout .= LongMemory.autocorrelation(Array(xin), lags) +end + + + +""" + hurst(ds; k) + +Calculate the Hurst exponent for a time series of dataset `ds`. The Hurst exponent is a measure of the long-term memory of time series data. It can be used to classify time series as: + +- **0.5 < H < 1**: The time series is persistent, meaning that high values are likely to be followed by high values, and low values by low values. +- **H = 0.5**: The time series is a random walk, indicating no correlation between values. +- **0 < H < 0.5**: The time series is anti-persistent, meaning that high values are likely to be followed by low values, and vice versa. + +# Arguments +- `ds`: YAXArray variable. + +# Returns +- `H`: The Hurst exponent of the time series. + + +""" +function hurst(ds; k::Int=20) + + # Dimensions + indims = InDims("time") + # outdims = OutDims(Dim{:hurst}(1)) + outdims = OutDims() + + return mapCube(ClimateTools.hurst, ds, indims=indims, outdims=outdims, k=k, nthreads=Threads.nthreads()) + + + end diff --git a/src/biascorrect.jl b/src/biascorrect.jl index 2d88ba56..c2c9254b 100644 --- a/src/biascorrect.jl +++ b/src/biascorrect.jl @@ -26,14 +26,14 @@ This function performs quantile mapping bias correction on data. function qqmap(obs::YAXArray, ref::YAXArray, fut::YAXArray; method::String="Additive", detrend::Bool=true, order::Int=4, window::Int=15, rankn::Int=50, qmin::Real=0.01, qmax::Real=0.99, thresnan::Float64=0.1, keep_original::Bool=false, interp=Linear(), extrap=Interpolations.Flat()) # Aligning calendars - obs = drop29thfeb(obs, dimx=:rlon, dimy=:rlat, dimt=:Ti) - ref = drop29thfeb(ref, dimx=:rlon, dimy=:rlat, dimt=:Ti) - fut = drop29thfeb(fut, dimx=:rlon, dimy=:rlat, dimt=:Ti) + obs = drop29thfeb(obs, dimx=:rlon, dimy=:rlat, dimt=:time) + ref = drop29thfeb(ref, dimx=:rlon, dimy=:rlat, dimt=:time) + fut = drop29thfeb(fut, dimx=:rlon, dimy=:rlat, dimt=:time) # Julian days vectors - obs_jul = Dates.dayofyear.(lookup(obs, :Ti)) - ref_jul = Dates.dayofyear.(lookup(ref, :Ti)) - fut_jul = Dates.dayofyear.(lookup(fut, :Ti)) + obs_jul = Dates.dayofyear.(lookup(obs, :time)) + ref_jul = Dates.dayofyear.(lookup(ref, :time)) + fut_jul = Dates.dayofyear.(lookup(fut, :time)) # # Monthly values of time vector # obs_month = Dates.month.(lookup(obs, :time)) @@ -55,8 +55,8 @@ function qqmap(obs::YAXArray, ref::YAXArray, fut::YAXArray; method::String="Addi end # Dimensions - indims = InDims("Ti") - outdims = OutDims(Dim{:Ti}(lookup(fut, :Ti))) + indims = InDims("time") + outdims = OutDims(Dim{:time}(lookup(fut, :time))) # @info Threads.nthreads() @@ -205,13 +205,13 @@ function qqmap_bulk(obs::YAXArray, ref::YAXArray, fut::YAXArray; method::String= # Aligning calendars - obs = drop29thfeb(obs, dimx=:rlon, dimy=:rlat, dimt=:Ti) - ref = drop29thfeb(ref, dimx=:rlon, dimy=:rlat, dimt=:Ti) - fut = drop29thfeb(fut, dimx=:rlon, dimy=:rlat, dimt=:Ti) + obs = drop29thfeb(obs, dimx=:rlon, dimy=:rlat, dimt=:time) + ref = drop29thfeb(ref, dimx=:rlon, dimy=:rlat, dimt=:time) + fut = drop29thfeb(fut, dimx=:rlon, dimy=:rlat, dimt=:time) # Dimensions - indims = InDims("Ti") - outdims = OutDims(Dim{:Ti}(lookup(fut, :Ti))) + indims = InDims("time") + outdims = OutDims(Dim{:time}(lookup(fut, :time))) # @info Threads.nthreads() @info "Test" @@ -402,10 +402,10 @@ Removes February 29th from the given YAXArray. This function is used for bias co # Returns - `YAXArray`: The modified YAXArray with February 29th removed. """ -function drop29thfeb(ds; dimx=:lon, dimy=:lat, dimt=:Ti) +function drop29thfeb(ds; dimx=:lon, dimy=:lat, dimt=:time) - # ds_subset = ds[Time=Dates.monthday.(lookup(ds, :Ti)) .!= [(2,29)]] - ds_subset = ds[Ti=Where(x-> Dates.monthday(x) != (2,29))] + + ds_subset = ds[time=Where(x-> Dates.monthday(x) != (2,29))] # Old time vector date_vec = lookup(ds_subset, dimt) @@ -413,7 +413,7 @@ function drop29thfeb(ds; dimx=:lon, dimy=:lat, dimt=:Ti) datevec_noleap = CFTime.reinterpret.(DateTimeNoLeap, date_vec) # Rebuild the YAXArray - newarray = set(ds_subset, Ti => datevec_noleap) + newarray = set(ds_subset, time => datevec_noleap) return newarray diff --git a/src/climatology.jl b/src/climatology.jl index e5059578..a34cfff1 100644 --- a/src/climatology.jl +++ b/src/climatology.jl @@ -1,29 +1,31 @@ """ - climato_tp(ds; fct::Function=sum, iduree=3, lead=0) + climato_tp(ds; fct::Function=sum, iduree=3, lead=1) -Calcul de la somme des précipitations pour tous les mois pour la durée spécifié par iduree. En ce moment, l'overlap des années n'est pas pris en charge: si iduree=3, on aura un Cube qui contient les accumulations sur 3 mois pour toutes les séquences de 3 mois, allant de janvier (janvier-février-mars) à octobre (octobre-novembre-décembre). +Calcul de la somme des précipitations pour tous les mois pour la durée spécifié par iduree. En ce moment, l'overlap des années n'est pas pris en charge: si iduree=3, on aura un Cube qui contient les accumulations sur 3 mois pour toutes les séquences de 3 mois, allant de janvier (janvier-février-mars) à décembre (décembre-janvier-février). """ -function climato_tp(ds; fct::Function=sum, iduree=3, lead=0) +function climato_tp(ds; fct::Function=sum, iduree=3, lead=1) # On veut éviter l'overlap - # mois = 1:12-iduree+1 # ancienne version sans lead - mois = 1:12-iduree+1-lead + + moiscirc = CircularArray(1:12) # timestamp = Array{DateTime}(undef, length(yearvec), length(mois)) - results = Array{YAXArray}(undef, length(mois)) + results = Array{YAXArray}(undef, length(moiscirc)) i=1 # compteur pour les résultats mensuels - for imois in mois + for imois in 1:12 + + month1 = moiscirc[imois+lead] + month2 = moiscirc[imois+iduree-1+lead] - # On subset les mois voulu - # obs_subset = subsample(ds, month1 = imois, month2=imois+iduree-1) # ancienne version sans lead - obs_subset = subsample(ds, month1 = imois+lead, month2=imois+iduree-1+lead) + # On subset les mois voulu + obs_subset = subsample(ds, month1 = month1, month2=month2) # on applique la fonction voulu sur le subset suivant selon le temps - results[i] = yearly_clim(obs_subset, fct=sum) + results[i] = yearly_clim(obs_subset, fct=fct) i += 1 end # On concatenate les cubes de la liste results - return concatenatecubes(results, RangeAxis("Mois", 1:length(mois))) + return concatenatecubes(results, Dim{:Mois}(1:12)) end diff --git a/src/ensembles.jl b/src/ensembles.jl index 4242f3b3..91c91f91 100644 --- a/src/ensembles.jl +++ b/src/ensembles.jl @@ -15,16 +15,18 @@ Compute the daily maximum, mean, and minimum values from the input array `xin` a function ensemble_stats(xout, xin) xout .= NaN if !all(ismissing, xin) - xout[1] = maximum(skipmissing(xin)) - xout[2] = mean(skipmissing(xin)) - xout[3] = minimum(skipmissing(xin)) + xin_missing = xin[.!ismissing.(xin)] + xin_nan = xin_missing[.!isnan.(xin_missing)] + xout[1] = Statistics.maximum(xin_nan) + xout[2] = Statistics.mean(xin_nan) + xout[3] = Statistics.minimum(xin_nan) end end """ - ensemble_stats(cube::YAXArray; dim="Ti") + ensemble_stats(cube::YAXArray; dim="time") -Compute the maximum, mean, and minimum values from a YAXArray Cube along dimension `dim` (default to :Ti). +Compute the maximum, mean, and minimum values from a YAXArray Cube along dimension `dim` (default to :time). # Arguments - `cube::YAXArray`: A Cube of data @@ -36,7 +38,7 @@ Compute the maximum, mean, and minimum values from a YAXArray Cube along dimensi """ -function ensemble_stats(cube::YAXArray; dim="Ti") +function ensemble_stats(cube::YAXArray; dim="time") # Dimensions diff --git a/src/export.jl b/src/export.jl deleted file mode 100644 index a50a006f..00000000 --- a/src/export.jl +++ /dev/null @@ -1,120 +0,0 @@ -""" - write(C::ClimGrid, filename::String) - -Write to disk ClimGrid C to netCDF file. Still experimental, some attributes are hardcoded. -""" -function write(C::ClimGrid, filename::String) - - # Test extension - if extension(filename) .!= ".nc" - filename = string(filename, ".nc") - end - - # This creates a new NetCDF file - # The mode "c" stands for creating a new file (clobber) - ds = Dataset(filename, "c") - - latsymbol, lonsymbol = ClimateTools.getsymbols(C) - x, y, timevec = ClimateTools.getdims(C) - longrid, latgrid = ClimateTools.getgrids(C) - - # DIMENSIONS - defDim(ds, string(lonsymbol),length(x)) - defDim(ds, string(latsymbol), length(y)) - defDim(ds, "time", length(timevec)) - - nclon = defVar(ds, string(lonsymbol), Float64, (string(lonsymbol),)) - nclon.attrib["units"] = C.lonunits#"degrees_east" - nclon.attrib["long_name"] = "longitude" - nclon.attrib["standard_name"] = "longitude" - nclon.attrib["actual_range"] = [minimum(x), maximum(x)] - - nclat = defVar(ds, string(latsymbol), Float64, (string(latsymbol),)) - nclat.attrib["units"] = C.latunits#"degrees_north" - nclat.attrib["long_name"] = "latitude" - nclat.attrib["standard_name"] = "latitude" - nclat.attrib["actual_range"] = [minimum(y), maximum(y)] - - if latsymbol != :lat # a lon-lat grid is needed - ncrlat = defVar(ds, "lat", Float64, (string(lonsymbol), string(latsymbol))) - ncrlat.attrib["long_name"] = "latitude" - ncrlat.attrib["units"] = "degrees" - ncrlat.attrib["standard_name"] = "grid_latitude" - ncrlat.attrib["axis"] = "Y" - ncrlat.attrib["coordinate_defines"] = "point" - ncrlat.attrib["actual_range"] = [minimum(latgrid), maximum(latgrid)] - end - - if lonsymbol != :lon - ncrlon = defVar(ds,"lon", Float64, (string(lonsymbol), string(latsymbol))) - ncrlon.attrib["long_name"] = "longitude" - ncrlon.attrib["units"] = "degrees" - ncrlon.attrib["standard_name"] = "grid_longitude" - ncrlon.attrib["axis"] = "X" - ncrlon.attrib["coordinate_defines"] = "point" - ncrlon.attrib["actual_range"] = [minimum(longrid), maximum(longrid)] - end - - if ClimateTools.@isdefined C.grid_mapping["grid_mapping"] - ncmapping = defVar(ds, C.grid_mapping["grid_mapping"], Char, ()) - elseif ClimateTools.@isdefined C.grid_mapping["grid_mapping_name"] - ncmapping = defVar(ds, C.grid_mapping["grid_mapping_name"], Char, ()) - end - for iattr in keys(C.grid_mapping) - ncmapping.attrib[iattr] = C.grid_mapping[iattr] - end - - # time - timeout = Array{Float64, 1}(undef, length(timevec)) - for itime = 1:length(timevec) - timeout[itime] = NCDatasets.timeencode([timevec[itime]], C.timeattrib["units"], C.timeattrib["calendar"])[1] - end - - - # daysfrom = string("days since ", string(timevec[1] - Dates.Day(1))[1:10], " ", string(timevec[1])[12:end]) - - nctime = defVar(ds,"time", Float64, ("time",)) - nctime.attrib["long_name"] = "time" - nctime.attrib["standard_name"] = "time" - nctime.attrib["axis"] = "T" - nctime.attrib["calendar"] = C.typeofcal#"365_day" - nctime.attrib["units"] = C.timeattrib["units"]#daysfrom - # nctime.attrib["bounds"] = "time_bnds" - nctime.attrib["coordinate_defines"] = "point" - - # Define the variables contained in ClimGrid C - v = defVar(ds, C.variable, Float32, (string(lonsymbol),string(latsymbol), "time")) - - # write attributes - for iattr in keys(C.varattribs) - v.attrib[iattr] = C.varattribs[iattr] - end - - # Define global attributes - for iattr in keys(C.globalattribs) - ds.attrib[iattr] = C.globalattribs[iattr] - end - - v[:] = C[1].data - - # Time vector - nctime[:] = timeout - - # Dimensions - nclon[:] = x - nclat[:] = y - - # Dimensions - if latsymbol != :lat - ncrlon[:] = longrid - end - if latsymbol != :lat - ncrlat[:] = latgrid - end - - # Close file - close(ds) - - return true - -end diff --git a/src/extract.jl b/src/extract.jl index 58a7b544..2a2a7823 100644 --- a/src/extract.jl +++ b/src/extract.jl @@ -1,864 +1,271 @@ -""" - load(file::String, vari::String; poly = Array{Float64}([]), start_date::Tuple, end_date::Tuple, data_units::String = "") - -Returns a ClimGrid type with the data in **file** of variable **vari** inside the polygon **poly**. Metadata is built-in the ClimGrid type, from the netCDF attributes. - -Inside the ClimgGrid type, the data is stored into an AxisArray data type, with time, longitude/x and latitude/y dimensions. - -The polygon provided should be in the -180, +180 longitude format. If the polygon crosses the International Date Line, the polygon should be splitted in multiple parts (i.e. multi-polygons). - -Options for data_units are for precipitation : "mm", which converts the usual "kg m-2 s-1" unit found in netCDF files. For temperature : "Celsius", which converts the usual "Kelvin" unit. - -Temporal subsetting can be done by providing start_date and end-date Tuples of length 1 (year), length 3 (year, month, day) or 6 (hour, minute, second). - -**Note:** load uses [CF conventions](http://cfconventions.org/). If you are unable to read the netCDF file with load, the user will need to read it with low-level functions available in [NetCDF.jl package](https://github.com/JuliaGeo/NetCDF.jl) or [NCDatasets.jl](https://github.com/Alexander-Barth/NCDatasets.jl) or re-create standartized netCDF files. -""" -function load(file::String, vari::String; poly = ([]), start_date::Tuple=(Inf,), end_date::Tuple=(Inf,), data_units::String = "") - - # TODO this file is a complete mess, but it works. Clean it up! - - # Get attributes - ds = NCDatasets.Dataset(file) - attribs_dataset = ds.attrib - attribs = Dict(attribs_dataset) - - # Data pointer - data_pointer = ds[vari] - - # The following attributes should be set for netCDF files that follows CF conventions. - # project, institute, model, experiment, frequency - project = ClimateTools.project_id(attribs_dataset) - institute = ClimateTools.institute_id(attribs_dataset) - model = ClimateTools.model_id(attribs_dataset) - experiment = ClimateTools.experiment_id(attribs_dataset) - frequency = ClimateTools.frequency_var(attribs_dataset) - runsim = ClimateTools.runsim_id(attribs_dataset) - grid_mapping = ClimateTools.get_mapping(keys(ds))#, vari) - - if grid_mapping == "Regular_longitude_latitude" - latstatus = false - else - latstatus = true - end - - # Get dimensions names - lonname = get_dimname(ds, "X") - latname = get_dimname(ds, "Y") - timename = get_dimname(ds, "T") - if ndims(data_pointer) == 4 - levname = get_dimname(ds, "Z") - levunits = ds[levname].attrib["units"] - end - # Dimension vector - lat_raw = nomissing(ds[latname][:], NaN) - lon_raw = nomissing(ds[lonname][:], NaN) - - # Create dict with latname and lonname - if ndims(data_pointer) == 3 - dimension_dict = Dict(["lon" => lonname, - "lat" => latname, - "time" => timename]) - - elseif ndims(data_pointer) == 4 - dimension_dict = Dict(["lon" => lonname, - "lat" => latname, - "height" => levname, - "time" => "time"]) - end - - # Get units - dataunits = ds[vari].attrib["units"] - latunits = ds[latname].attrib["units"] - lonunits = ds[lonname].attrib["units"] - - # Calendar - caltype = get_calendar(ds, timename) - - # Get variable attributes - varattrib = Dict(ds[vari].attrib) - - if latstatus # means we don't have a "regular" grid - # Get names of grid - latgrid_name = ClimateTools.latgridname(ds) - longrid_name = ClimateTools.longridname(ds) - - latgrid = nomissing(ds[latgrid_name][:], NaN) - longrid = nomissing(ds[longrid_name][:], NaN) - - # Ensure we have a grid - if ndims(latgrid) == 1 && ndims(longrid) == 1 - longrid, latgrid = ndgrid(lon_raw, lat_raw) - map_attrib = Dict(["grid_mapping" => "Regular_longitude_latitude"]) - end - - map_attrib = build_grid_mapping(ds, grid_mapping) - varattrib["grid_mapping"] = grid_mapping - - else # if no grid provided, create one - longrid, latgrid = ndgrid(lon_raw, lat_raw) - map_attrib = Dict(["grid_mapping" => grid_mapping]) - varattrib["grid_mapping"] = grid_mapping - end - - # ===================== - # TIME - # Get time resolution - timeV = ds[timename][:] - if frequency == "N/A" || !ClimateTools.@isdefined frequency - try - try - frequency = string(diff(timeV)[2]) - catch - frequency = string(diff(timeV)[1]) - end - catch - frequency = "N/A" - end - end - - timeattrib = Dict(ds[timename].attrib) - T = typeof(timeV[1]) - idxtimebeg, idxtimeend = timeindex(timeV, start_date, end_date, T) - timeV = timeV[idxtimebeg:idxtimeend] - - # ================== - # Spatial shift if grid is 0-360. - rotatedgrid = false - if sum(longrid .> 180) >= 1 - rotatedgrid = true - - # Shift 360 degrees grid to -180, +180 degrees - longrid_flip = ClimateTools.shiftgrid_180_west_east(longrid) - - # Shift 360 degrees vector to -180, +180 degrees - lon_raw_flip = ClimateTools.shiftvector_180_west_east(lon_raw) - - else - longrid_flip = longrid # grid is already "flipped" by design - end - - # =================== - # GET DATA - if !isempty(poly) - - # Test to see if the polygon crosses the meridian - meridian = ClimateTools.meridian_check(poly) - - # Build mask based on provided polygon - msk = inpolygrid(longrid_flip, latgrid, poly) - - if sum(isnan.(msk)) == length(msk) # no grid point inside polygon - throw(error("No grid points found inside the provided polygon")) - end - - if rotatedgrid - # Regrid msk to original grid if the grid have been rotated to get proper index for data extraction - msk = ClimateTools.shiftarray_east_west(msk, longrid_flip) - end - - #Extract data based on mask - data_ext = ClimateTools.extractdata(data_pointer, msk, idxtimebeg, idxtimeend) - data_ext = nomissing(data_ext, NaN) - - - #new mask (e.g. representing the region of the polygon) - begin - I = Base.findall(!isnan, msk) - idlon, idlat = (getindex.(I, 1), getindex.(I, 2)) - end - minXgrid = minimum(idlon) - maxXgrid = maximum(idlon) - minYgrid = minimum(idlat) - maxYgrid = maximum(idlat) - msk = msk[minXgrid:maxXgrid, minYgrid:maxYgrid] - data_mask = applymask(data_ext, msk) # needed when polygon is not rectangular - - if rotatedgrid - # Regrid msk to shifted grid if the grid have been rotated to get final mask - #shiftarray_west_east(msk, longrid) - end - - # Get lon_raw and lat_raw for such region - lon_raw = lon_raw[minXgrid:maxXgrid] - lat_raw = lat_raw[minYgrid:maxYgrid] - - if map_attrib["grid_mapping"] == "Regular_longitude_latitude" - - lon_raw = ClimateTools.shiftvector_180_west_east(lon_raw) - end - - # Idem for longrid and latgrid - if meridian - longrid = ClimateTools.shiftgrid_180_east_west(longrid)#grideast, gridwest) - longrid = longrid[minXgrid:maxXgrid, minYgrid:maxYgrid] - latgrid = latgrid[minXgrid:maxXgrid, minYgrid:maxYgrid] - - # Re-translate to -180, 180 if 0, 360 - - if rotatedgrid - - longrid_flip = ClimateTools.shiftgrid_180_west_east(longrid) - - data = permute_west_east(data_mask, longrid)#idxwest, idxeast) - msk = ClimateTools.permute_west_east(msk, longrid) - - # TODO Try to trim padding when meridian is crossed and model was on a 0-360 coords - # idlon, idlat = findn(.!isnan.(msk)) - # minXgrid = minimum(idlon) - # maxXgrid = maximum(idlon) - # minYgrid = minimum(idlat) - # maxYgrid = maximum(idlat) - # - # msk = msk[minXgrid:maxXgrid, minYgrid:maxYgrid] - # data = data[:, minXgrid:maxXgrid, minYgrid:maxYgrid] - # lon_raw_flip = lon_raw[minXgrid:maxXgrid] - # - # longrid_flip = longrid[minXgrid:maxXgrid, minYgrid:maxYgrid] - # longrid_flip = ClimateTools.shiftgrid_180_west_east(longrid_flip) - # latgrid = latgrid[minXgrid:maxXgrid, minYgrid:maxYgrid] +# """ +# getdim_lat(ds::NCDatasets.Dataset) + +# Returns the name of the "latitude" dimension and the status related to a regular grid. The latitude dimension is usually "latitude", "lat", "y", "yc", "rlat". +# """ +# function getdim_lat(ds::NCDatasets.Dataset) + +# if sum(keys(ds.dim) .== "rlat") == 1 +# return "rlat", true +# elseif sum(keys(ds.dim) .== "lat") == 1 +# return "lat", false +# elseif sum(keys(ds.dim) .== "latitude") == 1 +# return "latitude", false +# elseif sum(keys(ds.dim) .== "y") == 1 +# return "y", true +# elseif sum(keys(ds.dim) .== "yc") == 1 +# return "yc", true +# elseif sum(keys(ds.dim) .== "lat_c") == 1 +# return "lat_c", false +# else +# error("Manually verify x/lat dimension name") +# end + +# end + +# """ +# getdim_lon(ds::NCDatasets.Dataset) + +# Returns the name of the "longitude" dimension and the status related to a regular grid. The longitude dimension is usually "longitue", "lon", "x", "xc", "rlon". +# """ +# function getdim_lon(ds::NCDatasets.Dataset) + +# if sum(keys(ds.dim) .== "rlon") == 1 +# return "rlon", true +# elseif sum(keys(ds.dim) .== "lon") == 1 +# return "lon", false +# elseif sum(keys(ds.dim) .== "longitude") == 1 +# return "longitude", false +# elseif sum(keys(ds.dim) .== "x") == 1 +# return "x", false +# elseif sum(keys(ds.dim) .== "xc") == 1 +# return "xc", false +# elseif sum(keys(ds.dim) .== "lon_c") == 1 +# return "lon_c", false +# else +# error("Manually verify x/lat dimension name") +# end + +# end + +# """ +# latgridname(ds::NCDatasets.Dataset) + +# Returns the name of the latitude grid when datasets is not on a rectangular grid. +# """ +# function latgridname(ds::NCDatasets.Dataset) + +# # if in("lat", keys(ds)) +# # return "lat" +# # elseif in("latitude", keys(ds)) +# # return "latitude" +# # elseif in("lat_c", keys(ds)) +# # return "lat_c" +# # else +# # error("Variable name is not supported. File an issue on https://github.com/Balinus/ClimateTools.jl/issues") +# # end + +# names = ["degree_north", +# "degrees_north", +# "degreeN", +# "degreesN", +# "degree_N", +# "degrees_N"] + +# varnames = keys(ds) + +# found_var = "NA" + +# for ivar in varnames + +# if isdefined(ds[ivar], :attrib) +# if haskey(ds[ivar].attrib, "units") +# if in(ds[ivar].attrib["units"], names) +# found_var = ivar +# end +# end +# end + +# end + +# return found_var + +# end + +# """ +# longridname(ds::NCDatasets.Dataset) + +# Returns the name of the longitude grid when datasets is not on a rectangular grid. +# """ +# function longridname(ds::NCDatasets.Dataset) + +# # if in("lon", keys(ds)) +# # return "lon" +# # elseif in("longitude", keys(ds)) +# # return "longitude" +# # elseif in("lon_c", keys(ds)) +# # return "lon_c" +# # else +# # error("Variable name is not supported. File an issue on https://github.com/Balinus/ClimateTools.jl/issues") +# # end + +# names = ["degree_east", +# "degrees_east", +# "degreeE", +# "degreesE", +# "degree_E", +# "degrees_E"] + +# varnames = keys(ds) - # data = applymask(data, msk) +# found_var = "NA" - else - data = data_mask - end +# for ivar in varnames - else +# if isdefined(ds[ivar], :attrib) +# if haskey(ds[ivar].attrib, "units") +# if in(ds[ivar].attrib["units"], names) +# found_var = ivar +# end +# end +# end - if rotatedgrid # flip in original grid - longrid = ClimateTools.shiftgrid_180_east_west(longrid) #grideast, gridwest) - end +# end - longrid = longrid[minXgrid:maxXgrid, minYgrid:maxYgrid] - latgrid = latgrid[minXgrid:maxXgrid, minYgrid:maxYgrid] +# return found_var - if rotatedgrid - longrid_flip = shiftgrid_180_west_east(longrid) - lon_raw_flip = shiftvector_180_east_west(lon_raw) +# end - data = permute_west_east(data_mask, longrid)#idxwest, idxeast) - msk = ClimateTools.permute_west_east(msk, longrid) - else - data = data_mask - end +# """ +# getdim_tim(ds::NCDatasets.Dataset) - end +# Returns the name of the "time" dimension. +# """ +# function getdim_tim(ds::NCDatasets.Dataset) - elseif isempty(poly) # no polygon clipping +# if sum(keys(ds.dim) .== "time") == 1 +# return "time", false +# elseif sum(keys(ds.dim) .== "tim") == 1 +# return "tim", false +# else +# error("Manually verify time dimension name") +# end - msk = Array{Float64}(ones((size(data_pointer, 1), size(data_pointer, 2)))) - data_ext = ClimateTools.extractdata(data_pointer, msk, idxtimebeg, idxtimeend) - # replace_missing!(data_ext) - data_ext = nomissing(data_ext, NaN) - # data_ext = convert(data_ext, Float32) +# end - if rotatedgrid +# """ +# extractdata(data, msk, idxtimebeg, idxtimeend) - # Flip data "west-east" - data = ClimateTools.permute_west_east(data_ext, longrid) +# Returns the data contained in netCDF file, using the appropriate mask and time index. Used internally by `load`. - else - data = data_ext - end +# """ +# function extractdata(data, msk, idxtimebeg, idxtimeend) - end +# # idlon, idlat = findn(.!isnan.(msk)) +# begin +# I = Base.findall(!isnan, msk) +# idlon, idlat = (getindex.(I, 1), getindex.(I, 2)) +# end +# minXgrid = minimum(idlon) +# maxXgrid = maximum(idlon) +# minYgrid = minimum(idlat) +# maxYgrid = maximum(idlat) - if rotatedgrid - longrid .= longrid_flip - lon_raw .= lon_raw_flip - end +# if ndims(data) == 3 +# dataout = data[minXgrid:maxXgrid, minYgrid:maxYgrid, idxtimebeg:idxtimeend] +# # Permute dims +# # data = permutedims(data, [3, 1, 2]) +# elseif ndims(data) == 4 +# dataout = data[minXgrid:maxXgrid, minYgrid:maxYgrid, :, idxtimebeg:idxtimeend] +# # Permute dims +# # data = permutedims(data, [4, 1, 2, 3]) +# end - # Convert units of optional argument data_units is provided - if data_units == "Celsius" && (vari == "tas" || vari == "tasmax" || vari == "tasmin") && dataunits == "K" - data .-= 273.15 - dataunits = "°C" - varattrib["units"] = "Celsius" - # @warn "Using Celsius can be problematic for arithmetic operations. Best practice is to keep Kelvin and only convert to Celsius at the end with the overloaded ClimateTools.uconvert function." - end +# return dataout +# end - if data_units == "mm" && vari == "pr" && (dataunits == "kg m-2 s-1" || dataunits == "mm s-1") +# function extractdata2D(data, msk) - factor = timeresolution(ds[timename]) - # factor = pr_timefactor(rez) - data .*= factor.value - dataunits = "mm" - varattrib["standard_name"] = "precipitation" - varattrib["units"] = "mm" - end +# # idlon, idlat = findn(.!isnan.(msk)) +# begin +# I = Base.findall(!isnan, msk) +# idlon, idlat = (getindex.(I, 1), getindex.(I, 2)) +# end +# minXgrid = minimum(idlon) +# maxXgrid = maximum(idlon) +# minYgrid = minimum(idlat) +# maxYgrid = maximum(idlat) - # Create AxisArray from variable "data" - if ndims(data) == 3 - # Convert data to AxisArray - dataOut = AxisArray(data, Axis{Symbol(lonname)}(lon_raw), Axis{Symbol(latname)}(lat_raw), Axis{:time}(timeV)) - elseif ndims(data) == 4 # this imply a 3D field (height component) - # Get level vector - dlev = ds[levname][:] - # Convert data to AxisArray - dataOut = AxisArray(data, Axis{Symbol(lonname)}(lon_raw), Axis{Symbol(latname)}(lat_raw), Axis{Symbol(levname)}(dlev), Axis{:time}(timeV)) - else - throw(error("load takes only 3D and 4D variables for the moment")) - end +# data = data[minXgrid:maxXgrid, minYgrid:maxYgrid] - C = ClimGrid(dataOut, longrid=longrid, latgrid=latgrid, msk=msk, grid_mapping=map_attrib, dimension_dict=dimension_dict, timeattrib=timeattrib, model=model, frequency=frequency, experiment=experiment, run=runsim, project=project, institute=institute, filename=file, dataunits=dataunits, latunits=latunits, lonunits=lonunits, variable=vari, typeofvar=vari, typeofcal=caltype, varattribs=varattrib, globalattribs=attribs) +# return data - close(ds) - # NetCDF.close(ncfile) +# end - return C +# """ +# get_mapping(ds::Array{String,1}) -end +# Returns the grid_mapping of Dataset *ds* +# """ +# function get_mapping(K::Array{String,1}) -""" - load(files::Array{String,1}, vari::String; poly = ([]), start_date::Date = Date(-4000), end_date::Date = Date(-4000), data_units::String = "") +# if in("rotated_pole", K) +# return "rotated_pole" -Loads and merge the files contained in the arrar files. -""" -function load(files::Array{String,1}, vari::String; poly=([]), start_date::Tuple=(Inf,), end_date::Tuple=(Inf,), data_units::String="") +# elseif in("lambert_conformal_conic", K) +# return "lambert_conformal_conic" - nfiles = length(files) - C = Array{ClimGrid}(undef, nfiles) # initialize # TODO better initialization - datesort = Array{Any}(undef, nfiles) - Cout = [] +# elseif in("rotated_latitude_longitude", K) +# return "rotated_latitude_longitude" - p = Progress(nfiles*2, 3, "Loading files: ") +# elseif in("rotated_mercator", K) +# return "rotated_mercator" - for ifile = 1:nfiles +# elseif in("crs", K) +# return "crs" - C[ifile] = load(files[ifile], vari, poly = poly, start_date=start_date, end_date=end_date, data_units=data_units) - datesort[ifile] = get_timevec(C[ifile])[1] +# elseif in("polar_stereographic", K) +# return "polar_stereographic" - next!(p) - end +# else +# return "Regular_longitude_latitude" +# end - # Sort files based on timevector to ensure that merge results in amonotone increase in time - idx = sortperm(datesort) - C = C[idx] +# end - for imod = 1:nfiles - if imod == 1 - Cout = C[imod] - else - Cout = merge(Cout, C[imod]) - end - next!(p) - end +# """ +# get_mapping(ds::Array{String,1}) - return Cout -end - - -""" - load2D(file::String, vari::String; poly=[], data_units::String="") +# Returns the grid_mapping of Dataset *ds* +# """ +# function get_mapping(ds::NCDatasets.Dataset, vari) -Returns a 2D array. Should be used for *fixed* data, such as orography. -""" -function load2D(file::String, vari::String; poly=[], data_units::String="") - # Get attributes - - ds = NCDatasets.Dataset(file) - attribs_dataset = ds.attrib - attribs = Dict(attribs_dataset) - - # Data pointer - data_pointer = ds[vari] - - project = ClimateTools.project_id(attribs_dataset) - institute = ClimateTools.institute_id(attribs_dataset) - model = ClimateTools.model_id(attribs_dataset) - experiment = ClimateTools.experiment_id(attribs_dataset) - frequency = ClimateTools.frequency_var(attribs_dataset) - runsim = ClimateTools.runsim_id(attribs_dataset) - grid_mapping = ClimateTools.get_mapping(keys(ds)) - - if grid_mapping == "Regular_longitude_latitude" - latstatus = false - else - latstatus = true - end - - # Get dimensions names - lonname = get_dimname(ds, "X") - latname = get_dimname(ds, "Y") - - dataunits = ds[vari].attrib["units"] - latunits = ds[latname].attrib["units"] - lonunits = ds[lonname].attrib["units"] +# grid = "" - # Create dict with latname and lonname - dimension_dict = Dict(["lon" => lonname, "lat" => latname]) - - # lat_raw = NetCDF.ncread(file, latname) - lat_raw = nomissing(ds[latname][:], NaN) - # lon_raw = NetCDF.ncread(file, lonname) - lon_raw = nomissing(ds[lonname][:], NaN) +# try +# grid = ds[vari].attrib["grid_mapping"] +# catch +# grid = "Regular_longitude_latitude" +# end - # Get variable attributes - varattrib = Dict(ds[vari].attrib) +# return grid - if latstatus # means we don't have a "regular" grid - # Get names of grid - latgrid_name = latgridname(ds) - longrid_name = longridname(ds) - # latgrid = NetCDF.ncread(file, latgrid_name) - latgrid = nomissing(ds[latgrid_name][:], NaN) - # longrid = NetCDF.ncread(file, longrid_name) - longrid = nomissing(ds[longrid_name][:], NaN) +# end - map_attrib = build_grid_mapping(ds, grid_mapping) - varattrib["grid_mapping"] = grid_mapping - - else # if no grid provided, create one - longrid, latgrid = ndgrid(lon_raw, lat_raw) - map_attrib = Dict(["grid_mapping" => grid_mapping]) - varattrib["grid_mapping"] = grid_mapping - end - - # ================== - # Spatial shift if grid is 0-360. - rotatedgrid = false - if sum(longrid .> 180) >= 1 - rotatedgrid = true +# function build_grid_mapping(ds::NCDatasets.Dataset, grid_mapping::String) - # Shift 360 degrees grid to -180, +180 degrees - longrid_flip = ClimateTools.shiftgrid_180_west_east(longrid) - - # Shift 360 degrees vector to -180, +180 degrees - lon_raw_flip = ClimateTools.shiftvector_180_west_east(lon_raw) - - else - longrid_flip = longrid # grid is already "flipped" by design - end - - # =================== - # GET DATA - if !isempty(poly) +# if ClimateTools.@isdefined grid_mapping +# # map_dim = varattrib[grid_mapping] +# map_attrib = Dict(ds[grid_mapping].attrib) +# map_attrib["grid_mapping"] = grid_mapping +# else +# error("File an issue on https://github.com/Balinus/ClimateTools.jl/issues to get the grid supported") +# end - # Test to see if the polygon crosses the meridian - meridian = ClimateTools.meridian_check(poly) +# return map_attrib - # Build mask based on provided polygon - msk = inpolygrid(longrid_flip, latgrid, poly) - - if sum(isnan.(msk)) == length(msk) # no grid point insode polygon - throw(error("No grid points found inside the provided polygon")) - end - - if rotatedgrid - # Regrid msk to original grid if the grid have been rotated to get proper index for data extraction - msk = ClimateTools.shiftarray_east_west(msk, longrid_flip) - end - - #Extract data based on mask - data_ext = ClimateTools.extractdata2D(data_pointer, msk) - # replace_missing!(data_ext) - data_ext = nomissing(data_ext, NaN) - # data_ext = convert(data_ext, Float32) - - begin - I = Base.findall(!isnan, msk) - idlon, idlat = (getindex.(I, 1), getindex.(I, 2)) - end - - minXgrid = minimum(idlon) - maxXgrid = maximum(idlon) - minYgrid = minimum(idlat) - maxYgrid = maximum(idlat) - msk = msk[minXgrid:maxXgrid, minYgrid:maxYgrid] - data_mask = applymask(data_ext, msk) # needed when polygon is not rectangular - - if rotatedgrid - # Regrid msk to shifted grid if the grid have been rotated to get final mask - #shiftarray_west_east(msk, longrid) - end - - # Get lon_raw and lat_raw for such region - lon_raw = lon_raw[minXgrid:maxXgrid] - lat_raw = lat_raw[minYgrid:maxYgrid] - - if map_attrib["grid_mapping"] == "Regular_longitude_latitude" - - lon_raw = ClimateTools.shiftvector_180_west_east(lon_raw) - - end - - # Idem for longrid and latgrid - if meridian - longrid = ClimateTools.shiftgrid_180_east_west(longrid)#grideast, gridwest) - longrid = longrid[minXgrid:maxXgrid, minYgrid:maxYgrid] - latgrid = latgrid[minXgrid:maxXgrid, minYgrid:maxYgrid] - - # Re-translate to -180, 180 if 0, 360 - - if rotatedgrid - - longrid_flip = ClimateTools.shiftgrid_180_west_east(longrid) - - data = permute_west_east(data_mask, longrid)#idxwest, idxeast) - msk = ClimateTools.permute_west_east(msk, longrid) - - # TODO Try to trim padding when meridian is crossed and model was on a 0-360 coords - # idlon, idlat = findn(.!isnan.(msk)) - # minXgrid = minimum(idlon) - # maxXgrid = maximum(idlon) - # minYgrid = minimum(idlat) - # maxYgrid = maximum(idlat) - # - # msk = msk[minXgrid:maxXgrid, minYgrid:maxYgrid] - # data = data[:, minXgrid:maxXgrid, minYgrid:maxYgrid] - # lon_raw_flip = lon_raw[minXgrid:maxXgrid] - # - # longrid_flip = longrid[minXgrid:maxXgrid, minYgrid:maxYgrid] - # longrid_flip = ClimateTools.shiftgrid_180_west_east(longrid_flip) - # latgrid = latgrid[minXgrid:maxXgrid, minYgrid:maxYgrid] - - # data = applymask(data, msk) - - else - data = data_mask - end - - else - - if rotatedgrid # flip in original grid - longrid = shiftgrid_180_east_west(longrid) #grideast, gridwest) - end - - longrid = longrid[minXgrid:maxXgrid, minYgrid:maxYgrid] - latgrid = latgrid[minXgrid:maxXgrid, minYgrid:maxYgrid] - - if rotatedgrid - - longrid_flip = shiftgrid_180_west_east(longrid) - - lon_raw_flip = shiftvector_180_east_west(lon_raw) - - data = permute_west_east(data_mask, longrid)#idxwest, idxeast) - msk = ClimateTools.permute_west_east(msk, longrid) - - else - data = data_mask - end - end - - elseif isempty(poly) # no polygon clipping - msk = Array{Float64}(ones((size(data_pointer, 1), size(data_pointer, 2)))) - data_ext = extractdata2D(data_pointer, msk) - # replace_missing!(data_ext) - data_ext = nomissing(data_ext, NaN) - # data_ext = convert(data_ext, Float32) - - if rotatedgrid - # Flip data "west-east" - data = permute_west_east(data_ext, longrid) - else - data = data_ext - end - - end - - if rotatedgrid - longrid = longrid_flip - lon_raw = lon_raw_flip - end - - # Convert data to AxisArray - dataOut = AxisArray(data, Axis{Symbol(lonname)}(lon_raw), Axis{Symbol(latname)}(lat_raw)) - - - - C = ClimGrid(dataOut, longrid=longrid, latgrid=latgrid, msk=msk, grid_mapping=map_attrib, dimension_dict=dimension_dict, model=model, frequency=frequency, experiment=experiment, run=runsim, project=project, institute=institute, filename=file, dataunits=dataunits, latunits=latunits, lonunits=lonunits, variable=vari, typeofvar=vari, typeofcal="fixed", varattribs=varattrib, globalattribs=attribs) - - close(ds) - - return C - -end - -model_id(attrib::NCDatasets.Attributes) = get(attrib,"model_id", get(attrib, "parent_source_id", get(attrib,"model","N/A"))) - -experiment_id(attrib::NCDatasets.Attributes) = get(attrib,"experiment_id",get(attrib,"experiment","N/A")) - -project_id(attrib::NCDatasets.Attributes) = get(attrib,"project_id", get(attrib, "mip_era", get(attrib,"project","N/A"))) - -institute_id(attrib::NCDatasets.Attributes) = get(attrib,"institute_id",get(attrib, "institution_id", get(attrib,"institute","N/A"))) - -frequency_var(attrib::NCDatasets.Attributes) = get(attrib,"frequency","N/A") - -runsim_id(attrib::NCDatasets.Attributes) = get(attrib, "parent_experiment_rip", get(attrib,"driving_model_ensemble_member","N/A")) - - -""" - getdim_lat(ds::NCDatasets.Dataset) - -Returns the name of the "latitude" dimension and the status related to a regular grid. The latitude dimension is usually "latitude", "lat", "y", "yc", "rlat". -""" -function getdim_lat(ds::NCDatasets.Dataset) - - if sum(keys(ds.dim) .== "rlat") == 1 - return "rlat", true - elseif sum(keys(ds.dim) .== "lat") == 1 - return "lat", false - elseif sum(keys(ds.dim) .== "latitude") == 1 - return "latitude", false - elseif sum(keys(ds.dim) .== "y") == 1 - return "y", true - elseif sum(keys(ds.dim) .== "yc") == 1 - return "yc", true - elseif sum(keys(ds.dim) .== "lat_c") == 1 - return "lat_c", false - else - error("Manually verify x/lat dimension name") - end - -end - -""" - getdim_lon(ds::NCDatasets.Dataset) - -Returns the name of the "longitude" dimension and the status related to a regular grid. The longitude dimension is usually "longitue", "lon", "x", "xc", "rlon". -""" -function getdim_lon(ds::NCDatasets.Dataset) - - if sum(keys(ds.dim) .== "rlon") == 1 - return "rlon", true - elseif sum(keys(ds.dim) .== "lon") == 1 - return "lon", false - elseif sum(keys(ds.dim) .== "longitude") == 1 - return "longitude", false - elseif sum(keys(ds.dim) .== "x") == 1 - return "x", false - elseif sum(keys(ds.dim) .== "xc") == 1 - return "xc", false - elseif sum(keys(ds.dim) .== "lon_c") == 1 - return "lon_c", false - else - error("Manually verify x/lat dimension name") - end - -end - -""" - latgridname(ds::NCDatasets.Dataset) - -Returns the name of the latitude grid when datasets is not on a rectangular grid. -""" -function latgridname(ds::NCDatasets.Dataset) - - # if in("lat", keys(ds)) - # return "lat" - # elseif in("latitude", keys(ds)) - # return "latitude" - # elseif in("lat_c", keys(ds)) - # return "lat_c" - # else - # error("Variable name is not supported. File an issue on https://github.com/Balinus/ClimateTools.jl/issues") - # end - - names = ["degree_north", - "degrees_north", - "degreeN", - "degreesN", - "degree_N", - "degrees_N"] - - varnames = keys(ds) - - found_var = "NA" - - for ivar in varnames - - if isdefined(ds[ivar], :attrib) - if haskey(ds[ivar].attrib, "units") - if in(ds[ivar].attrib["units"], names) - found_var = ivar - end - end - end - - end - - return found_var - -end - -""" - longridname(ds::NCDatasets.Dataset) - -Returns the name of the longitude grid when datasets is not on a rectangular grid. -""" -function longridname(ds::NCDatasets.Dataset) - - # if in("lon", keys(ds)) - # return "lon" - # elseif in("longitude", keys(ds)) - # return "longitude" - # elseif in("lon_c", keys(ds)) - # return "lon_c" - # else - # error("Variable name is not supported. File an issue on https://github.com/Balinus/ClimateTools.jl/issues") - # end - - names = ["degree_east", - "degrees_east", - "degreeE", - "degreesE", - "degree_E", - "degrees_E"] - - varnames = keys(ds) - - found_var = "NA" - - for ivar in varnames - - if isdefined(ds[ivar], :attrib) - if haskey(ds[ivar].attrib, "units") - if in(ds[ivar].attrib["units"], names) - found_var = ivar - end - end - end - - end - - return found_var - - - -end - -""" - getdim_tim(ds::NCDatasets.Dataset) - -Returns the name of the "time" dimension. -""" -function getdim_tim(ds::NCDatasets.Dataset) - - if sum(keys(ds.dim) .== "time") == 1 - return "time", false - elseif sum(keys(ds.dim) .== "tim") == 1 - return "tim", false - else - error("Manually verify time dimension name") - end - -end - -""" - extractdata(data, msk, idxtimebeg, idxtimeend) - -Returns the data contained in netCDF file, using the appropriate mask and time index. Used internally by `load`. - -""" -function extractdata(data, msk, idxtimebeg, idxtimeend) - - # idlon, idlat = findn(.!isnan.(msk)) - begin - I = Base.findall(!isnan, msk) - idlon, idlat = (getindex.(I, 1), getindex.(I, 2)) - end - minXgrid = minimum(idlon) - maxXgrid = maximum(idlon) - minYgrid = minimum(idlat) - maxYgrid = maximum(idlat) - - if ndims(data) == 3 - dataout = data[minXgrid:maxXgrid, minYgrid:maxYgrid, idxtimebeg:idxtimeend] - # Permute dims - # data = permutedims(data, [3, 1, 2]) - elseif ndims(data) == 4 - dataout = data[minXgrid:maxXgrid, minYgrid:maxYgrid, :, idxtimebeg:idxtimeend] - # Permute dims - # data = permutedims(data, [4, 1, 2, 3]) - end - - return dataout -end - -function extractdata2D(data, msk) - - # idlon, idlat = findn(.!isnan.(msk)) - begin - I = Base.findall(!isnan, msk) - idlon, idlat = (getindex.(I, 1), getindex.(I, 2)) - end - minXgrid = minimum(idlon) - maxXgrid = maximum(idlon) - minYgrid = minimum(idlat) - maxYgrid = maximum(idlat) - - data = data[minXgrid:maxXgrid, minYgrid:maxYgrid] - - return data - -end - - -""" - get_mapping(ds::Array{String,1}) - -Returns the grid_mapping of Dataset *ds* -""" -function get_mapping(K::Array{String,1}) - - if in("rotated_pole", K) - return "rotated_pole" - - elseif in("lambert_conformal_conic", K) - return "lambert_conformal_conic" - - elseif in("rotated_latitude_longitude", K) - return "rotated_latitude_longitude" - - elseif in("rotated_mercator", K) - return "rotated_mercator" - - elseif in("crs", K) - return "crs" - - elseif in("polar_stereographic", K) - return "polar_stereographic" - - else - return "Regular_longitude_latitude" - end - -end - -""" - get_mapping(ds::Array{String,1}) - -Returns the grid_mapping of Dataset *ds* -""" -function get_mapping(ds::NCDatasets.Dataset, vari) - - grid = "" - - try - grid = ds[vari].attrib["grid_mapping"] - catch - grid = "Regular_longitude_latitude" - end - - return grid - - -end - -function build_grid_mapping(ds::NCDatasets.Dataset, grid_mapping::String) - - if ClimateTools.@isdefined grid_mapping - # map_dim = varattrib[grid_mapping] - map_attrib = Dict(ds[grid_mapping].attrib) - map_attrib["grid_mapping"] = grid_mapping - else - error("File an issue on https://github.com/Balinus/ClimateTools.jl/issues to get the grid supported") - end - - return map_attrib - -end +# end diff --git a/src/functions.jl b/src/functions.jl index 1af76e69..23c8591d 100644 --- a/src/functions.jl +++ b/src/functions.jl @@ -165,289 +165,17 @@ function inpolygrid(lon::AbstractArray{N, 2} where N, lat::AbstractArray{N,2} wh end -# TODO define interpolation for 4D grid +function inpolygrid(ds::Dataset, poly::AbstractArray{N,2} where N) -# """ -# C = regrid(A::ClimGrid, B::ClimGrid; solver=Kriging(), min=[], max=[]) - -# Interpolate `ClimGrid` A onto the lon-lat grid of `ClimGrid` B, using the GeoStats.jl methods. - -# Min and max optional keyword are used to constraint the results of the interpolation. For example, interpolating bounded fields can lead to unrealilstic values, such as negative precipitation. In that case, one would use min=0.0 to convert negative precipitation to 0.0. - -# """ -# function regrid(A::ClimGrid, B::ClimGrid; solver=Kriging(), min=[], max=[]) - -# # --------------------------------------- -# # Get lat-lon information from ClimGrid B -# londest, latdest = ClimateTools.getgrids(B) - -# # Get lat-lon information from ClimGrid A -# lonorig, latorig = ClimateTools.getgrids(A) -# # points = hcat(lonorig[:], latorig[:]) - -# # ----------------------------------------- -# # Get initial data and time from ClimGrid A -# dataorig = A[1].data -# timeorig = get_timevec(A)#[1][Axis{:time}][:] # the function will need to loop over time - -# # --------------------- -# # Allocate output Array -# OUT = zeros(Float64, (size(B.data, 1), size(B.data, 2), length(timeorig))) - -# # ------------------------ -# # Interpolation -# interp!(OUT, timeorig, dataorig, lonorig, latorig, londest, latdest, solver=solver, msk=B.msk) - -# if !isempty(min) -# OUT[OUT .<= min] .= min -# end - -# if !isempty(max) -# OUT[OUT .>= max] .= max -# end - -# # ----------------------- -# # Construct AxisArrays and ClimGrid struct from array OUT -# latsymbol = Symbol(B.dimension_dict["lat"]) -# lonsymbol = Symbol(B.dimension_dict["lon"]) -# dataOut = AxisArray(OUT, Axis{lonsymbol}(B[1][Axis{lonsymbol}][:]), Axis{latsymbol}(B[1][Axis{latsymbol}][:]), Axis{:time}(timeorig)) - -# C = ClimateTools.ClimGrid(dataOut, longrid=B.longrid, latgrid=B.latgrid, msk=B.msk, grid_mapping=B.grid_mapping, dimension_dict=B.dimension_dict, timeattrib=A.timeattrib, model=A.model, frequency=A.frequency, experiment=A.experiment, run=A.run, project=A.project, institute=A.institute, filename=A.filename, dataunits=A.dataunits, latunits=B.latunits, lonunits=B.lonunits, variable=A.variable, typeofvar=A.typeofvar, typeofcal=A.typeofcal, varattribs=A.varattribs, globalattribs=A.globalattribs) - -# end - -# """ -# interp!(OUT, timeorig, dataorig, lonorig, latorig, londest, latdest, vari; frac=0.5) - -# Interpolation of `dataorig` onto longitude grid `londest` and latitude grid `latdest`. Used internally by `kriging` and 'inv_distance'. frac is the fraction of data points used. -# """ -# function interp!(OUT, timeorig, dataorig, lonorig, latorig, londest, latdest; solver=solver, msk=[]) - -# # Ensure we have same coords type -# if typeof(lonorig[1]) != typeof(londest[1]) # means we're going towards Float32 -# if typeof(dataorig[1]) != Float32 -# dataorig = Float32.(dataorig) -# end -# if typeof(lonorig[1]) == Float64 -# lonorig = Float32.(lonorig) -# end -# if typeof(londest[1]) == Float64 -# londest = Float32.(londest) -# end -# if typeof(latorig[1]) == Float64 -# latorig = Float32.(latorig) -# end -# if typeof(lonorig[1]) == Float64 -# lonorig = Float32.(lonorig) -# end -# end - -# # Variable -# # target = Symbol(vari) - -# # Destination grid -# SG = StructuredGrid(londest, latdest) - -# # # Solver -# # n = Int(floor(frac*length(lonorig[:]))) -# # -# # if lowercase(method) == "kriging" -# # solver = Kriging(target => (maxneighbors=n,)) -# # elseif lowercase(method) == "inv_dist" -# # solver = InvDistWeight(target => (neighbors=n,)) -# # end - - -# # Threads.@threads for t = 1:length(timeorig) -# p = Progress(length(timeorig), 5, "Regridding: ") -# for t = 1:length(timeorig) - -# SD = StructuredGridData(OrderedDict(target => dataorig[:, :, t]), lonorig, latorig) - -# problem = EstimationProblem(SD, SG, target) -# solution = solve(problem, solver) -# data_interp = reshape(solution.mean[target], size(londest)) - -# # Apply mask from ClimGrid destination -# if !isempty(msk) -# OUT[:, :, t] = data_interp .* msk -# else -# OUT[:, :, t] = data_interp -# end - -# next!(p) -# end -# end - - -""" - C = griddata(A::ClimGrid, B::ClimGrid; min=[], max=[]) - -Interpolate `ClimGrid` A onto the lon-lat grid of `ClimGrid` B, -where A and B are `ClimGrid`. + londim, latdim = getdimnames(ds) -Min and max optional keyword are used to constraint the results of the interpolation. For example, interpolating bounded fields can lead to unrealilstic values, such as negative precipitation. In that case, one would use min=0.0 to convert negative precipitation to 0.0. + longrid_flip = collect(ds["lon"].data) + latgrid = collect(ds["lat"].data) -""" -function griddata(A::ClimGrid, B::ClimGrid; method="linear", min=[], max=[]) - - # --------------------------------------- - # Get lat-lon information from ClimGrid B - londest, latdest = ClimateTools.getgrids(B) - - # Get lat-lon information from ClimGrid A - lonorig, latorig = ClimateTools.getgrids(A) - points = hcat(lonorig[:], latorig[:]) - - # ----------------------------------------- - # Get initial data and time from ClimGrid A - dataorig = A[1].data - timeorig = get_timevec(A)#[1][Axis{:time}][:] # the function will need to loop over time - - # --------------------- - # Allocate output Array - OUT = zeros(Float64, (size(B.data, 1), size(B.data, 2), length(timeorig))) - - # ------------------------ - # Interpolation - # interp!(OUT, timeorig, dataorig, lonorig, latorig, londest, latdest, A.variable, frac=frac) - griddata!(OUT, timeorig, dataorig, points, londest, latdest, method=method, msk=B.msk) - - if !isempty(min) - OUT[OUT .<= min] .= min - end - if !isempty(max) - OUT[OUT .>= max] .= max - end - - # ----------------------- - # Construct AxisArrays and ClimGrid struct from array OUT - latsymbol = Symbol(B.dimension_dict["lat"]) - lonsymbol = Symbol(B.dimension_dict["lon"]) - dataOut = AxisArray(OUT, Axis{lonsymbol}(B[1][Axis{lonsymbol}][:]), Axis{latsymbol}(B[1][Axis{latsymbol}][:]), Axis{:time}(timeorig)) - - C = ClimateTools.ClimGrid(dataOut, longrid=B.longrid, latgrid=B.latgrid, msk=B.msk, grid_mapping=B.grid_mapping, dimension_dict=B.dimension_dict, timeattrib=A.timeattrib, model=A.model, frequency=A.frequency, experiment=A.experiment, run=A.run, project=A.project, institute=A.institute, filename=A.filename, dataunits=A.dataunits, latunits=B.latunits, lonunits=B.lonunits, variable=A.variable, typeofvar=A.typeofvar, typeofcal=A.typeofcal, varattribs=A.varattribs, globalattribs=A.globalattribs) - -end +end -""" - C = griddata(A::ClimGrid, londest::AbstractArray{N, 1} where N, latdest::AbstractArray{N, 1} where N) -Interpolate `ClimGrid` A onto lat-lon grid defined by londest and latdest vector or array. If an array is provided, it is assumed that the grid is curvilinear (not a regular lon-lat grid) and the user needs to provide the dimension vector ("x" and "y") for such a grid. - -""" -function griddata(A::ClimGrid, lon::AbstractArray{N, T} where N where T, lat::AbstractArray{N, T} where N where T; method="linear", dimx=[], dimy=[], min=[], max=[]) - - # Get lat-lon information from ClimGrid A - lonorig, latorig = getgrids(A) - points = hcat(lonorig[:], latorig[:]) - - # ----------------------------------------- - # Get initial data and time from ClimGrid A - dataorig = A[1].data - timeorig = A[1][Axis{:time}][:] # the function will need to loop over time - - if ndims(lon) == 1 - londest, latdest = ndgrid(lon, lat) - dimx = lon - dimy = lat - elseif ndims(lon) == 2 - londest = lon - latdest = lat - @assert !isempty(dimx) - @assert !isempty(dimy) - else - throw(error("Grid should be a vector or a grid")) - end - - # --------------------- - # Allocate output Array - if ndims(lon) == 1 - OUT = zeros(Float64, (length(lon), length(lat), length(timeorig))) - elseif ndims(lon) == 2 - OUT = zeros(Float64, (size(lon, 1), size(lon, 2), length(timeorig))) - end - - # ------------------------ - # Interpolation - griddata!(OUT, timeorig, dataorig, points, londest, latdest, method=method) - - if !isempty(min) - OUT[OUT .<= min] .= min - end - - if !isempty(max) - OUT[OUT .>= max] .= max - end - - # ----------------------- - # Construct AxisArrays and ClimGrid struct from array OUT - dataOut = AxisArray(OUT, Axis{:x}(dimx), Axis{:y}(dimy), Axis{:time}(timeorig)) - msk = Array{Float64}(ones((size(OUT, 1), size(OUT, 2)))) - - if ndims(lon) == 1 - grid_mapping = Dict(["grid_mapping_name" => "Regular_longitude_latitude"]) - dimension_dict = Dict(["lon" => "lon", "lat" => "lat"]) - elseif ndims(lon) == 2 - grid_mapping = Dict(["grid_mapping_name" => "Curvilinear_grid"]) - dimension_dict = Dict(["lon" => "x", "lat" => "y"]) - end - - C = ClimateTools.ClimGrid(dataOut, longrid=londest, latgrid=latdest, msk=msk, grid_mapping=grid_mapping, dimension_dict=dimension_dict, timeattrib=A.timeattrib, model=A.model, frequency=A.frequency, experiment=A.experiment, run=A.run, project=A.project, institute=A.institute, filename=A.filename, dataunits=A.dataunits, latunits="degrees_north", lonunits="degrees_east", variable=A.variable, typeofvar=A.typeofvar, typeofcal=A.typeofcal, varattribs=A.varattribs, globalattribs=A.globalattribs) - -end - -""" - griddata!(OUT, timeorig, dataorig, points, londest, latdest, method, ;msk=[]) -Interpolation of `dataorig` onto longitude grid `londest` and latitude grid `latdest`. Used internally by `regrid`. -""" -function griddata!(OUT, timeorig, dataorig, points, londest, latdest; method="linear", msk=[]) - - p = Progress(length(timeorig), 5, "Regridding: ") - for t = 1:length(timeorig) - - # Points values - val = dataorig[:, :, t][:] - - # Call scipy griddata - data_interp = scipy.griddata(points, val, (londest, latdest), method=method) - - # Apply mask from ClimGrid destination - if !isempty(msk) - OUT[:, :, t] .= data_interp .* msk - else - OUT[:, :, t] .= data_interp - end - - next!(p) - end -end - -""" - getgrids(C::ClimGrid) - -Returns longitude and latitude grids of ClimGrid C. -""" -function getgrids(C::ClimGrid) - longrid = C.longrid - latgrid = C.latgrid - return longrid, latgrid -end - -""" - getdims(C::ClimGrid) - -Returns dimensions vectors of C -""" -function getdims(C::ClimGrid) - latsymbol, lonsymbol = ClimateTools.getsymbols(C) - x = C[1][Axis{lonsymbol}][:] - y = C[1][Axis{latsymbol}][:] - timevec = get_timevec(C) - - return x, y, timevec -end """ applymask(A::AbstractArray{N, n}, mask::AbstractArray{N, n}) @@ -506,16 +234,16 @@ function applymask(A::AbstractArray{N,1} where N, mask::AbstractArray{N, 1} wher return modA end -function applymask(C::ClimGrid, mask::AbstractArray{N, 1} where N) +# function applymask(C::ClimGrid, mask::AbstractArray{N, 1} where N) - A = C.data.data +# A = C.data.data - modA = applymask(A, mask) +# modA = applymask(A, mask) - Anew = buildarrayinterface(modA, C) +# Anew = buildarrayinterface(modA, C) - return ClimGrid(Anew, longrid=C.longrid, latgrid=C.latgrid, msk=mask, grid_mapping=C.grid_mapping, dimension_dict=C.dimension_dict, timeattrib=C.timeattrib, model=C.model, frequency=C.frequency, experiment=C.experiment, run=C.run, project=C.project, institute=C.institute, filename=C.filename, dataunits=C.dataunits, latunits=C.latunits, lonunits=C.lonunits, variable=C.variable, typeofvar=C.typeofvar, typeofcal="climatology", varattribs=C.varattribs, globalattribs=C.globalattribs) -end +# return ClimGrid(Anew, longrid=C.longrid, latgrid=C.latgrid, msk=mask, grid_mapping=C.grid_mapping, dimension_dict=C.dimension_dict, timeattrib=C.timeattrib, model=C.model, frequency=C.frequency, experiment=C.experiment, run=C.run, project=C.project, institute=C.institute, filename=C.filename, dataunits=C.dataunits, latunits=C.latunits, lonunits=C.lonunits, variable=C.variable, typeofvar=C.typeofvar, typeofcal="climatology", varattribs=C.varattribs, globalattribs=C.globalattribs) +# end macro isdefined(var) quote @@ -586,189 +314,189 @@ function permute_east_west2D(data::AbstractArray{N,2} where N, iwest, ieast) end -""" - ensemble_mean(C::ClimGrid...) +# """ +# ensemble_mean(C::ClimGrid...) -Returns the Ensemble mean of ClimGrids C.. -""" -function ensemble_mean(C; skipnan=true) +# Returns the Ensemble mean of ClimGrids C.. +# """ +# function ensemble_mean(C; skipnan=true) - # Create list of AxisArrays contained inside the ClimGrids - # Climref = C[1] - axisarrays = Array{Any}(undef, length(C)) - # unitsClims = Array{Any}(undef, length(C)) +# # Create list of AxisArrays contained inside the ClimGrids +# # Climref = C[1] +# axisarrays = Array{Any}(undef, length(C)) +# # unitsClims = Array{Any}(undef, length(C)) - for k = 1:length(C) - # datatmp[.!isnan.(datatmp) - axisarrays[k] = periodmean(C[k])[1]#[1][.!isnan.(C[k][1])], dims=3) - # unitsClims[k] = unit(C[k][1][1]) - end +# for k = 1:length(C) +# # datatmp[.!isnan.(datatmp) +# axisarrays[k] = periodmean(C[k])[1]#[1][.!isnan.(C[k][1])], dims=3) +# # unitsClims[k] = unit(C[k][1][1]) +# end - # if length(unique(unitsClims)) != 1 - # throw(error("ClimGrids needs to have the same physical units.")) - # end +# # if length(unique(unitsClims)) != 1 +# # throw(error("ClimGrids needs to have the same physical units.")) +# # end - # ENSEMBLE MEAN - n = length(axisarrays) # number of members - dataout = sum(axisarrays) / n # ensemble mean +# # ENSEMBLE MEAN +# n = length(axisarrays) # number of members +# dataout = sum(axisarrays) / n # ensemble mean - # # reapply unit - # dataout = [dataout][1]unitsClims[1] +# # # reapply unit +# # dataout = [dataout][1]unitsClims[1] - # Build AxisArray - data_axis = ClimateTools.buildarrayinterface(dataout, C[1]) +# # Build AxisArray +# data_axis = ClimateTools.buildarrayinterface(dataout, C[1]) - # Ensemble metadata - globalattribs = Dict() - globalattribs["history"] = "Ensemble mean" - globalattribs["models"] = "" - for k = 1:length(C) - globalattribs["models"] = string(globalattribs["models"], ", ", C[k].model) - end +# # Ensemble metadata +# globalattribs = Dict() +# globalattribs["history"] = "Ensemble mean" +# globalattribs["models"] = "" +# for k = 1:length(C) +# globalattribs["models"] = string(globalattribs["models"], ", ", C[k].model) +# end - # Return ClimGrid - return ClimGrid(data_axis, longrid=C[1].longrid, latgrid=C[1].latgrid, msk=C[1].msk, grid_mapping=C[1].grid_mapping, dimension_dict=C[1].dimension_dict, timeattrib=C[1].timeattrib, model=globalattribs["models"], frequency="Climatology ensemble mean", experiment="Multi-models ensemble", run="Multi-models ensemble", project="Multi-models ensemble", institute="Multi-models ensemble", filename="muliple_files", dataunits=C[1].dataunits, latunits=C[1].latunits, lonunits=C[1].lonunits, variable=C[1].variable, typeofvar=C[1].typeofvar, typeofcal="Climatology", varattribs=C[1].varattribs, globalattribs=globalattribs) +# # Return ClimGrid +# return ClimGrid(data_axis, longrid=C[1].longrid, latgrid=C[1].latgrid, msk=C[1].msk, grid_mapping=C[1].grid_mapping, dimension_dict=C[1].dimension_dict, timeattrib=C[1].timeattrib, model=globalattribs["models"], frequency="Climatology ensemble mean", experiment="Multi-models ensemble", run="Multi-models ensemble", project="Multi-models ensemble", institute="Multi-models ensemble", filename="muliple_files", dataunits=C[1].dataunits, latunits=C[1].latunits, lonunits=C[1].lonunits, variable=C[1].variable, typeofvar=C[1].typeofvar, typeofcal="Climatology", varattribs=C[1].varattribs, globalattribs=globalattribs) -end +# end -""" - ensemble_std(C::ClimGrid...) +# """ +# ensemble_std(C::ClimGrid...) -Returns the Ensemble standard deviation of climatological means of ClimGrids C.. -""" -function ensemble_std(C; skipnan=true) +# Returns the Ensemble standard deviation of climatological means of ClimGrids C.. +# """ +# function ensemble_std(C; skipnan=true) - # Create list of AxisArrays contained inside the ClimGrids - # Climref = C[1] - axisarrays = Array{Any}(undef, length(C)) - # unitsClims = Array{Any}(undef, length(C)) +# # Create list of AxisArrays contained inside the ClimGrids +# # Climref = C[1] +# axisarrays = Array{Any}(undef, length(C)) +# # unitsClims = Array{Any}(undef, length(C)) - for k = 1:length(C) - # datatmp[.!isnan.(datatmp) - axisarrays[k] = periodmean(C[k])[1]#[1][.!isnan.(C[k][1])], dims=3) - # unitsClims[k] = unit(C[k][1][1]) - end +# for k = 1:length(C) +# # datatmp[.!isnan.(datatmp) +# axisarrays[k] = periodmean(C[k])[1]#[1][.!isnan.(C[k][1])], dims=3) +# # unitsClims[k] = unit(C[k][1][1]) +# end - # if typeof(unitsClims[1]) != Unitful.FreeUnits{(),NoDims,nothing} - # if length(unique(unitsClims)) != 1 - # throw(error("ClimGrids needs to have the same physical units.")) - # end - # end +# # if typeof(unitsClims[1]) != Unitful.FreeUnits{(),NoDims,nothing} +# # if length(unique(unitsClims)) != 1 +# # throw(error("ClimGrids needs to have the same physical units.")) +# # end +# # end - # ENSEMBLE STD - dataout = std(axisarrays) +# # ENSEMBLE STD +# dataout = std(axisarrays) - # # reapply unit - # dataout = [dataout][1]unitsClims[1] +# # # reapply unit +# # dataout = [dataout][1]unitsClims[1] - # Build AxisArray - data_axis = ClimateTools.buildarrayinterface(dataout, C[1]) +# # Build AxisArray +# data_axis = ClimateTools.buildarrayinterface(dataout, C[1]) - # Ensemble metadata - globalattribs = Dict() - globalattribs["history"] = "Ensemble mean" - globalattribs["models"] = "" - for k = 1:length(C) - globalattribs["models"] = string(globalattribs["models"], ", ", C[k].model) - end +# # Ensemble metadata +# globalattribs = Dict() +# globalattribs["history"] = "Ensemble mean" +# globalattribs["models"] = "" +# for k = 1:length(C) +# globalattribs["models"] = string(globalattribs["models"], ", ", C[k].model) +# end - # Return ClimGrid - return ClimGrid(data_axis, longrid=C[1].longrid, latgrid=C[1].latgrid, msk=C[1].msk, grid_mapping=C[1].grid_mapping, dimension_dict=C[1].dimension_dict, timeattrib=C[1].timeattrib, model=globalattribs["models"], frequency="Climatology ensemble standard deviation", experiment="Multi-models ensemble", run="Multi-models ensemble", project="Multi-models ensemble", institute="Multi-models ensemble", filename="muliple_files", dataunits=C[1].dataunits, latunits=C[1].latunits, lonunits=C[1].lonunits, variable=C[1].variable, typeofvar=C[1].typeofvar, typeofcal="Climatology", varattribs=C[1].varattribs, globalattribs=globalattribs) +# # Return ClimGrid +# return ClimGrid(data_axis, longrid=C[1].longrid, latgrid=C[1].latgrid, msk=C[1].msk, grid_mapping=C[1].grid_mapping, dimension_dict=C[1].dimension_dict, timeattrib=C[1].timeattrib, model=globalattribs["models"], frequency="Climatology ensemble standard deviation", experiment="Multi-models ensemble", run="Multi-models ensemble", project="Multi-models ensemble", institute="Multi-models ensemble", filename="muliple_files", dataunits=C[1].dataunits, latunits=C[1].latunits, lonunits=C[1].lonunits, variable=C[1].variable, typeofvar=C[1].typeofvar, typeofcal="Climatology", varattribs=C[1].varattribs, globalattribs=globalattribs) -end +# end -""" - ensemble_max(C::ClimGrid...) +# """ +# ensemble_max(C::ClimGrid...) -Returns the Ensemble maximum of climatological means of ClimGrids C.. -""" -function ensemble_max(C; skipnan=true) +# Returns the Ensemble maximum of climatological means of ClimGrids C.. +# """ +# function ensemble_max(C; skipnan=true) - # Create list of AxisArrays contained inside the ClimGrids - # Climref = C[1] - axisarrays = Array{Any}(undef, length(C)) - # unitsClims = Array{Any}(undef, length(C)) +# # Create list of AxisArrays contained inside the ClimGrids +# # Climref = C[1] +# axisarrays = Array{Any}(undef, length(C)) +# # unitsClims = Array{Any}(undef, length(C)) - for k = 1:length(C) - # datatmp[.!isnan.(datatmp) - axisarrays[k] = periodmean(C[k])[1]#[1][.!isnan.(C[k][1])], dims=3) - # unitsClims[k] = unit(C[k][1][1]) - end +# for k = 1:length(C) +# # datatmp[.!isnan.(datatmp) +# axisarrays[k] = periodmean(C[k])[1]#[1][.!isnan.(C[k][1])], dims=3) +# # unitsClims[k] = unit(C[k][1][1]) +# end - # if typeof(unitsClims[1]) != Unitful.FreeUnits{(),NoDims,nothing} - # if length(unique(unitsClims)) != 1 - # throw(error("ClimGrids needs to have the same physical units.")) - # end - # end +# # if typeof(unitsClims[1]) != Unitful.FreeUnits{(),NoDims,nothing} +# # if length(unique(unitsClims)) != 1 +# # throw(error("ClimGrids needs to have the same physical units.")) +# # end +# # end - # ENSEMBLE MAX - dataout = maximum(axisarrays) +# # ENSEMBLE MAX +# dataout = maximum(axisarrays) - # # reapply unit - # dataout = [dataout][1]unitsClims[1] +# # # reapply unit +# # dataout = [dataout][1]unitsClims[1] - # Build AxisArray - data_axis = ClimateTools.buildarrayinterface(dataout, C[1]) +# # Build AxisArray +# data_axis = ClimateTools.buildarrayinterface(dataout, C[1]) - # Ensemble metadata - globalattribs = Dict() - globalattribs["history"] = "Ensemble mean" - globalattribs["models"] = "" - for k = 1:length(C) - globalattribs["models"] = string(globalattribs["models"], ", ", C[k].model) - end +# # Ensemble metadata +# globalattribs = Dict() +# globalattribs["history"] = "Ensemble mean" +# globalattribs["models"] = "" +# for k = 1:length(C) +# globalattribs["models"] = string(globalattribs["models"], ", ", C[k].model) +# end - # Return ClimGrid - return ClimGrid(data_axis, longrid=C[1].longrid, latgrid=C[1].latgrid, msk=C[1].msk, grid_mapping=C[1].grid_mapping, dimension_dict=C[1].dimension_dict, timeattrib=C[1].timeattrib, model=globalattribs["models"], frequency="Climatology ensemble maximum", experiment="Multi-models ensemble", run="Multi-models ensemble", project="Multi-models ensemble", institute="Multi-models ensemble", filename="muliple_files", dataunits=C[1].dataunits, latunits=C[1].latunits, lonunits=C[1].lonunits, variable=C[1].variable, typeofvar=C[1].typeofvar, typeofcal="Climatology", varattribs=C[1].varattribs, globalattribs=globalattribs) +# # Return ClimGrid +# return ClimGrid(data_axis, longrid=C[1].longrid, latgrid=C[1].latgrid, msk=C[1].msk, grid_mapping=C[1].grid_mapping, dimension_dict=C[1].dimension_dict, timeattrib=C[1].timeattrib, model=globalattribs["models"], frequency="Climatology ensemble maximum", experiment="Multi-models ensemble", run="Multi-models ensemble", project="Multi-models ensemble", institute="Multi-models ensemble", filename="muliple_files", dataunits=C[1].dataunits, latunits=C[1].latunits, lonunits=C[1].lonunits, variable=C[1].variable, typeofvar=C[1].typeofvar, typeofcal="Climatology", varattribs=C[1].varattribs, globalattribs=globalattribs) -end +# end -""" - ensemble_min(C::ClimGrid...) +# """ +# ensemble_min(C::ClimGrid...) -Returns the Ensemble minimum of climatological means of ClimGrids C.. -""" -function ensemble_min(C; skipnan=true) +# Returns the Ensemble minimum of climatological means of ClimGrids C.. +# """ +# function ensemble_min(C; skipnan=true) - # Create list of AxisArrays contained inside the ClimGrids - # Climref = C[1] - axisarrays = Array{Any}(undef, length(C)) - # unitsClims = Array{Any}(undef, length(C)) +# # Create list of AxisArrays contained inside the ClimGrids +# # Climref = C[1] +# axisarrays = Array{Any}(undef, length(C)) +# # unitsClims = Array{Any}(undef, length(C)) - for k = 1:length(C) - # datatmp[.!isnan.(datatmp) - axisarrays[k] = periodmean(C[k])[1]#[1][.!isnan.(C[k][1])], dims=3) - # unitsClims[k] = unit(C[k][1][1]) - end +# for k = 1:length(C) +# # datatmp[.!isnan.(datatmp) +# axisarrays[k] = periodmean(C[k])[1]#[1][.!isnan.(C[k][1])], dims=3) +# # unitsClims[k] = unit(C[k][1][1]) +# end - # if typeof(unitsClims[1]) != Unitful.FreeUnits{(),NoDims,nothing} - # if length(unique(unitsClims)) != 1 - # throw(error("ClimGrids needs to have the same physical units.")) - # end - # end +# # if typeof(unitsClims[1]) != Unitful.FreeUnits{(),NoDims,nothing} +# # if length(unique(unitsClims)) != 1 +# # throw(error("ClimGrids needs to have the same physical units.")) +# # end +# # end - # ENSEMBLE MIN - dataout = minimum(axisarrays) +# # ENSEMBLE MIN +# dataout = minimum(axisarrays) - # # reapply unit - # dataout = [dataout][1]unitsClims[1] +# # # reapply unit +# # dataout = [dataout][1]unitsClims[1] - # Build AxisArray - data_axis = ClimateTools.buildarrayinterface(dataout, C[1]) +# # Build AxisArray +# data_axis = ClimateTools.buildarrayinterface(dataout, C[1]) - # Ensemble metadata - globalattribs = Dict() - globalattribs["history"] = "Ensemble mean" - globalattribs["models"] = "" - for k = 1:length(C) - globalattribs["models"] = string(globalattribs["models"], ", ", C[k].model) - end +# # Ensemble metadata +# globalattribs = Dict() +# globalattribs["history"] = "Ensemble mean" +# globalattribs["models"] = "" +# for k = 1:length(C) +# globalattribs["models"] = string(globalattribs["models"], ", ", C[k].model) +# end - # Return ClimGrid - return ClimGrid(data_axis, longrid=C[1].longrid, latgrid=C[1].latgrid, msk=C[1].msk, grid_mapping=C[1].grid_mapping, dimension_dict=C[1].dimension_dict, timeattrib=C[1].timeattrib, model=globalattribs["models"], frequency="Climatology ensemble minimum", experiment="Multi-models ensemble", run="Multi-models ensemble", project="Multi-models ensemble", institute="Multi-models ensemble", filename="muliple_files", dataunits=C[1].dataunits, latunits=C[1].latunits, lonunits=C[1].lonunits, variable=C[1].variable, typeofvar=C[1].typeofvar, typeofcal="Climatology", varattribs=C[1].varattribs, globalattribs=globalattribs) +# # Return ClimGrid +# return ClimGrid(data_axis, longrid=C[1].longrid, latgrid=C[1].latgrid, msk=C[1].msk, grid_mapping=C[1].grid_mapping, dimension_dict=C[1].dimension_dict, timeattrib=C[1].timeattrib, model=globalattribs["models"], frequency="Climatology ensemble minimum", experiment="Multi-models ensemble", run="Multi-models ensemble", project="Multi-models ensemble", institute="Multi-models ensemble", filename="muliple_files", dataunits=C[1].dataunits, latunits=C[1].latunits, lonunits=C[1].lonunits, variable=C[1].variable, typeofvar=C[1].typeofvar, typeofcal="Climatology", varattribs=C[1].varattribs, globalattribs=globalattribs) -end +# end function maximum(arrays::Array{Any}) @@ -822,57 +550,57 @@ function minimum(arrays::Array{Any}) end -""" - polyfit(C::ClimGrid) +# """ +# polyfit(C::ClimGrid) -Returns an array of the polynomials functions of each grid points contained in ClimGrid C. -""" -function polyfit(C::ClimGrid) +# Returns an array of the polynomials functions of each grid points contained in ClimGrid C. +# """ +# function polyfit(C::ClimGrid) - x = collect(1:length(C[1][Axis{:time}][:])) - dataout = Array{Polynomial{typeof(C[1][1,1,1])}}(undef, size(C[1], 1),size(C[1], 2)) +# x = collect(1:length(C[1][Axis{:time}][:])) +# dataout = Array{Polynomial{typeof(C[1][1,1,1])}}(undef, size(C[1], 1),size(C[1], 2)) - # Reshaping for multi-threads - datain_rshp = reshape(C[1].data, size(C[1].data,1)*size(C[1].data,2), size(C[1].data,3)) - dataout_rshp = reshape(dataout, size(dataout,1)*size(dataout,2),size(dataout,3)) +# # Reshaping for multi-threads +# datain_rshp = reshape(C[1].data, size(C[1].data,1)*size(C[1].data,2), size(C[1].data,3)) +# dataout_rshp = reshape(dataout, size(dataout,1)*size(dataout,2),size(dataout,3)) - # Threads.@threads for k = 1:size(datain_rshp,1) - for k = 1:size(datain_rshp,1) - y = datain_rshp[k,:] - polynomial = Polynomials.fit(x, y, 4) - polynomial[0] = 0.0 - dataout_rshp[k] = polynomial +# # Threads.@threads for k = 1:size(datain_rshp,1) +# for k = 1:size(datain_rshp,1) +# y = datain_rshp[k,:] +# polynomial = Polynomials.fit(x, y, 4) +# polynomial[0] = 0.0 +# dataout_rshp[k] = polynomial - end +# end - return dataout -end +# return dataout +# end -""" - polyval(C::ClimGrid, polynomial::Array{Poly{Float64}}) +# """ +# polyval(C::ClimGrid, polynomial::Array{Poly{Float64}}) - Returns a ClimGrid containing the values, as estimated from polynomial function polyn. -""" -function polyval(C::ClimGrid, polynomial::Array{Polynomial{N},2} where N) +# Returns a ClimGrid containing the values, as estimated from polynomial function polyn. +# """ +# function polyval(C::ClimGrid, polynomial::Array{Polynomial{N},2} where N) - datain = C[1].data - dataout = Array{typeof(C[1][1,1,1])}(undef, (size(C[1], 1), size(C[1],2), size(C[1], 3))) +# datain = C[1].data +# dataout = Array{typeof(C[1][1,1,1])}(undef, (size(C[1], 1), size(C[1],2), size(C[1], 3))) - # Reshape - datain_rshp = reshape(datain, size(datain,1)*size(datain,2),size(datain,3)) - dataout_rshp = reshape(dataout, size(dataout,1)*size(dataout,2),size(dataout,3)) - polynomial_rshp = reshape(polynomial, size(polynomial,1)*size(polynomial,2)) +# # Reshape +# datain_rshp = reshape(datain, size(datain,1)*size(datain,2),size(datain,3)) +# dataout_rshp = reshape(dataout, size(dataout,1)*size(dataout,2),size(dataout,3)) +# polynomial_rshp = reshape(polynomial, size(polynomial,1)*size(polynomial,2)) - # Threads.@threads for k = 1:size(datain_rshp,1) - for k = 1:size(datain_rshp,1) - val = polynomial_rshp[k].(datain_rshp[k,:]) - dataout_rshp[k,:] = val - end +# # Threads.@threads for k = 1:size(datain_rshp,1) +# for k = 1:size(datain_rshp,1) +# val = polynomial_rshp[k].(datain_rshp[k,:]) +# dataout_rshp[k,:] = val +# end - dataout2 = buildarrayinterface(dataout, C) +# dataout2 = buildarrayinterface(dataout, C) - return ClimGrid(dataout2; longrid=C.longrid, latgrid=C.latgrid, msk=C.msk, grid_mapping=C.grid_mapping, dimension_dict=C.dimension_dict, timeattrib=C.timeattrib, model=C.model, frequency=C.frequency, experiment=C.experiment, run=C.run, project=C.project, institute=C.institute, filename=C.filename, dataunits=C.dataunits, latunits=C.latunits, lonunits=C.lonunits, variable=C.variable, typeofvar=C.typeofvar, typeofcal=C.typeofcal, varattribs=C.varattribs, globalattribs=C.globalattribs) -end +# return ClimGrid(dataout2; longrid=C.longrid, latgrid=C.latgrid, msk=C.msk, grid_mapping=C.grid_mapping, dimension_dict=C.dimension_dict, timeattrib=C.timeattrib, model=C.model, frequency=C.frequency, experiment=C.experiment, run=C.run, project=C.project, institute=C.institute, filename=C.filename, dataunits=C.dataunits, latunits=C.latunits, lonunits=C.lonunits, variable=C.variable, typeofvar=C.typeofvar, typeofcal=C.typeofcal, varattribs=C.varattribs, globalattribs=C.globalattribs) +# end """ extension(url::String) @@ -948,49 +676,49 @@ function get_threshold(obsvec, refvec; thres=0.95) return mean([quantile(obsvec[obsvec .>= 1.0],thres) quantile(refvec[refvec .>= 1.0], thres)]) end -function timestep(C::ClimGrid, ts::Tuple) - D = temporalsubset(C, ts, ts) -end +# function timestep(C::ClimGrid, ts::Tuple) +# D = temporalsubset(C, ts, ts) +# end -function timestep(C::ClimGrid, ts::Int) - timevec = get_timevec(C) - d = timevec[ts] - t = (year(d), month(d), day(d)) - D = temporalsubset(C, t, t) -end +# function timestep(C::ClimGrid, ts::Int) +# timevec = get_timevec(C) +# d = timevec[ts] +# t = (year(d), month(d), day(d)) +# D = temporalsubset(C, t, t) +# end -function timestep(C::ClimGrid, d) - t = (year(d), month(d), day(d), hour(d), minute(d), second(d)) - D = temporalsubset(C, t, t) -end +# function timestep(C::ClimGrid, d) +# t = (year(d), month(d), day(d), hour(d), minute(d), second(d)) +# D = temporalsubset(C, t, t) +# end -function get_max_clusters(x::Vector{Cluster}) +# function get_max_clusters(x::Vector{Cluster}) - dataout = Array{typeof(x[1].value[1])}(undef,0) +# dataout = Array{typeof(x[1].value[1])}(undef,0) - for i = eachindex(x) - append!(dataout, x[i].value) - end +# for i = eachindex(x) +# append!(dataout, x[i].value) +# end - return dataout +# return dataout -end +# end -function get_position_clusters(x::Vector{Cluster}) +# function get_position_clusters(x::Vector{Cluster}) - dataout = Array{Int}(undef, 0) +# dataout = Array{Int}(undef, 0) - for i = eachindex(x) - append!(dataout, x[i].position) - #dataout[i] = x[i].position[1] - end +# for i = eachindex(x) +# append!(dataout, x[i].position) +# #dataout[i] = x[i].position[1] +# end - return dataout +# return dataout -end +# end """ @@ -1001,217 +729,217 @@ Calculate mean while omitting NaN, Inf, etc. finitemean(x) = mean(filter(x -> !isnan(x)&isfinite(x),x)) finitemean(x,y) = mapslices(finitemean,x,dims=y) -""" - periodmean(C::ClimGrid; startdate::Tuple, enddate::Tuple) +# """ +# periodmean(C::ClimGrid; startdate::Tuple, enddate::Tuple) -Mean of array data over a given period. -""" -function periodmean(C::ClimGrid; level=1, start_date::Tuple=(Inf, ), end_date::Tuple=(Inf,)) +# Mean of array data over a given period. +# """ +# function periodmean(C::ClimGrid; level=1, start_date::Tuple=(Inf, ), end_date::Tuple=(Inf,)) - if start_date != (Inf,) || end_date != (Inf,) - C = temporalsubset(C, start_date, end_date) - end +# if start_date != (Inf,) || end_date != (Inf,) +# C = temporalsubset(C, start_date, end_date) +# end - datain = C.data.data +# datain = C.data.data - # Mean and squeeze - if ndims(datain) == 2 - dataout = datain - elseif ndims(datain) == 3 - if size(datain, 3) == 1 # already an average on single value - dataout = dropdims(datain, dims=3) - else - dataout = dropdims(finitemean(datain, 3), dims=3) - end - elseif ndims(datain) == 4 - datain_lev = datain[:,:,level,:] # extract level - if size(datain_lev, 3) == 1 - dataout = dropdims(datain_lev, dims=3) - else - dataout = dropdims(finitemean(datain_lev, 3), dims=3) - end - end +# # Mean and squeeze +# if ndims(datain) == 2 +# dataout = datain +# elseif ndims(datain) == 3 +# if size(datain, 3) == 1 # already an average on single value +# dataout = dropdims(datain, dims=3) +# else +# dataout = dropdims(finitemean(datain, 3), dims=3) +# end +# elseif ndims(datain) == 4 +# datain_lev = datain[:,:,level,:] # extract level +# if size(datain_lev, 3) == 1 +# dataout = dropdims(datain_lev, dims=3) +# else +# dataout = dropdims(finitemean(datain_lev, 3), dims=3) +# end +# end - # Build output AxisArray - FD = buildarray_climato(C, dataout) +# # Build output AxisArray +# FD = buildarray_climato(C, dataout) - # Return climGrid type containing the indice - return ClimGrid(FD, longrid=C.longrid, latgrid=C.latgrid, msk=C.msk, grid_mapping=C.grid_mapping, dimension_dict=C.dimension_dict, timeattrib=C.timeattrib, model=C.model, frequency=C.frequency, experiment=C.experiment, run=C.run, project=C.project, institute=C.institute, filename=C.filename, dataunits=C.dataunits, latunits=C.latunits, lonunits=C.lonunits, variable="periodmean", typeofvar=C.typeofvar, typeofcal="climatology", varattribs=C.varattribs, globalattribs=C.globalattribs) -end +# # Return climGrid type containing the indice +# return ClimGrid(FD, longrid=C.longrid, latgrid=C.latgrid, msk=C.msk, grid_mapping=C.grid_mapping, dimension_dict=C.dimension_dict, timeattrib=C.timeattrib, model=C.model, frequency=C.frequency, experiment=C.experiment, run=C.run, project=C.project, institute=C.institute, filename=C.filename, dataunits=C.dataunits, latunits=C.latunits, lonunits=C.lonunits, variable="periodmean", typeofvar=C.typeofvar, typeofcal="climatology", varattribs=C.varattribs, globalattribs=C.globalattribs) +# end -""" - verticalmean(C::ClimGrid; startdate::Tuple, enddate::Tuple) +# """ +# verticalmean(C::ClimGrid; startdate::Tuple, enddate::Tuple) -Mean of array data over all vertical levels. -""" -function verticalmean(C::ClimGrid) +# Mean of array data over all vertical levels. +# """ +# function verticalmean(C::ClimGrid) - datain = C.data.data +# datain = C.data.data - if ndims(datain) < 4 - error("There is no vertical levels in the dataset") - end +# if ndims(datain) < 4 +# error("There is no vertical levels in the dataset") +# end - if size(datain, 3) == 1 # Only one vertical level - dataout = dropdims(datain[:, :, :, :], dims = 3) - else - dataout = dropdims(finitemean(datain, 3), dims=3) - end +# if size(datain, 3) == 1 # Only one vertical level +# dataout = dropdims(datain[:, :, :, :], dims = 3) +# else +# dataout = dropdims(finitemean(datain, 3), dims=3) +# end - # Build output AxisArray - FD = buildarray_verticalmean(C, dataout) +# # Build output AxisArray +# FD = buildarray_verticalmean(C, dataout) - # Return climGrid type containing the indice - return ClimGrid(FD, longrid=C.longrid, latgrid=C.latgrid, msk=C.msk, grid_mapping=C.grid_mapping, dimension_dict=C.dimension_dict, timeattrib=C.timeattrib, model=C.model, frequency=C.frequency, experiment=C.experiment, run=C.run, project=C.project, institute=C.institute, filename=C.filename, dataunits=C.dataunits, latunits=C.latunits, lonunits=C.lonunits, variable="periodmean", typeofvar=C.typeofvar, typeofcal="climatology", varattribs=C.varattribs, globalattribs=C.globalattribs) -end +# # Return climGrid type containing the indice +# return ClimGrid(FD, longrid=C.longrid, latgrid=C.latgrid, msk=C.msk, grid_mapping=C.grid_mapping, dimension_dict=C.dimension_dict, timeattrib=C.timeattrib, model=C.model, frequency=C.frequency, experiment=C.experiment, run=C.run, project=C.project, institute=C.institute, filename=C.filename, dataunits=C.dataunits, latunits=C.latunits, lonunits=C.lonunits, variable="periodmean", typeofvar=C.typeofvar, typeofcal="climatology", varattribs=C.varattribs, globalattribs=C.globalattribs) +# end -function buildarray_climato(C::ClimGrid, dataout) - lonsymbol = Symbol(C.dimension_dict["lon"]) - latsymbol = Symbol(C.dimension_dict["lat"]) - if ndims(dataout) == 2 # original was a 3D field - FD = AxisArray(dataout, Axis{lonsymbol}(C[1][Axis{lonsymbol}].val), Axis{latsymbol}(C[1][Axis{latsymbol}].val)) - elseif ndims(dataout) == 3 # original was a 4D field - levsymbol = Symbol(C.dimension_dict["height"]) - FD = AxisArray(dataout, Axis{lonsymbol}(C[1][Axis{lonsymbol}].val), Axis{latsymbol}(C[1][Axis{latsymbol}].val), Axis{levsymbol}(C[1][Axis{levsymbol}].val)) - end - return FD -end +# function buildarray_climato(C::ClimGrid, dataout) +# lonsymbol = Symbol(C.dimension_dict["lon"]) +# latsymbol = Symbol(C.dimension_dict["lat"]) +# if ndims(dataout) == 2 # original was a 3D field +# FD = AxisArray(dataout, Axis{lonsymbol}(C[1][Axis{lonsymbol}].val), Axis{latsymbol}(C[1][Axis{latsymbol}].val)) +# elseif ndims(dataout) == 3 # original was a 4D field +# levsymbol = Symbol(C.dimension_dict["height"]) +# FD = AxisArray(dataout, Axis{lonsymbol}(C[1][Axis{lonsymbol}].val), Axis{latsymbol}(C[1][Axis{latsymbol}].val), Axis{levsymbol}(C[1][Axis{levsymbol}].val)) +# end +# return FD +# end -function buildarray_verticalmean(C::ClimGrid, dataout) - lonsymbol = Symbol(C.dimension_dict["lon"]) - latsymbol = Symbol(C.dimension_dict["lat"]) - timesymbol = Symbol(C.dimension_dict["time"]) +# function buildarray_verticalmean(C::ClimGrid, dataout) +# lonsymbol = Symbol(C.dimension_dict["lon"]) +# latsymbol = Symbol(C.dimension_dict["lat"]) +# timesymbol = Symbol(C.dimension_dict["time"]) - FD = AxisArray(dataout, Axis{lonsymbol}(C[1][Axis{lonsymbol}].val), Axis{latsymbol}(C[1][Axis{latsymbol}].val), Axis{timesymbol}(C[1][Axis{timesymbol}].val)) +# FD = AxisArray(dataout, Axis{lonsymbol}(C[1][Axis{lonsymbol}].val), Axis{latsymbol}(C[1][Axis{latsymbol}].val), Axis{timesymbol}(C[1][Axis{timesymbol}].val)) - return FD -end +# return FD +# end -function buildarrayinterface(axisArraytmp, A) - latsymbol = Symbol(A.dimension_dict["lat"]) - lonsymbol = Symbol(A.dimension_dict["lon"]) - if ndims(axisArraytmp) == 2 - axisArray = AxisArray(axisArraytmp, Axis{lonsymbol}(A[1][Axis{lonsymbol}].val), Axis{latsymbol}(A[1][Axis{latsymbol}].val)) - elseif ndims(axisArraytmp) == 3 - axisArray = AxisArray(axisArraytmp, Axis{lonsymbol}(A[1][Axis{lonsymbol}].val), Axis{latsymbol}(A[1][Axis{latsymbol}].val), Axis{:time}(A[1][Axis{:time}].val)) - elseif ndims(axisArraytmp) == 4 - axisArray = AxisArray(axisArraytmp, Axis{lonsymbol}(A[1][Axis{lonsymbol}].val), Axis{latsymbol}(A[1][Axis{latsymbol}].val), Axis{:plev}(A[1][Axis{:plev}].val), Axis{:time}(A[1][Axis{:time}].val)) - end - return axisArray -end +# function buildarrayinterface(axisArraytmp, A) +# latsymbol = Symbol(A.dimension_dict["lat"]) +# lonsymbol = Symbol(A.dimension_dict["lon"]) +# if ndims(axisArraytmp) == 2 +# axisArray = AxisArray(axisArraytmp, Axis{lonsymbol}(A[1][Axis{lonsymbol}].val), Axis{latsymbol}(A[1][Axis{latsymbol}].val)) +# elseif ndims(axisArraytmp) == 3 +# axisArray = AxisArray(axisArraytmp, Axis{lonsymbol}(A[1][Axis{lonsymbol}].val), Axis{latsymbol}(A[1][Axis{latsymbol}].val), Axis{:time}(A[1][Axis{:time}].val)) +# elseif ndims(axisArraytmp) == 4 +# axisArray = AxisArray(axisArraytmp, Axis{lonsymbol}(A[1][Axis{lonsymbol}].val), Axis{latsymbol}(A[1][Axis{latsymbol}].val), Axis{:plev}(A[1][Axis{:plev}].val), Axis{:time}(A[1][Axis{:time}].val)) +# end +# return axisArray +# end -function buildarray_annual(C::ClimGrid, dataout, numYears) - lonsymbol = Symbol(C.dimension_dict["lon"]) - latsymbol = Symbol(C.dimension_dict["lat"]) - FD = AxisArray(dataout, Axis{lonsymbol}(C[1][Axis{lonsymbol}].val), Axis{latsymbol}(C[1][Axis{latsymbol}].val), Axis{:time}(Dates.year.(DateTime.(numYears)))) - return FD -end +# function buildarray_annual(C::ClimGrid, dataout, numYears) +# lonsymbol = Symbol(C.dimension_dict["lon"]) +# latsymbol = Symbol(C.dimension_dict["lat"]) +# FD = AxisArray(dataout, Axis{lonsymbol}(C[1][Axis{lonsymbol}].val), Axis{latsymbol}(C[1][Axis{latsymbol}].val), Axis{:time}(Dates.year.(DateTime.(numYears)))) +# return FD +# end -function buildarray_resample(C::ClimGrid, dataout, newtime) - lonsymbol = Symbol(C.dimension_dict["lon"]) - latsymbol = Symbol(C.dimension_dict["lat"]) - FD = AxisArray(dataout, Axis{lonsymbol}(C[1][Axis{lonsymbol}].val), Axis{latsymbol}(C[1][Axis{latsymbol}].val), Axis{:time}(newtime)) - return FD -end +# function buildarray_resample(C::ClimGrid, dataout, newtime) +# lonsymbol = Symbol(C.dimension_dict["lon"]) +# latsymbol = Symbol(C.dimension_dict["lat"]) +# FD = AxisArray(dataout, Axis{lonsymbol}(C[1][Axis{lonsymbol}].val), Axis{latsymbol}(C[1][Axis{latsymbol}].val), Axis{:time}(newtime)) +# return FD +# end -""" - function temporalsubset(C::ClimGrid, startdate::Date, enddate::Date) +# """ +# function temporalsubset(C::ClimGrid, startdate::Date, enddate::Date) -Returns the temporal subset of ClimGrid C. The temporal subset is defined by a start and end date. +# Returns the temporal subset of ClimGrid C. The temporal subset is defined by a start and end date. -""" -function temporalsubset(C::ClimGrid, datebeg::Tuple, dateend::Tuple) +# """ +# function temporalsubset(C::ClimGrid, datebeg::Tuple, dateend::Tuple) - T = typeof(get_timevec(C)[1]) - timeV = get_timevec(C) - idxtimebeg, idxtimeend = timeindex(timeV, datebeg, dateend, T) +# T = typeof(get_timevec(C)[1]) +# timeV = get_timevec(C) +# idxtimebeg, idxtimeend = timeindex(timeV, datebeg, dateend, T) - # startdate = buildtimetype(datebeg, T) - # enddate = buildtimetype(dateend, T) +# # startdate = buildtimetype(datebeg, T) +# # enddate = buildtimetype(dateend, T) - # some checkups - @argcheck idxtimebeg <= idxtimeend +# # some checkups +# @argcheck idxtimebeg <= idxtimeend - dataOut = C[1][Axis{:time}(idxtimebeg:idxtimeend)] +# dataOut = C[1][Axis{:time}(idxtimebeg:idxtimeend)] - # The following control ensure that a 1-timestep temporal subset returns a 3D Array with time information on the timestep. i.e. startdate == enddate - if ndims(dataOut) == 2 - timeV = startdate - latsymbol = Symbol(C.dimension_dict["lat"]) - lonsymbol = Symbol(C.dimension_dict["lon"]) - data2 = fill(NaN, (size(dataOut,1), size(dataOut, 2), 1)) - data2[:,:,1] = dataOut - dataOut = AxisArray(data2, Axis{lonsymbol}(C[1][Axis{lonsymbol}].val), Axis{latsymbol}(C[1][Axis{latsymbol}].val), Axis{:time}(C[1][Axis{:time}].val)) +# # The following control ensure that a 1-timestep temporal subset returns a 3D Array with time information on the timestep. i.e. startdate == enddate +# if ndims(dataOut) == 2 +# timeV = startdate +# latsymbol = Symbol(C.dimension_dict["lat"]) +# lonsymbol = Symbol(C.dimension_dict["lon"]) +# data2 = fill(NaN, (size(dataOut,1), size(dataOut, 2), 1)) +# data2[:,:,1] = dataOut +# dataOut = AxisArray(data2, Axis{lonsymbol}(C[1][Axis{lonsymbol}].val), Axis{latsymbol}(C[1][Axis{latsymbol}].val), Axis{:time}(C[1][Axis{:time}].val)) - end +# end - return ClimGrid(dataOut, longrid=C.longrid, latgrid=C.latgrid, msk=C.msk, grid_mapping=C.grid_mapping, dimension_dict=C.dimension_dict, timeattrib=C.timeattrib, model=C.model, frequency=C.frequency, experiment=C.experiment, run=C.run, project=C.project, institute=C.institute, filename=C.filename, dataunits=C.dataunits, latunits=C.latunits, lonunits=C.lonunits, variable=C.variable, typeofvar=C.typeofvar, typeofcal=C.typeofcal, varattribs=C.varattribs, globalattribs=C.globalattribs) +# return ClimGrid(dataOut, longrid=C.longrid, latgrid=C.latgrid, msk=C.msk, grid_mapping=C.grid_mapping, dimension_dict=C.dimension_dict, timeattrib=C.timeattrib, model=C.model, frequency=C.frequency, experiment=C.experiment, run=C.run, project=C.project, institute=C.institute, filename=C.filename, dataunits=C.dataunits, latunits=C.latunits, lonunits=C.lonunits, variable=C.variable, typeofvar=C.typeofvar, typeofcal=C.typeofcal, varattribs=C.varattribs, globalattribs=C.globalattribs) -end +# end -""" - get_timevec(C::ClimGrid) +# """ +# get_timevec(C::ClimGrid) -Returns time vector of ClimGrid C. -""" -get_timevec(C::ClimGrid) = C[1][Axis{:time}][:] +# Returns time vector of ClimGrid C. +# """ +# get_timevec(C::ClimGrid) = C[1][Axis{:time}][:] -""" - buildtimetype(datetuple, f) +# """ +# buildtimetype(datetuple, f) -Returns the adequate DateTime for temporal subsetting using DateType *f* -""" -function buildtimetype(date_tuple, f) - - if length(date_tuple) == 1 - dateout = f(date_tuple[1], 01, 01) - elseif length(date_tuple) == 2 - dateout = f(date_tuple[1], date_tuple[2], 01) - elseif length(date_tuple) == 3 - dateout = f(date_tuple[1], date_tuple[2], date_tuple[3]) - elseif length(date_tuple) == 4 - dateout = f(date_tuple[1], date_tuple[2], date_tuple[3], date_tuple[4], 00, 00) - elseif length(date_tuple) == 5 - dateout = f(date_tuple[1], date_tuple[2], date_tuple[3], date_tuple[4], date_tuple[5], 00) - elseif length(date_tuple) == 6 - dateout = f(date_tuple[1], date_tuple[2], date_tuple[3], date_tuple[4], date_tuple[5], date_tuple[6]) - end +# Returns the adequate DateTime for temporal subsetting using DateType *f* +# """ +# function buildtimetype(date_tuple, f) + +# if length(date_tuple) == 1 +# dateout = f(date_tuple[1], 01, 01) +# elseif length(date_tuple) == 2 +# dateout = f(date_tuple[1], date_tuple[2], 01) +# elseif length(date_tuple) == 3 +# dateout = f(date_tuple[1], date_tuple[2], date_tuple[3]) +# elseif length(date_tuple) == 4 +# dateout = f(date_tuple[1], date_tuple[2], date_tuple[3], date_tuple[4], 00, 00) +# elseif length(date_tuple) == 5 +# dateout = f(date_tuple[1], date_tuple[2], date_tuple[3], date_tuple[4], date_tuple[5], 00) +# elseif length(date_tuple) == 6 +# dateout = f(date_tuple[1], date_tuple[2], date_tuple[3], date_tuple[4], date_tuple[5], date_tuple[6]) +# end - return dateout -end +# return dateout +# end -""" - timeindex(timeVec, start_date, end_date, T) +# """ +# timeindex(timeVec, start_date, end_date, T) -Return the index of time vector specified by start_date and end_date. T is the DateTime type (see NCDatasets.jl documentation). -""" -function timeindex(timeV, datebeg::Tuple, dateend::Tuple, T) - - # Start Date - if !isinf(datebeg[1]) - # Build DateTime type - start_date = buildtimetype(datebeg, T) - # @argcheck start_date >= timeV[1] - idxtimebeg = findfirst(timeV .>= start_date)[1] - else - idxtimebeg = 1 - end - # End date - if !isinf(dateend[1]) - # Build DateTime type - end_date = buildtimetype(dateend, T) - # @argcheck end_date <= timeV[end] - idxtimeend = findlast(timeV .<= end_date)[1] - else - idxtimeend = length(timeV) - end +# Return the index of time vector specified by start_date and end_date. T is the DateTime type (see NCDatasets.jl documentation). +# """ +# function timeindex(timeV, datebeg::Tuple, dateend::Tuple, T) + +# # Start Date +# if !isinf(datebeg[1]) +# # Build DateTime type +# start_date = buildtimetype(datebeg, T) +# # @argcheck start_date >= timeV[1] +# idxtimebeg = findfirst(timeV .>= start_date)[1] +# else +# idxtimebeg = 1 +# end +# # End date +# if !isinf(dateend[1]) +# # Build DateTime type +# end_date = buildtimetype(dateend, T) +# # @argcheck end_date <= timeV[end] +# idxtimeend = findlast(timeV .<= end_date)[1] +# else +# idxtimeend = length(timeV) +# end - if !isinf(datebeg[1]) && !isinf(dateend[1]) - @argcheck start_date <= end_date - end - return idxtimebeg, idxtimeend -end +# if !isinf(datebeg[1]) && !isinf(dateend[1]) +# @argcheck start_date <= end_date +# end +# return idxtimebeg, idxtimeend +# end # """ diff --git a/src/gev.jl b/src/gev.jl new file mode 100644 index 00000000..21d745c4 --- /dev/null +++ b/src/gev.jl @@ -0,0 +1,108 @@ +function rlevels_cube(xout, xin; threshold=nothing, rlevels = [1, 2, 5, 10, 25, 50, 100, 1000], minimalvalue=1.0) + + if all(ismissing, xin) + xout .= missing + return + end + + if isnothing(threshold) + dataforquantile = xin[xin .> minimalvalue] + if !isempty(dataforquantile) + threshold = quantile(dataforquantile, 0.92) + exceedances = xin[xin .> threshold] .- threshold + model = Extremes.gpfit(exceedances) + + nobs = size(xin,1) + + nobsperblock = 365 + + r_h = returnlevel.(model, threshold, nobs, nobsperblock, rlevels) + + xout .= [x.value[1] for x in r_h] + + else + xout .= missing + return + end + + end + +end + + +""" + rlevels_cube(ds::YAXArray; rlevels = [1, 2, 5, 10, 25, 50, 100, 1000], threshold=nothing, minimalvalue=1.0, latsouth_pixel=0, latnorth_pixel=0, lonwest_pixel=0, loneast_pixel=0) + +Compute the return levels for a dataset `ds` along the time dimension. + +# Arguments +- `ds`: The input dataset. +- `rlevels`: The return levels to compute. Default is [1, 2, 5, 10, 25, 50, 100, 1000]. +- `threshold`: The threshold to use for the Generalized Pareto fit. If not provided, the 0.92 quantile is used. +- `minimalvalue`: The minimal value to consider in the dataset. Default is 1.0. +- `latsouth_pixel`: The south pixel to consider. Default is 0. +- `latnorth_pixel`: The north pixel to consider. Default is 0. +- `lonwest_pixel`: The west pixel to consider. Default is 0. +- `loneast_pixel`: The east pixel to consider. Default is 0. + +# Returns +The return levels of the input dataset `ds` along the time dimension. + +""" +function rlevels_cube(ds::YAXArray; threshold=nothing, rlevels = [1, 2, 5, 10, 25, 50, 100, 1000], minimalvalue=1.0, latsouth_pixel=0, latnorth_pixel=0, lonwest_pixel=0, loneast_pixel=0, lonname="longitude", latname="latitude") + + # Dimensions + # indims = InDims(MovingWindow(latname, latsouth_pixel,latnorth_pixel), MovingWindow(lonname, lonwest_pixel,loneast_pixel), "Ti") + indims = InDims("time") + outdims = OutDims(Dim{:rlevels}(rlevels)) + # outdims = OutDims(outtype=Vector{ReturnLevel{MaximumLikelihoodAbstractExtremeValueModel{ThresholdExceedance}}}) + + return mapCube(rlevels_cube, ds, indims=indims, outdims=outdims, threshold=threshold, rlevels=rlevels, minimalvalue=minimalvalue, nthreads=Threads.nthreads()) + +end + +# function moving_fct(cube::YAXArray; fct::Function=mean, latsouth_pixel=1, latnorth_pixel=1, lonwest_pixel=1, loneast_pixel=1) + +# indims = InDims(MovingWindow("latitude", latsouth_pixel,latnorth_pixel), +# MovingWindow("longitude", lonwest_pixel,loneast_pixel)) +# outdims=OutDims() +# mapCube(moving_fct, cube; fct=fct, indims=indims, outdims=outdims) +# end + + + +function gpfit_cube(xout, xin; threshold=nothing, minimalvalue=1.0) + + # @show size(xin) + + if all(ismissing, xin) + xout .= missing + return + end + + if isnothing(threshold) + dataforquantile = xin[xin .> minimalvalue] + if !isempty(dataforquantile) + threshold = quantile(dataforquantile, 0.95) + exceedances = xin[xin .> threshold] + xout .= Extremes.gpfit(exceedances) + else + threshold = missing + xout .= missing + return + end + + end + + +end + + +function gpfit_cube(ds::YAXArray; threshold=nothing) + + indims = InDims("time") + outdims = OutDims(outtype=Union{Missing, MaximumLikelihoodAbstractExtremeValueModel}) + + return mapCube(gpfit_cube, ds, indims=indims, outdims=outdims, threshold=threshold, nthreads=Threads.nthreads()) + +end \ No newline at end of file diff --git a/src/interface.jl b/src/interface.jl index 1e55d488..f5363be7 100644 --- a/src/interface.jl +++ b/src/interface.jl @@ -5,170 +5,6 @@ function getsymbols(C::ClimGrid) return latsymbol, lonsymbol end -""" - merge(A::ClimGrid, B::ClimGrid) - -Combines two ClimGrid. Based on the AxisArrays method. -""" -function Base.merge(A::ClimGrid, B::ClimGrid) - axisArray = merge(A.data, B.data) - ClimGrid(axisArray, longrid=A.longrid, latgrid=A.latgrid, msk=A.msk, grid_mapping=A.grid_mapping, dimension_dict=A.dimension_dict, timeattrib=A.timeattrib, model=A.model, frequency=A.frequency, experiment=A.experiment, run=A.run, project=A.project, institute=A.institute, filename=A.filename, dataunits=A.dataunits, latunits=A.latunits, lonunits=A.lonunits, variable=A.variable, typeofvar=A.typeofvar, typeofcal=A.typeofcal, varattribs=A.varattribs, globalattribs=A.globalattribs) -end - -function Base.:+(A::ClimGrid, B::ClimGrid) - axisArraytmp = A.data .+ B.data - - axisArray = buildarrayinterface(axisArraytmp, A) - - ClimGrid(axisArray, longrid=A.longrid, latgrid=A.latgrid, msk=A.msk, grid_mapping=A.grid_mapping, dimension_dict=A.dimension_dict, timeattrib=A.timeattrib, model=A.model, frequency=A.frequency, experiment=string(A.experiment, " + ", B.experiment), run=A.run, project=A.project, institute=A.institute, filename=A.filename, dataunits=A.dataunits, latunits=A.latunits, lonunits=A.lonunits, variable=A.variable, typeofvar=A.typeofvar, typeofcal=A.typeofcal, varattribs=A.varattribs, globalattribs=A.globalattribs) -end - -function Base.:+(A::ClimGrid, k) - axisArraytmp = A.data .+ k - - axisArray = buildarrayinterface(axisArraytmp, A) - - ClimGrid(axisArray, longrid=A.longrid, latgrid=A.latgrid, msk=A.msk, grid_mapping=A.grid_mapping, dimension_dict=A.dimension_dict, timeattrib=A.timeattrib, model=A.model, frequency=A.frequency, experiment=string(A.experiment, " + ", k), run=A.run, project=A.project, institute=A.institute, filename=A.filename, dataunits=A.dataunits, latunits=A.latunits, lonunits=A.lonunits, variable=A.variable, typeofvar=A.typeofvar, typeofcal=A.typeofcal, varattribs=A.varattribs, globalattribs=A.globalattribs) -end - -function Base.:-(A::ClimGrid, B::ClimGrid) - - axisArraytmp = A.data .- B.data - - axisArray = buildarrayinterface(axisArraytmp, A) - - ClimGrid(axisArray, longrid=A.longrid, latgrid=A.latgrid, msk=A.msk, grid_mapping=A.grid_mapping, dimension_dict=A.dimension_dict, timeattrib=A.timeattrib, model=A.model, frequency=A.frequency, experiment=string(A.experiment, " - ", B.experiment), run=A.run, project=A.project, institute=A.institute, filename=A.filename, dataunits=A.dataunits, latunits=A.latunits, lonunits=A.lonunits, variable=A.variable, typeofvar=A.typeofvar, typeofcal=A.typeofcal, varattribs=A.varattribs, globalattribs=A.globalattribs) -end - -function Base.:-(A::ClimGrid, k) - axisArraytmp = A.data .- k - - axisArray = buildarrayinterface(axisArraytmp, A) - - ClimGrid(axisArray, longrid=A.longrid, latgrid=A.latgrid, msk=A.msk, grid_mapping=A.grid_mapping, dimension_dict=A.dimension_dict, timeattrib=A.timeattrib, model=A.model, frequency=A.frequency, experiment=string(A.experiment, " - ", k), run=A.run, project=A.project, institute=A.institute, filename=A.filename, dataunits=A.dataunits, latunits=A.latunits, lonunits=A.lonunits, variable=A.variable, typeofvar=A.typeofvar, typeofcal=A.typeofcal, varattribs=A.varattribs, globalattribs=A.globalattribs) -end - -function Base.:*(A::ClimGrid, B::ClimGrid) - - axisArraytmp = A.data .* B.data - - axisArray = buildarrayinterface(axisArraytmp, A) - - ClimGrid(axisArray, longrid=A.longrid, latgrid=A.latgrid, msk=A.msk, grid_mapping=A.grid_mapping, dimension_dict=A.dimension_dict, timeattrib=A.timeattrib, model=A.model, frequency=A.frequency, experiment=string(A.experiment, " * ", B.experiment), run=A.run, project=A.project, institute=A.institute, filename=A.filename, dataunits=A.dataunits, latunits=A.latunits, lonunits=A.lonunits, variable=A.variable, typeofvar=A.typeofvar, typeofcal=A.typeofcal, varattribs=A.varattribs, globalattribs=A.globalattribs) -end - -function Base.:*(A::ClimGrid, k) - axisArraytmp = A.data .* k - - axisArray = buildarrayinterface(axisArraytmp, A) - - ClimGrid(axisArray, longrid=A.longrid, latgrid=A.latgrid, msk=A.msk, grid_mapping=A.grid_mapping, dimension_dict=A.dimension_dict, timeattrib=A.timeattrib, model=A.model, frequency=A.frequency, experiment=string(A.experiment, " * ", k), run=A.run, project=A.project, institute=A.institute, filename=A.filename, dataunits=A.dataunits, latunits=A.latunits, lonunits=A.lonunits, variable=A.variable, typeofvar=A.typeofvar, typeofcal=A.typeofcal, varattribs=A.varattribs, globalattribs=A.globalattribs) -end - -function Base.:/(A::ClimGrid, B::ClimGrid) - axisArraytmp = A.data ./ B.data - - axisArray = buildarrayinterface(axisArraytmp, A) - - ClimGrid(axisArray, longrid=A.longrid, latgrid=A.latgrid, msk=A.msk, grid_mapping=A.grid_mapping, dimension_dict=A.dimension_dict, timeattrib=A.timeattrib, model=A.model, frequency=A.frequency, experiment=string(A.experiment, " / ", B.experiment), run=A.run, project=A.project, institute=A.institute, filename=A.filename, dataunits=A.dataunits, latunits=A.latunits, lonunits=A.lonunits, variable=A.variable, typeofvar=A.typeofvar, typeofcal=A.typeofcal, varattribs=A.varattribs, globalattribs=A.globalattribs) -end - -function Base.:/(A::ClimGrid, k) - axisArraytmp = A.data ./ k - - axisArray = buildarrayinterface(axisArraytmp, A) - - ClimGrid(axisArray, longrid=A.longrid, latgrid=A.latgrid, msk=A.msk, grid_mapping=A.grid_mapping, dimension_dict=A.dimension_dict, timeattrib=A.timeattrib, model=A.model, frequency=A.frequency, experiment=string(A.experiment, " / ", k), run=A.run, project=A.project, institute=A.institute, filename=A.filename, dataunits=A.dataunits, latunits=A.latunits, lonunits=A.lonunits, variable=A.variable, typeofvar=A.typeofvar, typeofcal=A.typeofcal, varattribs=A.varattribs, globalattribs=A.globalattribs) -end - -""" - mean(A::ClimGrid) - -Compute the mean of `ClimGrid` A -""" -mean(A::ClimGrid) = Statistics.mean(A[1]) - -""" - minimum(A::ClimGrid) - -Compute the minimum value of `ClimGrid` A -""" -Base.minimum(A::ClimGrid) = minimum(A[1]) - -""" - maximum(A::ClimGrid) - -Compute the maximum value of `ClimGrid` A -""" -Base.maximum(A::ClimGrid) = maximum(A[1]) - -""" - std(A::ClimGrid) - -Compute the standard deviation of `ClimGrid` A -""" -std(A::ClimGrid) = Statistics.std(A[1]) - -""" - var(A::ClimGrid) - -Compute the variance of `ClimGrid` A -""" -var(A::ClimGrid) = Statistics.var(A[1]) - - -function getindex(C::ClimGrid,i::Int) - if i == 1 - return C.data - elseif i == 2 - return C.dataunits - elseif i == 3 - return C.model - elseif i == 4 - return C.experiment - elseif i == 5 - return C.run - elseif i == 6 - return C.lonunits - elseif i == 7 - return C.latunits - elseif i == 8 - return C.filename - elseif i == 9 - return C.variable - elseif i == 10 - return C.typeofvar - elseif i == 11 - return C.typeofcal - elseif i == 12 - return C.globalattribs - else - throw(error("You can't index like that!")) - end -end - -# Base.IndexStyle{T<:ClimGrid}(::Type{T}) = Base.IndexLinear() -Base.length(C::ClimGrid) = length(fieldnames(typeof(C))) -Base.size(C::ClimGrid) = (length(C),) -Base.size(C::ClimGrid, n::Int) = n==1 ? length(C) : error("Only dimension 1 has a well-defined size.") -#Base.endof(C::ClimGrid) = length(C) -Base.ndims(::ClimGrid) = 1 -Base.show(io::IO, ::MIME"text/plain", C::ClimGrid) = print(io, "ClimGrid struct with data:\n ", summary(C[1]), "\n", -"Project: ", C.project, "\n", -"Institute: ", C.institute, "\n", -"Model: ", C[3], "\n", -"Experiment: ", C[4], "\n", -"Run: ", C[5], "\n", -"Variable: ", C[9], "\n", -# "Variable CF standard name: ", C.varattribs["standard_name"], "\n", -"Data units: ", C[2], "\n", -"Frequency: ", C.frequency, "\n", -"Global attributes: ", summary(C[12]), "\n", -"Filename: ", C[8]) -# Base.show(io::IO, ::MIME"text/plain", ITP::TransferFunction) = print(io, "TransferFunction type with fields *itp*, *method* and *detrend*", "\n", -# "Interpolation array: ", size(ITP.itp), " transfer functions", "\n", -# "Method: ", ITP.method, "\n", -# "Detrended: ", ITP.detrend) diff --git a/src/interp.jl b/src/interp.jl new file mode 100644 index 00000000..5724a11a --- /dev/null +++ b/src/interp.jl @@ -0,0 +1,168 @@ +""" + C = griddata(A::ClimGrid, B::ClimGrid; min=[], max=[]) + +Interpolate `ClimGrid` A onto the lon-lat grid of `ClimGrid` B, +where A and B are `ClimGrid`. + +Min and max optional keyword are used to constraint the results of the interpolation. For example, interpolating bounded fields can lead to unrealilstic values, such as negative precipitation. In that case, one would use min=0.0 to convert negative precipitation to 0.0. + +""" +function griddata(A::ClimGrid, B::ClimGrid; method="linear", min=[], max=[]) + + # --------------------------------------- + # Get lat-lon information from ClimGrid B + londest, latdest = ClimateTools.getgrids(B) + + # Get lat-lon information from ClimGrid A + lonorig, latorig = ClimateTools.getgrids(A) + points = hcat(lonorig[:], latorig[:]) + + # ----------------------------------------- + # Get initial data and time from ClimGrid A + dataorig = A[1].data + timeorig = get_timevec(A)#[1][Axis{:time}][:] # the function will need to loop over time + + # --------------------- + # Allocate output Array + OUT = zeros(Float64, (size(B.data, 1), size(B.data, 2), length(timeorig))) + + # ------------------------ + # Interpolation + # interp!(OUT, timeorig, dataorig, lonorig, latorig, londest, latdest, A.variable, frac=frac) + griddata!(OUT, timeorig, dataorig, points, londest, latdest, method=method, msk=B.msk) + + if !isempty(min) + OUT[OUT .<= min] .= min + end + + if !isempty(max) + OUT[OUT .>= max] .= max + end + + # ----------------------- + # Construct AxisArrays and ClimGrid struct from array OUT + latsymbol = Symbol(B.dimension_dict["lat"]) + lonsymbol = Symbol(B.dimension_dict["lon"]) + dataOut = AxisArray(OUT, Axis{lonsymbol}(B[1][Axis{lonsymbol}][:]), Axis{latsymbol}(B[1][Axis{latsymbol}][:]), Axis{:time}(timeorig)) + + C = ClimateTools.ClimGrid(dataOut, longrid=B.longrid, latgrid=B.latgrid, msk=B.msk, grid_mapping=B.grid_mapping, dimension_dict=B.dimension_dict, timeattrib=A.timeattrib, model=A.model, frequency=A.frequency, experiment=A.experiment, run=A.run, project=A.project, institute=A.institute, filename=A.filename, dataunits=A.dataunits, latunits=B.latunits, lonunits=B.lonunits, variable=A.variable, typeofvar=A.typeofvar, typeofcal=A.typeofcal, varattribs=A.varattribs, globalattribs=A.globalattribs) + +end + +""" + C = griddata(A::ClimGrid, londest::AbstractArray{N, 1} where N, latdest::AbstractArray{N, 1} where N) + +Interpolate `ClimGrid` A onto lat-lon grid defined by londest and latdest vector or array. If an array is provided, it is assumed that the grid is curvilinear (not a regular lon-lat grid) and the user needs to provide the dimension vector ("x" and "y") for such a grid. + +""" +function griddata(A::ClimGrid, lon::AbstractArray{N, T} where N where T, lat::AbstractArray{N, T} where N where T; method="linear", dimx=[], dimy=[], min=[], max=[]) + + # Get lat-lon information from ClimGrid A + lonorig, latorig = getgrids(A) + points = hcat(lonorig[:], latorig[:]) + + # ----------------------------------------- + # Get initial data and time from ClimGrid A + dataorig = A[1].data + timeorig = A[1][Axis{:time}][:] # the function will need to loop over time + + if ndims(lon) == 1 + londest, latdest = ndgrid(lon, lat) + dimx = lon + dimy = lat + elseif ndims(lon) == 2 + londest = lon + latdest = lat + @assert !isempty(dimx) + @assert !isempty(dimy) + else + throw(error("Grid should be a vector or a grid")) + end + + # --------------------- + # Allocate output Array + if ndims(lon) == 1 + OUT = zeros(Float64, (length(lon), length(lat), length(timeorig))) + elseif ndims(lon) == 2 + OUT = zeros(Float64, (size(lon, 1), size(lon, 2), length(timeorig))) + end + + # ------------------------ + # Interpolation + griddata!(OUT, timeorig, dataorig, points, londest, latdest, method=method) + + if !isempty(min) + OUT[OUT .<= min] .= min + end + + if !isempty(max) + OUT[OUT .>= max] .= max + end + + # ----------------------- + # Construct AxisArrays and ClimGrid struct from array OUT + dataOut = AxisArray(OUT, Axis{:x}(dimx), Axis{:y}(dimy), Axis{:time}(timeorig)) + msk = Array{Float64}(ones((size(OUT, 1), size(OUT, 2)))) + + if ndims(lon) == 1 + grid_mapping = Dict(["grid_mapping_name" => "Regular_longitude_latitude"]) + dimension_dict = Dict(["lon" => "lon", "lat" => "lat"]) + elseif ndims(lon) == 2 + grid_mapping = Dict(["grid_mapping_name" => "Curvilinear_grid"]) + dimension_dict = Dict(["lon" => "x", "lat" => "y"]) + end + + C = ClimateTools.ClimGrid(dataOut, longrid=londest, latgrid=latdest, msk=msk, grid_mapping=grid_mapping, dimension_dict=dimension_dict, timeattrib=A.timeattrib, model=A.model, frequency=A.frequency, experiment=A.experiment, run=A.run, project=A.project, institute=A.institute, filename=A.filename, dataunits=A.dataunits, latunits="degrees_north", lonunits="degrees_east", variable=A.variable, typeofvar=A.typeofvar, typeofcal=A.typeofcal, varattribs=A.varattribs, globalattribs=A.globalattribs) + +end + +""" + griddata!(OUT, timeorig, dataorig, points, londest, latdest, method, ;msk=[]) +Interpolation of `dataorig` onto longitude grid `londest` and latitude grid `latdest`. Used internally by `regrid`. +""" +function griddata!(OUT, timeorig, dataorig, points, londest, latdest; method="linear", msk=[]) + + p = Progress(length(timeorig), 5, "Regridding: ") + for t = 1:length(timeorig) + + # Points values + val = dataorig[:, :, t][:] + + # Call scipy griddata + data_interp = scipy.griddata(points, val, (londest, latdest), method=method) + + # Apply mask from ClimGrid destination + if !isempty(msk) + OUT[:, :, t] .= data_interp .* msk + else + OUT[:, :, t] .= data_interp + end + + next!(p) + end +end + +""" + getgrids(C::ClimGrid) + +Returns longitude and latitude grids of ClimGrid C. +""" +function getgrids(C::ClimGrid) + longrid = C.longrid + latgrid = C.latgrid + return longrid, latgrid +end + +""" + getdims(C::ClimGrid) + +Returns dimensions vectors of C +""" +function getdims(C::ClimGrid) + latsymbol, lonsymbol = ClimateTools.getsymbols(C) + x = C[1][Axis{lonsymbol}][:] + y = C[1][Axis{latsymbol}][:] + timevec = get_timevec(C) + + return x, y, timevec +end \ No newline at end of file diff --git a/src/markov.jl b/src/markov.jl index 9d938bd4..8ae893c2 100644 --- a/src/markov.jl +++ b/src/markov.jl @@ -19,7 +19,7 @@ The autocorrelation of the input dataset `ds` along the time dimension. function MSModel(ds; k_regime=2, intercept = "switching") # Dimensions - indims = InDims("Ti") + indims = InDims("time") outdims = OutDims(Dim{:MSM}(1)) # outdims = OutDims() diff --git a/src/power.jl b/src/power.jl index 43e42ead..21983509 100644 --- a/src/power.jl +++ b/src/power.jl @@ -12,10 +12,10 @@ Compute the Lomb-Scargle periodogram for a given data cube. """ function LombScargle.lombscargle(cube::YAXArray, kwargs...) - indims = InDims("Ti") + indims = InDims("time") lombax = CategoricalAxis("LombScargle", ["Number of Frequencies", "Period with maximal power", "Maximal Power"]) #@show cube - timeax = YAXArrays.getAxis("Ti", cube) + timeax = YAXArrays.getAxis("time", cube) od = OutDims(lombax) mapCube(clombscargle, (cube, timeax), indims=(indims, indims), outdims=od) end diff --git a/src/processERA5.jl b/src/processERA5.jl index 078090e5..e2279f01 100644 --- a/src/processERA5.jl +++ b/src/processERA5.jl @@ -13,15 +13,16 @@ Compute the daily sum of ERA5 land data. **used internally** - Missing values are represented as `missing` in the input array and are replaced with `NaN` before computation. """ -function ERA5Land_dailysum(xout, xin; index_list = time_to_index) +function ERA5Land_dailysum(xout, xin; index_list) xout .= NaN if !all(ismissing, xin) for i in eachindex(index_list) data = view(xin, index_list[i]) + # data = xin[index_list[i]] if !all(ismissing, data) diffxin = Base.diff(replace(data, missing => NaN)) - xout[i] = sum(.!isnan.(diffxin)) + xout[i] = sum(diffxin[.!isnan.(diffxin)]) end end end @@ -49,16 +50,15 @@ function ERA5Land_dailysum(cube::YAXArray; keep_1stday=false, kwargs...) new_dates = unique(time_index) if keep_1stday - firstindex = 1 + firstidx = 1 else - firstindex = 2 + firstidx = 2 end - index_in_cube = [findall(==(i), time_index) for i in unique(time_index)][firstindex:end] + index_in_cube = [findall(==(i), time_index) for i in unique(time_index)][firstidx:end] # Dimensions - indims = InDims("Ti") - # outdims = OutDims(RangeAxis("time", dates_builder_yearmonthday(new_dates[firstindex:end]))) - outdims = OutDims(Dim{:Ti}(dates_builder_yearmonthday(new_dates[firstindex:end]))) + indims = InDims("time") + outdims = OutDims(Dim{:time}(dates_builder_yearmonthday(new_dates[firstidx:end]))) return mapCube(ERA5Land_dailysum, cube, indims=indims, outdims=outdims, index_list=index_in_cube) diff --git a/src/regrid.jl b/src/regrid.jl new file mode 100644 index 00000000..27ee5444 --- /dev/null +++ b/src/regrid.jl @@ -0,0 +1,82 @@ + + +""" + regrid_cube(cube::YAXArray, target_grid::YAXArray; lonname_source = :longitude, latname_source=:latitude, lonname_dest=:longitude, latname_dest=:latitude) -> YAXArray + +Regrids a `YAXArray` cube to a target grid using a linear interpolation between nodes. + +# Arguments +- `cube::YAXArray`: The input data cube to be regridded. +- `target_grid::YAXArray`: The target grid to which the data cube will be regridded. +- `lonname_source::Symbol`: The longitude dimension name of the source dataset (defaults to :longitude). +- `latname_source::Symbol`: The latitude dimension name of the source dataset (defaults to :latitude). +- `lonname_dest::Symbol`: The longitude dimension name of the destination dataset (defaults to :longitude). +- `latname_dest::Symbol`: The latitude dimension name of the destination dataset (defaults to :latitude). + +# Returns +- `YAXArray`: A new `YAXArray` that has been regridded to the target grid. + + +""" +function regrid_cube(source::YAXArray, dest::YAXArray; lonname_source = :longitude, latname_source=:latitude, lonname_dest=:longitude, latname_dest=:latitude) + + # subset "hardcodé" pour éviter effet de bords + source_spatial = source[Dim{lonname_source}(Base.minimum(lookup(dest, lonname_dest))..Base.maximum(lookup(dest, lonname_dest))), Dim{latname_source}(Base.minimum(lookup(dest, latname_dest))..Base.maximum(lookup(dest, latname_dest)))] + + dest_spatial = dest[Dim{lonname_dest}(Base.minimum(lookup(source_spatial, lonname_source))..Base.maximum(lookup(source_spatial, lonname_source))), Dim{latname_dest}(Base.minimum(lookup(source_spatial,latname_source))..Base.maximum(lookup(source_spatial,latname_source)))] + + # Grille source + xg1, yg1 = collect(source_spatial.longitude), collect(source_spatial.latitude) + Interpolations.deduplicate_knots!([xg1,yg1], move_knots = true) + + if Base.diff(xg1)[1] < 0.0 # vector needs to be monotonically increasing + source_spatial = reverse(source_spatial, dims=Dim{:longitude}) + xg1 = reverse(xg1) + end + + if Base.diff(yg1)[1] < 0.0 # vector needs to be monotonically increasing + source_spatial = reverse(source_spatial, dims=Dim{:latitude}) + yg1 = reverse(yg1) + end + + # Grille destination + xg2, yg2 = collect(dest_spatial.longitude), collect(dest_spatial.latitude) + Interpolations.deduplicate_knots!([xg2,yg2], move_knots = true) + + if Base.diff(xg2)[1] < 0.0 # vector needs to be monotonically increasing + # dest_spatial = reverse(dest_spatial, dims=Dim{:longitude}) + xg2 = reverse(xg2) + end + + if Base.diff(yg2)[1] < 0.0 # vector needs to be monotonically increasing + # dest_spatial = reverse(dest_spatial, dims=Dim{:latitude}) + yg2 = reverse(yg2) + end + + coords = Iterators.product(xg2,yg2) + + # Dimensions des calculs distribués + indims = InDims(string(lonname_source),string(latname_source)) + outdims = OutDims(Dim{lonname_source}(xg2), Dim{latname_source}(yg2)) + + # build_itp_spatial(xg1, yg1) + + return mapCube(regrid_cube, source_spatial; xg1=xg1, yg1=yg1, coords=coords, indims=indims, outdims=outdims) + +end + +""" + regrid_cube(xout, xin; xg1, yg1, coords) + +""" +function regrid_cube(xout, xin; xg1, yg1, coords) + + # @show size(xin) + + # Interpolation structure + itp = interpolate((xg1,yg1), xin, Gridded(Linear())) + etpf = extrapolate(itp, Interpolations.Flat()) + + return xout .= (c->etpf(c...)).(coords) + +end \ No newline at end of file diff --git a/src/spatial.jl b/src/spatial.jl index df6061c0..b626fcc2 100644 --- a/src/spatial.jl +++ b/src/spatial.jl @@ -1,66 +1,93 @@ """ - spatialsubset(C::ClimGrid, poly::Array{N, 2}) + moving_fct(xout, xin, fct) -Returns the spatial subset of ClimGrid C. The spatial subset is defined by the polygon poly, defined on a -180, +180 longitude reference. +Used as inner function of moving_fct(cube::YAXArray). +See also [`moving_fct(cube::YAXArray)`](@ref). """ -function spatialsubset(C::ClimGrid, poly) +function moving_fct(xout, xin; fct::Function=mean) + xout .= fct(skipmissing(xin)) +end - # Some checks for polygon poly +""" + moving_fct(cube::YAXArray; fct=mean, lat_before=1, lat_after=1, lon_before=1, lon_after=1)) - if size(poly, 1) != 2 && size(poly, 2) == 2 - # transpose - poly = poly' - end +Spatial moving function. lat_before/after and lon_before/after is the number of pixel used for the bounding box and fct is the function applied +""" +function moving_fct(cube::YAXArray; fct::Function=mean, latsouth_pixel=1, latnorth_pixel=1, lonwest_pixel=1, loneast_pixel=1) + + indims = InDims(MovingWindow("latitude", latsouth_pixel,latnorth_pixel), + MovingWindow("longitude", lonwest_pixel,loneast_pixel)) + outdims=OutDims() + mapCube(moving_fct, cube; fct=fct, indims=indims, outdims=outdims) +end - lonsymbol = Symbol(C.dimension_dict["lon"]) - latsymbol = Symbol(C.dimension_dict["lat"]) - longrid = C.longrid - latgrid = C.latgrid - lon = C[1][Axis{lonsymbol}][:] - lat = C[1][Axis{latsymbol}][:] - msk = inpolygrid(longrid, latgrid, poly) +# """ +# spatialsubset(A::YAXArray, poly::Array{N, 2}) - begin - I = findall(!isnan, msk) - idlon, idlat = (getindex.(I, 1), getindex.(I, 2)) - end - minXgrid = minimum(idlon) - maxXgrid = maximum(idlon) - minYgrid = minimum(idlat) - maxYgrid = maximum(idlat) - - # Get DATA - data = C[1].data - - if ndims(data) == 3 - data = data[minXgrid:maxXgrid, minYgrid:maxYgrid, :] - elseif ndims(data) == 4 - data = data[minXgrid:maxXgrid, minYgrid:maxYgrid, :, :] - end +# Returns the spatial subset of YAXArray A. The spatial subset is defined by the polygon polygon, defined on a -180, +180 longitude reference. - #new mask (e.g. representing the region of the polygon) - msk = msk[minXgrid:maxXgrid, minYgrid:maxYgrid] - data = applymask(data, msk) +# """ +# function spatialsubset(A::YAXArray, poly) - # Get lon-lat for such region - longrid = longrid[minXgrid:maxXgrid, minYgrid:maxYgrid] - latgrid = latgrid[minXgrid:maxXgrid, minYgrid:maxYgrid] - lon = lon[minXgrid:maxXgrid] - lat = lat[minYgrid:maxYgrid] +# # Some checks for polygon poly - if ndims(data) == 3 - dataOut = AxisArray(data, Axis{lonsymbol}(lon), Axis{latsymbol}(lat), Axis{:time}(C[1][Axis{:time}][:])) - elseif ndims(data) == 4 - dataOut = AxisArray(data, Axis{lonsymbol}(lon), Axis{latsymbol}(lat), Axis{:level}(C[1][Axis{:plev}][:]), Axis{:time}(C[1][Axis{:time}][:])) - end +# if size(poly, 1) != 2 && size(poly, 2) == 2 +# # transpose +# poly = poly' +# end - return ClimGrid(dataOut, longrid=longrid, latgrid=latgrid, msk=msk, grid_mapping=C.grid_mapping, timeattrib=C.timeattrib, dimension_dict=C.dimension_dict, model=C.model, frequency=C.frequency, experiment=C.experiment, run=C.run, project=C.project, institute=C.institute, filename=C.filename, dataunits=C.dataunits, latunits=C.latunits, lonunits=C.lonunits, variable=C.variable, typeofvar=C.typeofvar, typeofcal=C.typeofcal, varattribs=C.varattribs, globalattribs=C.globalattribs) +# lonsymbol = Symbol(C.dimension_dict["lon"]) +# latsymbol = Symbol(C.dimension_dict["lat"]) -end +# longrid = C.longrid +# latgrid = C.latgrid + +# lon = C[1][Axis{lonsymbol}][:] +# lat = C[1][Axis{latsymbol}][:] + +# msk = inpolygrid(longrid, latgrid, poly) + +# begin +# I = findall(!isnan, msk) +# idlon, idlat = (getindex.(I, 1), getindex.(I, 2)) +# end +# minXgrid = minimum(idlon) +# maxXgrid = maximum(idlon) +# minYgrid = minimum(idlat) +# maxYgrid = maximum(idlat) + +# # Get DATA +# data = C[1].data + +# if ndims(data) == 3 +# data = data[minXgrid:maxXgrid, minYgrid:maxYgrid, :] +# elseif ndims(data) == 4 +# data = data[minXgrid:maxXgrid, minYgrid:maxYgrid, :, :] +# end + +# #new mask (e.g. representing the region of the polygon) +# msk = msk[minXgrid:maxXgrid, minYgrid:maxYgrid] +# data = applymask(data, msk) + +# # Get lon-lat for such region +# longrid = longrid[minXgrid:maxXgrid, minYgrid:maxYgrid] +# latgrid = latgrid[minXgrid:maxXgrid, minYgrid:maxYgrid] +# lon = lon[minXgrid:maxXgrid] +# lat = lat[minYgrid:maxYgrid] + +# if ndims(data) == 3 +# dataOut = AxisArray(data, Axis{lonsymbol}(lon), Axis{latsymbol}(lat), Axis{:time}(C[1][Axis{:time}][:])) +# elseif ndims(data) == 4 +# dataOut = AxisArray(data, Axis{lonsymbol}(lon), Axis{latsymbol}(lat), Axis{:level}(C[1][Axis{:plev}][:]), Axis{:time}(C[1][Axis{:time}][:])) +# end + +# return ClimGrid(dataOut, longrid=longrid, latgrid=latgrid, msk=msk, grid_mapping=C.grid_mapping, timeattrib=C.timeattrib, dimension_dict=C.dimension_dict, model=C.model, frequency=C.frequency, experiment=C.experiment, run=C.run, project=C.project, institute=C.institute, filename=C.filename, dataunits=C.dataunits, latunits=C.latunits, lonunits=C.lonunits, variable=C.variable, typeofvar=C.typeofvar, typeofcal=C.typeofcal, varattribs=C.varattribs, globalattribs=C.globalattribs) + +# end """ @@ -234,3 +261,5 @@ function findmindist(p1::Tuple{Float64, Float64}, p2::Tuple{Array{Float64,1},Arr return findmin(dist) end + + diff --git a/src/statistics.jl b/src/statistics.jl new file mode 100644 index 00000000..9bd2e1b5 --- /dev/null +++ b/src/statistics.jl @@ -0,0 +1,18 @@ +function quantiles(xout,xin; q) + + xout .= NaN # Initialisation + + if !all(ismissing, xin) + xout .= quantile(skipmissing(xin), q) + end +end + +function quantiles(cube::YAXArray, q=[0.01, 0.1, 0.25, 0.5, 0.75, 0.9, 0.99], dims::String="number") + + # Dimensions + indims = InDims(dims) + outdims = OutDims(Dim{:Quantiles}(q)) + + mapCube(quantiles, cube, indims=indims, outdims=outdims, q=q) + +end \ No newline at end of file diff --git a/src/utils.jl b/src/utils.jl index f5a4d165..789ae537 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -11,18 +11,18 @@ function dates_builder_year(x) end function dates_builder_yearmonth(x) - out = Date[] + out = DateTime[] for i in eachindex(x) - push!(out, Date(x[i][1], x[i][2])) + push!(out, DateTime(x[i][1], x[i][2])) end return out end function dates_builder_yearmonth_hardcode(x, imois) - out = Date[] + out = DateTime[] for i in eachindex(x) - push!(out, Date(x[i][1], imois)) + push!(out, DateTime(x[i][1], imois)) end return out @@ -31,18 +31,18 @@ end function dates_builder_yearmonthday(x) - out = Date[] + out = DateTime[] for i in eachindex(x) - push!(out, Date(x[i][1], x[i][2], x[i][3])) + push!(out, DateTime(x[i][1], x[i][2], x[i][3])) end return out end function dates_builder_yearmonthday_hardcode(x; imois=1, iday=1) - out = Date[] + out = DateTime[] for i in eachindex(x) - push!(out, Date(x[i][1], imois)) + push!(out, DateTime(x[i][1], imois)) end return out @@ -52,15 +52,28 @@ function subsample(cube::YAXArray; month1=1, month2=12) # On vérifie s'il y a un overlap sur l'année (e.g. on aimerait novembre, décember et janvier) if month2 < month1 # overlap annuel - @error "Not implemented. consider something along the lines of: newcube = cube[Ti=Where(x -> month(x) != month2)]" - newdim = dim[Dim{:Ti}(Where(x -> month(x) <= month2 .| month(x) >= month1))] - # index = (Dates.month.(datevecin) .<= endmonth) .& (Dates.month.(datevecin) .>= startmonth) + newcube1 = cube[time=Where(x -> month(x) >= month1)] + newcube2 = cube[time=Where(x -> month(x) <= month2)] + + # newcube1 = renameaxis!(newcube1, :time=>:Ti) + # newcube2 = renameaxis!(newcube2, :time=>:Ti) + + + newlookupvals = vcat(collect(newcube1.time), collect(newcube2.time)) + # newlookupvals = vcat(collect(newcube1.Ti), collect(newcube2.Ti)) + + # newcube = cat(newcube1, newcube2; dims=DimensionalData.Dimensions.Ti(newlookupvals)) + newcube = cat(newcube1, newcube2; dims=Dim{:time}(newlookupvals)) + # Dim{:time}(newlookupvals) + + + # newcube = renameaxis!(newcube2, :Ti=>:time) + else - newcube = cube[Ti=Where(x -> month(x) <= month2 .& month(x) >= month1)] + newcube = cube[time=Where(x -> month(x) <= month2 .&& month(x) >= month1)] end - return newcube - + return newcube end