Draft pull request (changes from 1 commit)
67 changes: 45 additions & 22 deletions ext/OceananigansNCDatasetsExt.jl
@@ -57,48 +57,74 @@ const BuoyancyBoussinesqEOSModel = BuoyancyForce{<:BoussinesqSeawaterBuoyancy, g
##### Extend defVar to be able to write fields to NetCDF directly
#####


function squeeze_reduced_dimensions(data, reduced_dims)
# Fill missing indices from the right with 1s
indices = Any[:,:,:]
Member (reviewer) commented:
Suggested change:
-indices = Any[:,:,:]
+indices = Any[:, :, :]

what does this mean exactly?

Collaborator (author) replied:

The comment is meant for the whole algorithm, not just that line. Basically it takes Fields that have Nothing in any of their locations and gets rid of that dimension. (I'm calling these reduced_dims, but there's probably a better name.)

So for example, if data comes from an array that has loc = Center, Center, Nothing, it'll reduce its parent data by indexing it as data[:, :, 1]; i.e. a 2D array in this case.

I'm testing this out (hence a draft PR) after some discussions with @ali-ramadhan. Basically we want to keep all NetCDF arrays 3D (to mimic Oceananigans' behavior) as much as possible, but in NetCDF there's no concept of location, so we can't easily pass LZ = Nothing to NetCDF. In these cases it probably makes sense to squeeze that dimension (or whatever the verb for this is) so that we know that dimension is reduced.
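
As a rough illustration (a minimal sketch, not part of the PR; the array size and the reduced_dims tuple are assumptions), the helper above would behave like this for a z-reduced field:

data = rand(4, 4, 1)                                        # parent data of a (Center, Center, Nothing) field
reduced_dims = (3,)                                         # the z-dimension has location Nothing
squeezed = squeeze_reduced_dimensions(data, reduced_dims)   # indexes as data[:, :, 1]
size(squeezed)                                              # (4, 4): the singleton z-dimension is dropped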

Member (reviewer) replied:

Can you add attributes that indicate the location? We can add custom attributes to data even if they are not built in, right?
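
For reference, NCDatasets does accept arbitrary, non-CF attributes on a variable; a minimal sketch, where the attribute name "location" is only a hypothetical convention:

using NCDatasets

ds = NCDataset("example.nc", "c")
v = defVar(ds, "b", rand(4, 4), ("x", "y"))            # dimensions are created from the data size
v.attrib["location"] = "(Center, Center, Nothing)"     # custom attribute recording the Oceananigans location
close(ds)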

Collaborator (author), @tomchor, Oct 31, 2025:

I can (and in fact I probably will regardless), but that doesn't affect calculations in any of the NetCDF-reading software that I'm aware of, so broadcasting and other useful things still wouldn't work. Without the singleton dimensions (i.e. with the change I'm exploring here), those things work out of the box for most software.

Also if we include the singleton reduced dimension, we still would need to pick a value for that length-1 coordinate. In Oceananigans, that value is nothing I think, but there's no analog to that for NetCDF, so we'd need to pick an arbitrary value like 1 or 0, which I'd like to avoid.
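
To make that concrete, keeping the reduced dimension would force something like the sketch below, where the dimension name z_aaa and the placeholder value 0.0 are both made up:

defVar(ds, "z_aaa", [0.0], ("z_aaa",))                 # arbitrary coordinate value standing in for `nothing`
defVar(ds, "b", rand(4, 4, 1), ("x", "y", "z_aaa"))    # length-1 z dimension kept in the file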

Collaborator (author) added:

Also to be clear, this PR would move the NetCDF behavior closer to Oceananigans' behavior. Currently the NetCDF writer also drops singleton dimensions whenever we slice a field, even though that sliced dimension (of length 1) has a proper value. This PR would change that behavior and keep the sliced length-1 dimension in the NetCDF file, just like Oceananigans does.

for i in 1:3
if i ∈ reduced_dims
indices[i] = 1
end
end
return getindex(data, indices...)
end

defVar(ds, name, op::AbstractOperation; kwargs...) = defVar(ds, name, Field(op); kwargs...)
defVar(ds, name, op::Reduction; kwargs...) = defVar(ds, name, Field(op); kwargs...)

function defVar(ds, name, field::AbstractField;
time_dependent=false,
with_halos=false,
dimension_name_generator = trilocation_dim_name,
kwargs...)
field_cpu = on_architecture(CPU(), field) # Need to bring field to CPU in order to write it to NetCDF
field_data = Array{eltype(field)}(field_cpu)
if with_halos
field_data = Array{eltype(field)}(parent(field_cpu))
else
field_data = Array{eltype(field)}(field_cpu)
end
dims = field_dimensions(field, dimension_name_generator)
all_dims = time_dependent ? (dims..., "time") : dims

# Validate that all dimensions exist and match the field
create_field_dimensions!(ds, field, all_dims, dimension_name_generator)
defVar(ds, name, field_data, all_dims; kwargs...)
create_field_dimensions!(ds, field, all_dims, dimension_name_generator; with_halos)

squeezed_field_data = squeeze_reduced_dimensions(field_data, effective_reduced_dimensions(field))
defVar(ds, name, squeezed_field_data, all_dims; kwargs...)
end

#####
##### Dimension validation
#####

"""
create_field_dimensions!(ds, field::AbstractField, all_dims, dimension_name_generator)
create_field_dimensions!(ds, field::AbstractField, dim_names, dimension_name_generator)

Creates all dimensions for the given `field` in the NetCDF dataset `ds`. If the dimensions
already exist, they are validated to match the expected dimensions for the given `field`.

Arguments:
- `ds`: NetCDF dataset
- `field`: AbstractField being written
- `all_dims`: Tuple of dimension names to create/validate
- `dim_names`: Tuple of dimension names to create/validate
- `dimension_name_generator`: Function to generate dimension names
"""
function create_field_dimensions!(ds, field::AbstractField, all_dims, dimension_name_generator)
function create_field_dimensions!(ds, field::AbstractField, dim_names, dimension_name_generator; with_halos=false)
dimension_attributes = default_dimension_attributes(field.grid, dimension_name_generator)
spatial_dims = all_dims[1:end-(("time" in all_dims) ? 1 : 0)]
spatial_dim_names = dim_names[1:end-(("time" in dim_names) ? 1 : 0)]

# Main.@infiltrate
# Get spatial dimensions excluding reduced dimensions (i.e. dimensions where `loc isa Nothing``)
reduced_dims = effective_reduced_dimensions(field)
node_data = nodes(field; with_halos)
spatial_dim_data = [data for (i, data) in enumerate(node_data) if i ∉ reduced_dims]

spatial_dims_dict = Dict(dim_name => dim_data for (dim_name, dim_data) in zip(spatial_dims, nodes(field)))
create_spatial_dimensions!(ds, spatial_dims_dict, dimension_attributes; array_type=Array{eltype(field)})
# Create dictionary of spatial dimensions and their data
spatial_dim_names_dict = Dict(dim_name => dim_data for (dim_name, dim_data) in zip(spatial_dim_names, spatial_dim_data))
create_spatial_dimensions!(ds, spatial_dim_names_dict, dimension_attributes; array_type=Array{eltype(field)})

# Create time dimension if needed
if "time" in all_dims && "time" ∉ keys(ds.dim)
if "time" in dim_names && "time" ∉ keys(ds.dim)
create_time_dimension!(ds)
end

@@ -151,7 +177,8 @@ function create_spatial_dimensions!(dataset, dims, attributes_dict; array_type=A
defVar(dataset, dim_name, array_type(dim_array), (dim_name,), attrib=attributes_dict[dim_name]; kwargs...)
else
# Validate existing dimension
if dataset[dim_name] != dim_array
if collect(dataset[dim_name]) != collect(dim_array)
# dim_name == "y_aca" && Main.@infiltrate
throw(ArgumentError("Dimension '$dim_name' already exists in dataset but is different from expected.\n" *
" Actual: $(dataset[dim_name]) (length=$(length(dataset[dim_name])))\n" *
" Expected: $(dim_array) (length=$(length(dim_array)))"))
@@ -1236,7 +1263,8 @@ function initialize_nc_file(model,
dimensions,
filepath, # for better error messages
dimension_name_generator,
false) # time_dependent = false
false, # time_dependent = false
with_halos)

save_output!(dataset, output, model, name, array_type)
end
@@ -1255,7 +1283,8 @@ function initialize_nc_file(model,
dimensions,
filepath, # for better error messages
dimension_name_generator,
true) # time_dependent = true)
true, # time_dependent = true)
with_halos)
end

sync(dataset)
@@ -1293,7 +1322,7 @@ materialize_output(output::WindowedTimeAverage{<:AbstractField}, model) = output
""" Defines empty variables for 'custom' user-supplied `output`. """
function define_output_variable!(dataset, output, name, array_type,
deflatelevel, attrib, dimensions, filepath,
dimension_name_generator, time_dependent)
dimension_name_generator, time_dependent, with_halos)

if name ∉ keys(dimensions)
msg = string("dimensions[$name] for output $name=$(typeof(output)) into $filepath" *
@@ -1311,15 +1340,9 @@ end
""" Defines empty field variable. """
function define_output_variable!(dataset, output::AbstractField, name, array_type,
deflatelevel, attrib, dimensions, filepath,
dimension_name_generator, time_dependent)

dims = field_dimensions(output, dimension_name_generator)
FT = eltype(array_type)

all_dims = time_dependent ? (dims..., "time") : dims

defVar(dataset, name, FT, all_dims; deflatelevel, attrib)
dimension_name_generator, time_dependent, with_halos)

defVar(dataset, name, output; time_dependent, with_halos, deflatelevel, attrib)
return nothing
end

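
For readers skimming the diff, here is a rough end-to-end sketch of how the extended defVar would be exercised (untested; the grid, field, and filename are assumptions, and the extension presumably loads once both Oceananigans and NCDatasets are imported):

using Oceananigans, NCDatasets

grid = RectilinearGrid(size=(4, 4, 4), extent=(1, 1, 1))
c = CenterField(grid)
set!(c, (x, y, z) -> x * y * z)
C = Field(Average(c, dims=3))        # location (Center, Center, Nothing): z is reduced
compute!(C)

ds = NCDataset("fields.nc", "c")
defVar(ds, "C", C)                   # with this PR, written as a 2D variable (z squeezed away)
close(ds)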