Skip to content

Commit 92c546d

Browse files
committed
Implement support for Zarr read/write
Squashed commit of the following: commit 649aa0a Author: Ilia Kats <ilia-kats@gmx.net> Date: Tue Aug 26 15:11:46 2025 +0200 enable unit tests for Zarr and make them pass commit 8ed25ed Author: Ilia Kats <ilia-kats@gmx.net> Date: Mon Aug 25 18:17:58 2025 +0200 make tests pass again commit 17bb1b2 Author: Ilia Kats <ilia-kats@gmx.net> Date: Mon Aug 25 16:09:11 2025 +0200 update JuliaFormatter config and reformat everything commit a702865 Author: Ilia Kats <ilia-kats@gmx.net> Date: Mon Aug 25 16:07:06 2025 +0200 implement support for writing Zarr files commit db9abf9 Merge: ef4d948 eefb2c6 Author: Ilia Kats <ilia-kats@gmx.net> Date: Fri Aug 22 15:20:40 2025 +0200 Merge branch 'main' into zarr commit ef4d948 Author: Ilia Kats <ilia-kats@gmx.net> Date: Fri Apr 28 15:00:31 2023 +0200 split reading into separate functions for hdf5 and zarr commit 22bde93 Author: Ilia Kats <ilia-kats@gmx.net> Date: Fri Apr 28 14:19:33 2023 +0200 Zarr for mudata, some bugfixes for backed storage commit c61faba Author: Ilia Kats <ilia-kats@gmx.net> Date: Fri Apr 28 13:35:12 2023 +0200 initial Zarr-backed AnnData support commit 735cd1a Author: Ilia Kats <ilia-kats@gmx.net> Date: Fri Apr 28 10:09:29 2023 +0200 add basic Zarr support Only non-backed reading works currently. This requires JuliaIO/Zarr.jl#116 and JuliaIO/Zarr.jl#117
1 parent eefb2c6 commit 92c546d

21 files changed

+1274
-894
lines changed

.JuliaFormatter.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
indent = 4
22
margin = 100
33
always_for_in = true
4+
for_in_replacement = "∈"
45
whitespace_typedefs = true
56
whitespace_ops_in_indices = true
67
remove_extra_newlines = true
78
whitespace_in_kwargs = false
89
annotate_untyped_fields_with_any = false
9-
#normalize_line_endings = "unix"
10+
normalize_line_endings = "unix"

Project.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ PooledArrays = "2dfb63ee-cc39-5dd5-95bd-886bf059d720"
1616
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
1717
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
1818
StructArrays = "09ab397b-f2b6-538f-b94a-2f83cf4a842a"
19+
Zarr = "0a941bbe-ad1d-11e8-39d9-ab76183a1d99"
1920

2021
[compat]
2122
CategoricalArrays = "0.9, 0.10, 1"
@@ -28,6 +29,7 @@ OrderedCollections = "1.6"
2829
PooledArrays = "1"
2930
StructArrays = "0.6.4"
3031
julia = "1.5"
32+
Zarr = "0.9"
3133

3234
[extras]
3335
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

src/Muon.jl

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ using SparseArrays
55
import LinearAlgebra: Adjoint
66

77
using HDF5
8+
using Zarr
89
using DataFrames
910
using CategoricalArrays
1011
using StructArrays
@@ -13,7 +14,18 @@ import CompressHashDisplace: FrozenDict
1314
import OrderedCollections: OrderedDict
1415
using FileIO
1516

16-
export readh5mu, readh5ad, writeh5mu, writeh5ad, isbacked, update_obs!, update_var!, update!
17+
export readh5mu,
18+
readh5ad,
19+
readzarrmu,
20+
readzarrad,
21+
writeh5mu,
22+
writeh5ad,
23+
writezarrmu,
24+
writezarrad,
25+
isbacked,
26+
update_obs!,
27+
update_var!,
28+
update!
1729
export AnnData, MuData
1830
export var_names_make_unique!, obs_names_make_unique!
1931

@@ -27,11 +39,13 @@ end
2739
MUDATAVERSION = v"0.1.0"
2840
ANNDATAVERSION = v"0.1.0"
2941

30-
42+
include("typedefs.jl")
3143
include("index.jl")
3244
include("sparsedataset.jl")
3345
include("transposeddataset.jl")
46+
include("common_io.jl")
3447
include("hdf5_io.jl")
48+
include("zarr_io.jl")
3549
include("alignedmapping.jl")
3650
include("anndata.jl")
3751
include("mudata.jl")

src/alignedmapping.jl

Lines changed: 28 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -20,41 +20,37 @@ struct AlignedMapping{T <: Tuple, K, R} <: AbstractAlignedMapping{
2020
}
2121

2222
function AlignedMapping{T, K}(r, d::AbstractDict{K}) where {T <: Tuple, K}
23-
for (k, v) in d
23+
for (k, v) ∈ d
2424
checkdim(T, v, r, k)
2525
end
2626
return new{T, K, typeof(r)}(r, d)
2727
end
2828
end
2929

30-
mutable struct BackedAlignedMapping{T <: Tuple, R} <:
30+
mutable struct BackedAlignedMapping{T <: Tuple, G <: Group, R} <:
3131
AbstractAlignedMapping{T, String, AbstractArray{<:Number}}
3232
ref::R
33-
d::Union{HDF5.Group, Nothing}
34-
parent::Union{HDF5.File, HDF5.Group, Nothing}
33+
d::Union{G, Nothing}
34+
parent::Union{G, Nothing}
3535
path::Union{String, Nothing}
3636

37-
function BackedAlignedMapping{T}(r, g::HDF5.Group) where {T <: Tuple}
38-
for k in keys(g)
37+
function BackedAlignedMapping{T}(r, g) where {T <: Tuple}
38+
for k ∈ keys(g)
3939
checkdim(T, backed_matrix(g[k]), r, k)
4040
end
41-
return new{T, typeof(r)}(r, g, nothing, nothing)
41+
return new{T, typeof(g), typeof(r)}(r, g, nothing, nothing)
4242
end
43-
function BackedAlignedMapping{T}(
44-
r,
45-
parent::Union{HDF5.File, HDF5.Group},
46-
path::String,
47-
) where {T <: Tuple}
43+
function BackedAlignedMapping{T}(r, parent::Group, path::String) where {T <: Tuple}
4844
if haskey(parent, path)
4945
return BackedAlignedMapping{T}(r, parent[path])
5046
else
51-
return new{T, typeof(r)}(r, nothing, parent, path)
47+
return new{T, typeof(parent), typeof(r)}(r, nothing, parent, path)
5248
end
5349
end
5450
end
5551

5652
function checkdim(::Type{T}, v, ref, k) where {T <: Tuple}
57-
for (vdim, refdim) in T.parameters
53+
for (vdim, refdim) ∈ T.parameters
5854
vsize = size(v, vdim)
5955
rsize = size(ref, refdim)
6056
if vsize != rsize
@@ -102,13 +98,12 @@ Base.sizehint!(d::AlignedMapping, n) = sizehint!(d.d, n)
10298
AlignedMapping{T}(r, d::AbstractDict) where {T <: Tuple} = AlignedMapping{T, keytype(d)}(r, d)
10399
AlignedMapping{T, K}(ref) where {T, K} = AlignedMapping{T}(ref, Dict{K, AbstractMatrix{<:Number}}())
104100
AlignedMapping{T, K}(ref, ::Nothing) where {T, K} = AlignedMapping{T, K}(ref)
105-
AlignedMapping{T}(r, d::HDF5.Group) where {T <: Tuple} =
106-
AligedMapping{T}(ref, read_dict_of_mixed(d))
101+
AlignedMapping{T}(r, d::Group) where {T <: Tuple} = AligedMapping{T}(ref, read_dict_of_mixed(d))
107102

108103
Base.delete!(d::BackedAlignedMapping, k) = !isnothing(d.d) && delete_object(d.d, k)
109104
function Base.empty!(d::BackedAlignedMapping)
110105
if !isnothing(d.d)
111-
for k in keys(d.d)
106+
for k ∈ keys(d.d)
112107
delete_object(d.d, k)
113108
end
114109
end
@@ -121,24 +116,27 @@ Base.get(default::Base.Callable, d::BackedAlignedMapping, key) =
121116
isnothing(d.d) || !haskey(d.d, key) ? default() : backed_matrix(d.d[key])
122117
Base.haskey(d::BackedAlignedMapping, key) = isnothing(d.d) ? false : haskey(d.d, key)
123118
Base.isempty(d::BackedAlignedMapping) = isnothing(d.d) ? true : isempty(d.d)
124-
function Base.iterate(d::BackedAlignedMapping)
119+
function Base.iterate(
120+
d::BackedAlignedMapping{T, G},
121+
i=nothing,
122+
) where {T, G <: Union{HDF5.File, HDF5.Group}}
125123
if isnothing(d.d)
126124
return nothing
127125
else
128-
next = iterate(d.d)
126+
next = iterate(d.d, i)
129127
return isnothing(next) ? next :
130128
(hdf5_object_name(next[1]) => backed_matrix(next[1]), next[2])
131129
end
132130
end
133-
function Base.iterate(d::BackedAlignedMapping, i)
134-
if isnothing(d.d)
131+
function Base.iterate(d::BackedAlignedMapping{T, G}, i=nothing) where {T, G <: ZGroup}
132+
if (isnothing(d.d))
135133
return nothing
136134
else
137135
next = iterate(d.d, i)
138-
return isnothing(next) ? next :
139-
(hdf5_object_name(next[1]) => backed_matrix(next[1]), next[2])
136+
return isnothing(next) ? next : (next[1][1] => backed_matrix(next[1][2]), next[2])
140137
end
141138
end
139+
142140
Base.length(d::BackedAlignedMapping) = isnothing(d.d) ? 0 : length(d.d)
143141
function Base.pop!(d::BackedAlignedMapping)
144142
if isnothing(d.d)
@@ -192,10 +190,10 @@ function copy_subset(
192190
J
193191
else
194192
(:)
195-
end for (vdim, refdim) in T.parameters
193+
end for (vdim, refdim) ∈ T.parameters
196194
)
197-
for (k, v) in src
198-
dst[k] = v[idx..., ((:) for i in 1:(ndims(v) - length(idx)))...]
195+
for (k, v) ∈ src
196+
dst[k] = v[idx..., ((:) for i ∈ 1:(ndims(v) - length(idx)))...]
199197
end
200198
end
201199

@@ -208,7 +206,7 @@ end
208206
function aligned_view(d::AlignedMappingView{T}, A) where {T <: Tuple}
209207
idx = Vector{Union{Colon, typeof(d.indices).parameters...}}(undef, ndims(A))
210208
idx .= (:)
211-
for ((vdim, refdim), cidx) in zip(T.parameters, d.indices)
209+
for ((vdim, refdim), cidx) ∈ zip(T.parameters, d.indices)
212210
idx[vdim] = cidx
213211
end
214212
return @inbounds view(A, idx...)
@@ -249,7 +247,8 @@ Base.pop!(d::AlignedMappingView, k, default) =
249247
Base.parent(d::AlignedMappingView) = d.parent
250248
Base.parentindices(d::AlignedMappingView) = d.indices
251249

252-
Base.setindex!(d::AlignedMappingView, v::AbstractArray, k) = throw(ArgumentError("Replacing or adding elements of an AlignedMappingView is not supported."))
250+
Base.setindex!(d::AlignedMappingView, v::AbstractArray, k) =
251+
throw(ArgumentError("Replacing or adding elements of an AlignedMappingView is not supported."))
253252

254253
function Base.view(parent::AbstractAlignedMapping{T}, indices...) where {T <: Tuple}
255254
@boundscheck if length(T.parameters) != length(indices)
@@ -262,7 +261,7 @@ function Base.view(parent::AbstractAlignedMapping{T}, indices...) where {T <: Tu
262261
return AlignedMappingView(parent, indices)
263262
end
264263

265-
function Base.view(parentview::AlignedMappingView{T}, indices...) where T <: Tuple
264+
function Base.view(parentview::AlignedMappingView{T}, indices...) where {T <: Tuple}
266265
@boundscheck if length(T.parameters) != length(indices)
267266
throw(
268267
DimensionMismatch(

src/anndata.jl

Lines changed: 65 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
abstract type AbstractAnnData end
22

33
mutable struct AnnData <: AbstractAnnData
4-
file::Union{HDF5.File, HDF5.Group, Nothing}
4+
file::Union{HDF5.File, HDF5.Group, ZGroup, Nothing}
55

66
X::Union{AbstractMatrix{<:Number}, Nothing}
77

@@ -21,13 +21,13 @@ mutable struct AnnData <: AbstractAnnData
2121

2222
uns::Dict{<:AbstractString, <:Any}
2323

24-
function AnnData(file::Union{HDF5.File, HDF5.Group}, backed=false, checkversion=true)
24+
function AnnData(file::Union{HDF5.File, HDF5.Group, ZGroup}, backed=false, checkversion=true)
2525
if checkversion
2626
attrs = attributes(file)
2727
if !haskey(attrs, "encoding-type")
28-
@warn "The HDF5 file was not created by muon, we can't guarantee that everything will work correctly"
29-
elseif attrs["encoding-type"] != "AnnData"
30-
error("This HDF5 file does not appear to hold an AnnData object")
28+
@warn "This file was not created by muon, we can't guarantee that everything will work correctly"
29+
elseif attrs["encoding-type"] != "anndata"
30+
error("This file does not appear to hold an AnnData object")
3131
end
3232
end
3333

@@ -158,9 +158,29 @@ function readh5ad(filename::AbstractString; backed=false)
158158
return adata
159159
end
160160

161+
function readzarrad(filename::AbstractString; backed=false)
162+
filename = abspath(filename)
163+
if !backed
164+
fid = zopen(filename, "r")
165+
else
166+
fid = zopen(filename, "r+")
167+
end
168+
local adata
169+
try
170+
adata = AnnData(fid, backed, true)
171+
catch e
172+
close(fid)
173+
rethrow()
174+
end
175+
if !backed
176+
close(fid)
177+
end
178+
return adata
179+
end
180+
161181
function writeh5ad(filename::AbstractString, adata::AbstractAnnData; compress::UInt8=0x9)
162182
filename = abspath(filename)
163-
if file(adata) === nothing || filename != HDF5.filename(file(adata))
183+
if isnothing(file(adata)) || filename != HDF5.filename(file(adata))
164184
hfile = h5open(filename, "w", userblock=512)
165185
try
166186
write(hfile, adata, compress=compress)
@@ -179,22 +199,43 @@ function writeh5ad(filename::AbstractString, adata::AbstractAnnData; compress::U
179199
return nothing
180200
end
181201

182-
function Base.write(
183-
parent::Union{HDF5.File, HDF5.Group},
184-
name::AbstractString,
185-
adata::AbstractAnnData;
186-
compress::UInt8=0x9
187-
)
202+
function writezarrad(filename::AbstractString, adata::AbstractAnnData; compress::UInt8=0x9)
203+
filename = abspath(filename)
204+
if isnothing(file(adata)) || filename != zarr_filename(file(adata))
205+
rm(filename, force=true, recursive=true)
206+
zfile = zgroup(filename)
207+
write(zfile, adata, compress=compress)
208+
else
209+
write(adata, compress=compress)
210+
end
211+
return nothing
212+
end
213+
214+
# HDF5.jl defines Base.write(::Union{HDF5.File, HDF5.Group}, ::Union{Nothing, AbstractString}, ::Any; kwargs...)
215+
# Using the below as Base.write leads to ambiguity: The HDF5.jl definition is more specific in the first argument,
216+
# ours is more specific in the third argument. Thus the ugly workaround.
217+
function _write(parent::Group, name::AbstractString, adata::AbstractAnnData; compress::UInt8=0x9)
188218
g = create_group(parent, name)
189219
write(g, adata, compress=compress)
190220
end
191-
192-
function Base.write(parent::Union{HDF5.File, HDF5.Group}, adata::AbstractAnnData; compress::UInt8=0x9)
193-
attrs = attributes(parent)
194-
attrs["encoding-type"] = "anndata"
195-
attrs["encoding-version"] = string(ANNDATAVERSION)
196-
attrs["encoder"] = NAME
197-
attrs["encoder-version"] = string(VERSION)
221+
Base.write(
222+
parent::Union{HDF5.File, HDF5.Group},
223+
name::AbstractString,
224+
adata::AbstractAnnData;
225+
compress::UInt8=0x9,
226+
)=_write(parent, name, adata, compress=compress)
227+
Base.write(
228+
parent::ZGroup,
229+
name::AbstractString,
230+
adata::AbstractAnnData;
231+
compress::UInt8=0x9,
232+
)=_write(parent, name, adata, compress=compress)
233+
234+
function Base.write(parent::Group, adata::AbstractAnnData; compress::UInt8=0x9)
235+
write_attribute(parent, "encoding-type", "anndata")
236+
write_attribute(parent, "encoding-version", string(ANNDATAVERSION))
237+
write_attribute(parent, "encoder", NAME)
238+
write_attribute(parent, "encoder-version", string(VERSION))
198239
if parent === file(adata)
199240
write(adata, compress=compress)
200241
else
@@ -211,7 +252,7 @@ function Base.write(adata; compress::UInt8=0x9)
211252
write_metadata(file(adata), adata, compress=compress)
212253
end
213254

214-
function write_metadata(parent::Union{HDF5.File, HDF5.Group}, adata::AbstractAnnData; compress::UInt8=0x9)
255+
function write_metadata(parent::Group, adata::AbstractAnnData; compress::UInt8=0x9)
215256
write_attr(parent, "obs", adata.obs, index=adata.obs_names, compress=compress)
216257
write_attr(parent, "obsm", adata.obsm, index=adata.obs_names, compress=compress)
217258
write_attr(parent, "obsp", adata.obsp, compress=compress)
@@ -222,7 +263,8 @@ function write_metadata(parent::Union{HDF5.File, HDF5.Group}, adata::AbstractAnn
222263
end
223264
# FileIO support
224265
load(f::File{format"h5ad"}; backed::Bool=false) = readh5ad(filename(f), backed=backed)
225-
save(f::File{format"h5ad"}, data::AbstractAnnData; compress::UInt8=0x9) = writeh5ad(filename(f), data, compress=compress)
266+
save(f::File{format"h5ad"}, data::AbstractAnnData; compress::UInt8=0x9) =
267+
writeh5ad(filename(f), data, compress=compress)
226268

227269
Base.size(adata::AbstractAnnData) = (length(adata.obs_names), length(adata.var_names))
228270
Base.size(adata::AbstractAnnData, d::Integer) = size(adata)[d]
@@ -247,15 +289,15 @@ end
247289
function Base.getindex(
248290
adata::AbstractAnnData,
249291
I::Union{
250-
AbstractUnitRange,
292+
OrdinalRange,
251293
Colon,
252294
AbstractVector{<:Integer},
253295
AbstractVector{<:AbstractString},
254296
Integer,
255297
AbstractString,
256298
},
257299
J::Union{
258-
AbstractUnitRange,
300+
OrdinalRange,
259301
Colon,
260302
AbstractVector{<:Integer},
261303
AbstractVector{<:AbstractString},

0 commit comments

Comments
 (0)