Skip to content

Commit 631a006

Browse files
committed
views improvements
- obsmap/varmap now refer to the AnnDataViews in the view, not the parent object
- the modalities dict in a MuDataView is now ordered (by using freeze() from OrderedCollections.jl)
- various minor fixes
1 parent f23770d commit 631a006

File tree

7 files changed

+94
-100
lines changed

7 files changed

+94
-100
lines changed

Project.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ version = "0.2.1"
66
[deps]
77
CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
88
Compat = "34da2185-b29b-5c13-b0c7-acf172513d20"
9-
CompressHashDisplace = "33dc01b2-0ff1-4271-8670-ed883dd4b55b"
109
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
1110
DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
1211
EllipsisNotation = "da5c29d0-fa7d-589e-88eb-ea29b0a81949"
@@ -15,6 +14,7 @@ FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b"
1514
HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f"
1615
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
1716
Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
17+
OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
1818
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
1919
PooledArrays = "2dfb63ee-cc39-5dd5-95bd-886bf059d720"
2020
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
@@ -25,14 +25,14 @@ Zarr = "0a941bbe-ad1d-11e8-39d9-ab76183a1d99"
2525
[compat]
2626
CategoricalArrays = "0.9, 0.10, 1"
2727
Compat = "4.10.0"
28-
CompressHashDisplace = "0.1.2"
2928
DataFrames = "1.5"
3029
DataStructures = "0.18, 0.19"
3130
EllipsisNotation = "1.8.0"
3231
FileIO = "1.6"
3332
FillArrays = "1.13.0"
3433
HDF5 = "0.16 - 0.99, 1"
3534
Logging = "1.6.7"
35+
OrderedCollections = "1.6"
3636
PooledArrays = "1"
3737
StructArrays = "0.6.4, 0.7"
3838
Zarr = "0.9"

src/Muon.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ using StructArrays
1313
using PooledArrays
1414
using FillArrays
1515
using DataStructures
16-
import CompressHashDisplace: FrozenDict
16+
using OrderedCollections
1717
using FileIO
1818

1919
using Compat

src/anndata.jl

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -355,7 +355,7 @@ function Base.getindex(
355355
)
356356
@boundscheck checkbounds(adata, I, J)
357357
i, j = convertidx(I, adata.obs_names), convertidx(J, adata.var_names)
358-
newad = AnnData(
358+
@inbounds newad = AnnData(
359359
X=adata.X[i, j],
360360
obs=isempty(adata.obs) ? nothing : adata.obs[i, :],
361361
obs_names=adata.obs_names[i],
@@ -394,10 +394,14 @@ struct AnnDataView{Ti, Tj} <: AbstractAnnData
394394
uns::Dict{<:AbstractString, <:Any}
395395
end
396396

397-
function Base.view(ad::AnnData, I, J)
397+
@inline function Base.view(
398+
ad::AnnData,
399+
I::Union{OrdinalRange, Colon, AbstractVector{<:Integer}, AbstractVector{<:AbstractString}, Integer, AbstractString},
400+
J::Union{OrdinalRange, Colon, AbstractVector{<:Integer}, AbstractVector{<:AbstractString}, Integer, AbstractString},
401+
)
398402
@boundscheck checkbounds(ad, I, J)
399403
i, j = convertidx(I, ad.obs_names), convertidx(J, ad.var_names)
400-
X = isbacked(ad) ? nothing : @view ad.X[i, j]
404+
X = isbacked(ad) ? nothing : @inbounds view(ad.X, i, j)
401405

402406
return AnnDataView(
403407
ad,
@@ -416,10 +420,14 @@ function Base.view(ad::AnnData, I, J)
416420
ad.uns,
417421
)
418422
end
419-
function Base.view(ad::AnnDataView, I, J)
423+
function Base.view(
424+
ad::AnnDataView,
425+
I::Union{OrdinalRange, Colon, AbstractVector{<:Integer}, AbstractVector{<:AbstractString}, Integer, AbstractString},
426+
J::Union{OrdinalRange, Colon, AbstractVector{<:Integer}, AbstractVector{<:AbstractString}, Integer, AbstractString},
427+
)
420428
@boundscheck checkbounds(ad, I, J)
421429
i, j = Base.reindex(parentindices(ad), (convertidx(I, ad.obs_names), convertidx(J, ad.var_names)))
422-
return view(parent(ad), i, j)
430+
return @inbounds view(parent(ad), i, j)
423431
end
424432

425433
function Base.getproperty(ad::AnnDataView, s::Symbol)

src/index.jl

Lines changed: 13 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -270,7 +270,7 @@ end
270270
end
271271
@inline function Base.view(idx::SubIndex, I)
272272
@boundscheck checkbounds(idx, I)
273-
return @inbounds view(parent(idx), Base.reindex((parentindices(idx),), (I,))[1])
273+
return @inbounds view(parent(idx), Base.reindex((parentindices(idx),), (I,))...)
274274
end
275275

276276
Base.copy(si::SubIndex) = Index(si)
@@ -373,46 +373,39 @@ function Base.getindex(si::SubIndex{T}, elem::T, ::Val{false}) where {T}
373373
end
374374
return res
375375
end
376-
Base.@propagate_inbounds function Base.getindex(si::SubIndex, i::Union{Integer, AbstractVector{<:Integer}})
376+
@inline function Base.getindex(si::SubIndex, i::Union{Integer, AbstractVector{<:Integer}})
377377
@boundscheck checkbounds(si, i)
378-
return parent(si)[Base.reindex((parentindices(si),), (i,))[1]]
378+
return @inbounds parent(si)[Base.reindex((parentindices(si),), (i,))...]
379379
end
380-
Base.@propagate_inbounds function Base.getindex(
381-
si::SubIndex{T, V, Colon},
382-
i::Union{Integer, AbstractVector{<:Integer}},
383-
) where {T, V}
380+
@inline function Base.getindex(si::SubIndex{T, V, Colon}, i::Union{Integer, AbstractVector{<:Integer}}) where {T, V}
384381
@boundscheck checkbounds(si, i)
385-
return parent(si)[i]
382+
return @inbounds parent(si)[i]
386383
end
387384

388-
Base.@propagate_inbounds function Base.setindex!(si::SubIndex{T}, newval::T, i::Integer) where {T}
385+
@inline function Base.setindex!(si::SubIndex{T}, newval::T, i::Integer) where {T}
389386
@boundscheck checkbounds(si, i)
390-
setindex!(parent(si), newval, Base.reindex((parentindices(si),), (i,))[1])
387+
@inbounds setindex!(parent(si), newval, Base.reindex((parentindices(si),), (i,))...)
391388
return si
392389
end
393-
Base.@propagate_inbounds function Base.setindex!(si::SubIndex{T, V, Colon}, newval::T, i::Integer) where {T, V}
390+
@inline function Base.setindex!(si::SubIndex{T, V, Colon}, newval::T, i::Integer) where {T, V}
394391
@boundscheck checkbounds(si, i)
395-
return setindex!(parent(si), newval, i)
392+
return @inbounds setindex!(parent(si), newval, i)
396393
end
397-
Base.@propagate_inbounds function Base.setindex!(si::SubIndex{T}, newval::T, oldval::T) where {T}
394+
function Base.setindex!(si::SubIndex{T}, newval::T, oldval::T) where {T}
398395
oldidx = parent(si)[oldval, true]
399396
foldidx = findfirst(in(parentindices(si)), oldidx)
400397
if isnothing(foldidx)
401398
throw(KeyError(oldval))
402399
end
403-
parent(si)[oldidx[foldidx]] = newval
400+
@inbounds parent(si)[oldidx[foldidx]] = newval
404401
return si
405402
end
406-
Base.@propagate_inbounds function Base.setindex!(
407-
si::SubIndex{T, V, I},
408-
newval::T,
409-
oldval::T,
410-
) where {T, V, I <: AbstractArray{<:Integer}}
403+
function Base.setindex!(si::SubIndex{T, V, I}, newval::T, oldval::T) where {T, V, I <: AbstractArray{<:Integer}}
411404
oldidx = parent(si)[oldval, true]
412405
foldidx = findfirst(in(si.revmapping), oldidx)
413406
if isnothing(foldidx)
414407
throw(KeyError(oldval))
415408
end
416-
parent(si)[oldidx[foldidx]] = newval
409+
@inbounds parent(si)[oldidx[foldidx]] = newval
417410
return si
418411
end

src/mudata.jl

Lines changed: 52 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -361,18 +361,15 @@ Base.setindex!(mdata::MuData, ad::AnnData, key::AbstractString) = setindex!(mdat
361361
Base.setindex!(mdata::MuData, ad::AnnData, key::Symbol) = setindex!(mdata.mod, ad, string(key))
362362

363363
function Base.getindex(
364-
mdata::MuData,
364+
mdata::AbstractMuData,
365365
I::Union{OrdinalRange, Colon, AbstractVector{<:Integer}, AbstractVector{<:AbstractString}, Number, AbstractString},
366366
J::Union{OrdinalRange, Colon, AbstractVector{<:Integer}, AbstractVector{<:AbstractString}, Number, AbstractString},
367367
)
368368
@boundscheck checkbounds(mdata, I, J)
369369
i, j = convertidx(I, mdata.obs_names), convertidx(J, mdata.var_names)
370-
newmu = MuData(
370+
@inbounds newmu = MuData(
371371
mod=OrderedDict{String, AnnData}(
372-
k => ad[
373-
getadidx(i, vec(mdata.obsmap[k]), mdata.obs_names),
374-
getadidx(j, vec(mdata.varmap[k]), mdata.var_names),
375-
] for (k, ad) ∈ mdata.mod
372+
k => ad[getadidx(i, vec(mdata.obsmap[k])), getadidx(j, vec(mdata.varmap[k]))] for (k, ad) ∈ mdata.mod
376373
),
377374
obs=isempty(mdata.obs) ? nothing : mdata.obs[i, :],
378375
obs_names=mdata.obs_names[i],
@@ -394,13 +391,9 @@ function Base.getindex(
394391
return newmu
395392
end
396393

397-
getadidx(I::Colon, ref::AbstractVector{<:Unsigned}, idx::AbstractIndex{<:AbstractString}, reduce_memory=false) = I
398-
function getadidx(
399-
I::Union{AbstractVector{<:Integer}, OrdinalRange},
400-
ref::AbstractVector{<:Unsigned},
401-
idx::AbstractIndex{<:AbstractString},
402-
reduce_memory=false,
403-
)
394+
getadidx(I::Colon, ref::AbstractVector{<:Unsigned}, reduce_memory=false) = I
395+
getadidx(I::OrdinalRange, ref::AbstractVector{<:Unsigned}, reduce_memory=false) = filter(>(0x0), ref[I])
396+
function getadidx(I::AbstractVector{<:Integer}, ref::AbstractVector{<:Unsigned}, reduce_memory=false)
404397
J = filter(>(0x0), ref[I])
405398
if reduce_memory && length(J) > 0
406399
diff = J[end] - J[1]
@@ -414,8 +407,7 @@ function getadidx(
414407
end
415408
return J
416409
end
417-
getadidx(I::Number, ref::AbstractVector{<:Unsigned}, idx::Index{<:AbstractString}, reduce_memory=false) =
418-
getadidx([I], ref, idx, reduce_memory)
410+
getadidx(I::Number, ref::AbstractVector{<:Unsigned}, reduce_memory=false) = getadidx([I], ref, reduce_memory)
419411

420412
adjustmap!(map::AbstractVector{<:Unsigned}, I::Colon) = map
421413
function adjustmap!(map::AbstractVector{<:Unsigned}, I::Union{Integer, AbstractVector{<:Integer}, OrdinalRange})
@@ -999,77 +991,78 @@ push_var!(
999991
) where {N, M} =
1000992
_push_attr!(mdata, :var, 0x2, columns, mods, common=common, prefixed=prefixed, drop=drop, only_drop=only_drop)
1001993

1002-
struct MuDataView{Ti, Tj} <: AbstractMuData
994+
mutable struct MuDataView{Ti, Tj} <: AbstractMuData
1003995
parent::MuData
1004996
I::Ti
1005997
J::Tj
1006998

1007-
mod::FrozenDict{String, <:AnnDataView}
999+
mod::LittleDict{String, <:AnnDataView}
10081000
obs::SubDataFrame
10091001
obs_names::SubIndex{<:AbstractString}
10101002
obsm::StrAlignedMappingView{Tuple{1 => 1}}
10111003
obsp::StrAlignedMappingView{Tuple{1 => 1, 2 => 1}}
1012-
obsmap::StrAlignedMappingView{Tuple{1 => 1}}
1004+
obsmap::StrAlignedMapping{Tuple{1 => 1}, MuDataView{Ti, Tj}, false}
10131005

10141006
var::SubDataFrame
10151007
var_names::SubIndex{<:AbstractString}
10161008
varm::StrAlignedMappingView{Tuple{1 => 2}}
10171009
varp::StrAlignedMappingView{Tuple{1 => 2, 2 => 2}}
1018-
varmap::StrAlignedMappingView{Tuple{1 => 2}}
1010+
varmap::StrAlignedMapping{Tuple{1 => 2}, MuDataView{Ti, Tj}, false}
10191011

10201012
uns::Dict{<:AbstractString, <:Any}
1013+
1014+
function MuDataView{Ti, Tj}(parent::MuData, I::Ti, J::Tj) where {Ti, Tj}
1015+
mv = new(
1016+
parent,
1017+
I,
1018+
J,
1019+
freeze(
1020+
OrderedDict(
1021+
m => view(ad, getadidx(I, vec(parent.obsmap[m]), true), getadidx(J, vec(parent.varmap[m]), true)) for (m, ad) ∈ parent.mod
1022+
),
1023+
),
1024+
)
1025+
1026+
mv.obs = view(parent.obs, nrow(parent.obs) > 0 ? I : (:), :)
1027+
mv.obs_names = view(parent.obs_names, I)
1028+
mv.obsm = view(parent.obsm, I)
1029+
mv.obsp = view(parent.obsp, I, I)
1030+
1031+
mv.var = view(parent.var, nrow(parent.var) > 0 ? J : (:), :)
1032+
mv.var_names = view(parent.var_names, J)
1033+
mv.varm = view(parent.varm, J)
1034+
mv.varp = view(parent.varp, J, J)
1035+
1036+
mv.obsmap = StrAlignedMapping{Tuple{1 => 1}, false}(mv)
1037+
mv.varmap = StrAlignedMapping{Tuple{1 => 2}, false}(mv)
1038+
copy_subset(parent.obsmap, mv.obsmap, I, J),
1039+
copy_subset(parent.varmap, mv.varmap, I, J),
1040+
for mod ∈ keys(mv.mod)
1041+
adjustmap!(vec(mv.obsmap[mod]), I)
1042+
adjustmap!(vec(mv.varmap[mod]), J)
1043+
end
1044+
return mv
1045+
end
10211046
end
1047+
MuDataView(parent::MuData, I::Ti, J::Tj) where {Ti, Tj} = MuDataView{Ti, Tj}(parent, I, J)
10221048

1023-
function Base.view(
1049+
@inline function Base.view(
10241050
mu::MuData,
1025-
I::Union{OrdinalRange, Colon, AbstractVector{<:Integer}, AbstractVector{<:AbstractString}, Number, AbstractString},
1026-
J::Union{OrdinalRange, Colon, AbstractVector{<:Integer}, AbstractVector{<:AbstractString}, Number, AbstractString},
1051+
I::Union{OrdinalRange, Colon, AbstractVector{<:Integer}, AbstractVector{<:AbstractString}, Integer, AbstractString},
1052+
J::Union{OrdinalRange, Colon, AbstractVector{<:Integer}, AbstractVector{<:AbstractString}, Integer, AbstractString},
10271053
)
10281054
@boundscheck checkbounds(mu, I, J)
10291055
i, j = convertidx(I, mu.obs_names), convertidx(J, mu.var_names)
1030-
mod = Dict(
1031-
m => view(
1032-
ad,
1033-
getadidx(i, vec(mu.obsmap[m]), mu.obs_names, true),
1034-
getadidx(j, vec(mu.varmap[m]), mu.var_names, true),
1035-
) for (m, ad) ∈ mu.mod
1036-
)
1037-
return MuDataView(
1038-
mu,
1039-
i,
1040-
j,
1041-
FrozenDict(mod),
1042-
view(mu.obs, nrow(mu.obs) > 0 ? i : (:), :),
1043-
view(mu.obs_names, i),
1044-
view(mu.obsm, i),
1045-
view(mu.obsp, i, i),
1046-
view(mu.obsmap, i),
1047-
view(mu.var, nrow(mu.var) > 0 ? j : (:), :),
1048-
view(mu.var_names, j),
1049-
view(mu.varm, j),
1050-
view(mu.varp, j, j),
1051-
view(mu.varmap, j),
1052-
mu.uns,
1053-
)
1056+
return MuDataView(mu, i, j)
10541057
end
10551058
function Base.view(
10561059
mu::MuDataView,
1057-
I::Union{OrdinalRange, Colon, AbstractVector{<:Integer}, AbstractVector{<:AbstractString}, Number, AbstractString},
1058-
J::Union{OrdinalRange, Colon, AbstractVector{<:Integer}, AbstractVector{<:AbstractString}, Number, AbstractString},
1059-
)
1060-
@boundscheck checkbounds(mu, I, J)
1061-
i, j = Base.reindex(parentindices(mu), (convertidx(I, mu.obs_names), convertidx(J, mu.var_names)))
1062-
return view(parent(mu), i, j)
1063-
end
1064-
1065-
function Base.getindex(
1066-
mu::MuDataView,
1067-
I::Union{OrdinalRange, Colon, AbstractVector{<:Integer}, AbstractVector{<:AbstractString}, Number, AbstractString},
1068-
J::Union{OrdinalRange, Colon, AbstractVector{<:Integer}, AbstractVector{<:AbstractString}, Number, AbstractString},
1060+
I::Union{OrdinalRange, Colon, AbstractVector{<:Integer}, AbstractVector{<:AbstractString}, Integer, AbstractString},
1061+
J::Union{OrdinalRange, Colon, AbstractVector{<:Integer}, AbstractVector{<:AbstractString}, Integer, AbstractString},
10691062
)
10701063
@boundscheck checkbounds(mu, I, J)
10711064
i, j = Base.reindex(parentindices(mu), (convertidx(I, mu.obs_names), convertidx(J, mu.var_names)))
1072-
return getindex(parent(mu), i, j)
1065+
return @inbounds view(parent(mu), i, j)
10731066
end
10741067

10751068
Base.parent(mu::MuData) = mu

src/util.jl

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -68,17 +68,14 @@ function minimum_unsigned_type_for_n(n::Number)
6868
return mintype
6969
end
7070

71-
@inline function convertidx(
72-
idx::Union{OrdinalRange, Colon, AbstractVector{<:Integer}},
73-
ref::AbstractIndex{<:AbstractString},
74-
)
71+
@inline function convertidx(idx::Union{OrdinalRange, Colon, AbstractVector{<:Integer}}, ref::AbstractIndex)
7572
return idx
7673
end
77-
@inline function convertidx(idx::Integer, ref::AbstractIndex{<:AbstractString})
74+
@inline function convertidx(idx::Integer, ref::AbstractIndex)
7875
return idx:idx
7976
end
80-
@inline convertidx(idx::Union{AbstractString, AbstractVector{<:AbstractString}}, ref::AbstractIndex{<:AbstractString}) =
81-
ref[idx, true]
77+
@inline convertidx(idx::Union{AbstractString, AbstractVector{<:AbstractString}}, ref::AbstractIndex) = ref[idx, true]
78+
@inline convertidx(idx::AbstractVector{Bool}, ref::AbstractIndex) = findall(idx)
8279

8380
Base.axes(A::Union{AbstractMuData, AbstractAnnData}) = map(n -> Base.OneTo(n), size(A))
8481

test/mudata.jl

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -127,15 +127,18 @@ function test_row_slice(md, i1, n, d, j=:)
127127
ad1_names = filter(in(md["ad1"].obs_names), md.obs_names[i1])
128128
ad2_names = filter(in(md["ad2"].obs_names), md.obs_names[i1])
129129

130-
ad1_idx = filter(>(0x0), reshape(md.obsmap["ad1"], :)[i1])
131-
ad2_idx = filter(>(0x0), reshape(md.obsmap["ad2"], :)[i1])
130+
ad1_idx = filter(>(0x0), vec(md.obsmap["ad1"])[i1])
131+
ad2_idx = filter(>(0x0), vec(md.obsmap["ad2"])[i1])
132+
ad1, ad2 = md["ad1"][ad1_idx, j], md["ad2"][ad2_idx, j]
132133

133-
ad1, ad2 = parent(md["ad1"])[ad1_idx, j], parent(md["ad2"])[ad2_idx, j]
134+
parent_ad1_idx = filter(>(0x0), vec(parent(md).obsmap["ad1"])[parentindices(md)[1][i1]])
135+
parent_ad2_idx = filter(>(0x0), vec(parent(md).obsmap["ad2"])[parentindices(md)[1][i1]])
136+
parent_ad1, parent_ad2 = parent(md)["ad1"][parent_ad1_idx, j], parent(md["ad2"])[parent_ad2_idx, j]
134137

135138
@test md1["ad1"].X == md2["ad1"].X == md4["ad1"].X == ad1.X
136139
@test md1["ad2"].X == md2["ad2"].X == md4["ad2"].X == ad2.X
137-
@test md1["ad1"].obs_names == md2["ad1"].obs_names == md4["ad1"].obs_names == ad1.obs_names
138-
@test md1["ad2"].obs_names == md2["ad2"].obs_names == md4["ad2"].obs_names == ad2.obs_names
140+
@test md1["ad1"].obs_names == md2["ad1"].obs_names == md4["ad1"].obs_names == ad1.obs_names == parent_ad1.obs_names
141+
@test md1["ad2"].obs_names == md2["ad2"].obs_names == md4["ad2"].obs_names == ad2.obs_names == parent_ad2.obs_names
139142
end
140143

141144
function test_md_slicing(md, n, d, j=:)
@@ -230,7 +233,7 @@ end
230233
if unique && axis == 0x2
231234
md = (@test_nowarn MuData(mod=Dict("ad1" => ad1, "ad2" => ad2, "ad3" => ad3), axis=axis))
232235
else
233-
md = with_logger(NullLogger()) do # warning depends on the RNG, and differs between Julia versions
236+
md = with_logger(NullLogger()) do # warning depends on the RNG, and differs between Julia versions
234237
MuData(mod=Dict("ad1" => ad1, "ad2" => ad2, "ad3" => ad3), axis=axis)
235238
end
236239
end

0 commit comments

Comments
 (0)