Skip to content

Commit bdbfa9d

Browse files
Lazy dep on Muon.jl for loading h5ad files (#24)
Load h5ad files using Muon.jl (as a package extension). Patch bump.
1 parent c1919f9 commit bdbfa9d

File tree

6 files changed

+221
-4
lines changed

6 files changed

+221
-4
lines changed

CHANGELOG.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99

10+
## [0.4.3] - 2024-11-06
11+
12+
### Added
13+
14+
* Package extension for `Muon.jl` that allows loading data from .h5ad files using the functions `create_datamatrix`, `create_var` and `create_obs`.
15+
16+
### Deprecated
17+
18+
* Deprecated old `loadh5ad` function that only supported some versions of the .h5ad format.
19+
1020
## [0.4.2] - 2024-09-27
1121

1222
### Fixed

Project.toml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "SingleCellProjections"
22
uuid = "03d38035-ed2f-4a36-82eb-797f1727ab2e"
33
authors = ["Rasmus Henningsson <rasmus.henningsson@med.lu.se>"]
4-
version = "0.4.2"
4+
version = "0.4.3"
55

66
[deps]
77
AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
@@ -23,13 +23,15 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
2323
ThreadedSparseArrays = "59d54670-b8ac-4d81-ab7a-bb56233e17ab"
2424

2525
[weakdeps]
26-
Requires = "ae029012-a4dd-5104-9daa-d747884805df"
26+
Muon = "446846d7-b4ce-489d-bf74-72da18fe3629"
2727
PrincipalMomentAnalysis = "6a3ba550-3b7f-11e9-2734-d9178ad1e8db"
28+
Requires = "ae029012-a4dd-5104-9daa-d747884805df"
2829
StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
2930
TSne = "24678dba-d5e9-5843-a4c6-250288b04835"
3031
UMAP = "c4f8c510-2410-5be4-91d7-4fbaeb39457e"
3132

3233
[extensions]
34+
SingleCellProjectionsMuonExt = "Muon"
3335
SingleCellProjectionsPrincipalMomentAnalysisExt = "PrincipalMomentAnalysis"
3436
SingleCellProjectionsStableRNGsExt = "StableRNGs"
3537
SingleCellProjectionsTSneExt = "TSne"
Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,193 @@
1+
module SingleCellProjectionsMuonExt
2+
3+
using SingleCellProjections
4+
using DataFrames
5+
6+
if isdefined(Base, :get_extension)
7+
using Muon: AnnData, AlignedMapping
8+
else
9+
using ..Muon: AnnData, AlignedMapping
10+
end
11+
12+
13+
# Determine which slot of the owning object (`am.ref`) holds `am`.
#
# The slots are checked by identity (`===`) in the order `layers`, `obsm`, `obsp`,
# `varm`, `varp`, and the name of the first matching slot is returned as a `Symbol`.
# Throws an `ArgumentError` if `am` is not identical to any of them.
function aligned_mapping_type(am::AlignedMapping)
    owner = am.ref
    for slot in (:layers, :obsm, :obsp, :varm, :varp)
        getproperty(owner, slot) === am && return slot
    end
    throw(ArgumentError("Unknown AlignedMapping"))
end
22+
23+
"""
24+
create_var(a::AnnData)
25+
26+
Create a `DataFrame` where the first column contains `var` IDs and the remaining columns contain the `var` annotations from the `AnnData` object.
27+
28+
!!! note
29+
The interface for loading data from .h5ad files is still considered experimental and might change in a non-breaking release.
30+
31+
See also: [`create_datamatrix`](@ref), [`create_obs`](@ref)
32+
"""
33+
function SingleCellProjections.create_var(a::AnnData)
    annotations = a.var
    ids = collect(a.var_names)
    # Non-mutating `insertcols`: the IDs become column 1 (named `id`) of the returned table.
    # `makeunique=true` lets DataFrames resolve a clash with an existing column name
    # instead of throwing.
    return insertcols(annotations, 1, :id => ids; makeunique=true)
end
35+
36+
"""
37+
create_obs(a::AnnData)
38+
39+
Create a `DataFrame` where the first column contains `obs` IDs and the remaining columns contain the `obs` annotations from the `AnnData` object.
40+
41+
!!! note
42+
The interface for loading data from .h5ad files is still considered experimental and might change in a non-breaking release.
43+
44+
See also: [`create_datamatrix`](@ref), [`create_var`](@ref)
45+
"""
46+
function SingleCellProjections.create_obs(a::AnnData)
    annotations = a.obs
    ids = collect(a.obs_names)
    # Non-mutating `insertcols`: the IDs become column 1 (named `cell_id`) of the returned table.
    # `makeunique=true` lets DataFrames resolve a clash with an existing column name
    # instead of throwing.
    return insertcols(annotations, 1, :cell_id => ids; makeunique=true)
end
48+
49+
# Variable table for `a`: the full annotation table from `create_var` when `add_var`
# is true, otherwise a single-column `DataFrame` holding only the IDs (column `id`).
function get_var(a::AnnData; add_var)
    if add_var
        return create_var(a)
    else
        return DataFrame(; id=collect(a.var_names))
    end
end

# Observation table for `a`: the full annotation table from `create_obs` when `add_obs`
# is true, otherwise a single-column `DataFrame` holding only the IDs (column `cell_id`).
function get_obs(a::AnnData; add_obs)
    if add_obs
        return create_obs(a)
    else
        return DataFrame(; cell_id=collect(a.obs_names))
    end
end
53+
54+
55+
# Return `X` with element type `T`.
#
# When `eltype(X)` already is a subtype of `T`, `X` itself is returned (no copy, and any
# lazy wrapper is kept). Otherwise each element is converted with `convert`, which
# throws (e.g. `InexactError`) if a value cannot be represented as `T`.
function convert_matrix(::Type{T}, X) where T
    if eltype(X) <: T
        return X
    end
    # Broadcasting handles both sparse and dense inputs and materializes lazy transposes.
    return convert.(T, X)
end
59+
60+
61+
62+
# Transpose helper that avoids stacking lazy wrappers.
#
# For a `PermutedDimsArray`, the wrapped parent array is returned directly. The
# assertion verifies that `X` is exactly the `(2,1)` permutation of its parent, so
# that unwrapping really is a transpose.
function _transpose(X::PermutedDimsArray)
    Xt = parent(X)
    @assert PermutedDimsArray(Xt, (2,1)) === X
    return Xt
end

# Fallback for everything else: the lazy adjoint (what the postfix `'` operator calls).
_transpose(X) = adjoint(X)
68+
69+
70+
"""
71+
create_datamatrix([T], a::AnnData; add_var=false, add_obs=false)
72+
create_datamatrix([T], am::AlignedMapping, name; add_var=false, add_obs=false)
73+
74+
Creates a `DataMatrix` from an `AnnData` object.
75+
By default, the main matrix `X` is retrieved from `a::AnnData`.
76+
It is also possible to create `DataMatrices` from named objects in: `a.layers`, `a.obsm`, `a.obsp`, `a.varm` and `a.varp`. See examples below.
77+
78+
The optional parameter `T` determines the `eltype` of the returned matrix. If specified, the matrix will be converted to have this `eltype`.
79+
80+
kwargs:
81+
* add_var: Add `var` from the AnnData object to the returned `DataMatrix` (when applicable).
82+
* add_obs: Add `obs` from the AnnData object to the returned `DataMatrix` (when applicable).
83+
84+
!!! note
85+
The interface for loading data from .h5ad files is still considered experimental and might change in a non-breaking release.
86+
87+
# Examples
88+
89+
All examples below assume that an AnnData object has been loaded first:
90+
```julia
91+
julia> using Muon
92+
93+
julia> a = readh5ad("path/to/file.h5ad");
94+
```
95+
96+
* Load the main matrix `X` from an AnnData object.
97+
```julia
98+
julia> create_datamatrix(a)
99+
DataMatrix (123 variables and 456 observations)
100+
SparseMatrixCSC{Float32, Int32}
101+
Variables: id
102+
Observations: cell_id
103+
```
104+
105+
* Load the main matrix `X` from an AnnData object, and add `var`/`obs` annotations.
106+
```julia
107+
julia> create_datamatrix(a; add_var=true, add_obs=true)
108+
DataMatrix (123 variables and 456 observations)
109+
SparseMatrixCSC{Float32, Int32}
110+
Variables: id, feature_type, ...
111+
Observations: cell_id, cell_type, ...
112+
```
113+
114+
* Load the main matrix `X` from an AnnData object, with eltype `Int`. NB: This will fail if the matrix is not a count matrix.
115+
```julia
116+
julia> create_datamatrix(Int, a)
117+
DataMatrix (123 variables and 456 observations)
118+
SparseMatrixCSC{Int64, Int32}
119+
Variables: id
120+
Observations: cell_id
121+
```
122+
123+
* Load the matrix named `raw_counts` from `layers`, with eltype `Int`. NB: This will fail if the matrix is not a count matrix.
124+
```julia
125+
julia> create_datamatrix(Int, a.layers, "raw_counts")
126+
DataMatrix (123 variables and 456 observations)
127+
SparseMatrixCSC{Int64, Int32}
128+
Variables: id
129+
Observations: cell_id
130+
```
131+
132+
* Load the matrix named `UMAP` from `obsm`.
133+
```julia
134+
julia> create_datamatrix(a.obsm, "UMAP")
135+
DataMatrix (2 variables and 456 observations)
136+
Matrix{Float64}
137+
Variables: id
138+
Observations: cell_id
139+
```
140+
141+
See also: [`create_var`](@ref), [`create_obs`](@ref)
142+
"""
143+
function SingleCellProjections.create_datamatrix(::Type{T}, a::AnnData; add_var=false, add_obs=false) where T
    # `a.X` goes through `_transpose` before being paired with the (var, obs) tables below.
    Xt = _transpose(a.X)
    var_annot = get_var(a; add_var=add_var)
    obs_annot = get_obs(a; add_obs=add_obs)
    # `convert_matrix` returns its input unchanged when the eltype already is a subtype of `T`.
    return DataMatrix(convert_matrix(T, Xt), var_annot, obs_annot)
end

# Without an explicit element type, `T = Any` is used, so the stored eltype is kept.
function SingleCellProjections.create_datamatrix(a::AnnData; kwargs...)
    return create_datamatrix(Any, a; kwargs...)
end
151+
152+
function SingleCellProjections.create_datamatrix(::Type{T}, am::AlignedMapping, name; add_var=false, add_obs=false) where T
    a = am.ref
    kind = aligned_mapping_type(am)
    X = am[name]

    # A `DataFrame` entry carries its own column names; keep them so they can serve as
    # IDs for the non-annotated axis (obsm/varm), then continue with a plain matrix.
    colnames = nothing
    if X isa DataFrame
        colnames = names(X)
        X = Matrix(X)
    end

    @assert ndims(X) == 2 "Expected DataMatrix to have 2 dimensions, got $(ndims(X))"

    # Entries from layers/obsm/obsp are transposed; varm/varp entries are used as stored.
    if kind in (:layers, :obsm, :obsp)
        X = _transpose(X)
    end

    if kind === :layers
        var = get_var(a; add_var=add_var)
        obs = get_obs(a; add_obs=add_obs)
    elseif kind === :obsm
        # Rows of the (transposed) matrix are named by the DataFrame columns if available,
        # otherwise "Dim1", "Dim2", ...
        ids = @something(colnames, string.("Dim", 1:size(X, 1)))
        var = DataFrame(; id=ids)
        obs = get_obs(a; add_obs=add_obs)
    elseif kind === :obsp
        # Both axes are described by the same observation table.
        shared = get_obs(a; add_obs=add_obs)
        var = obs = shared
    elseif kind === :varm
        var = get_var(a; add_var=add_var)
        # Columns are named by the DataFrame columns if available, otherwise "Dim1", "Dim2", ...
        ids = @something(colnames, string.("Dim", 1:size(X, 2)))
        obs = DataFrame(; id=ids)
    elseif kind === :varp
        # Both axes are described by the same variable table.
        shared = get_var(a; add_var=add_var)
        var = obs = shared
    end

    return DataMatrix(convert_matrix(T, X), var, obs)
end

# Without an explicit element type, `T = Any` is used, so the stored eltype is kept.
function SingleCellProjections.create_datamatrix(am::AlignedMapping, name; kwargs...)
    return create_datamatrix(Any, am, name; kwargs...)
end
189+
190+
191+
192+
193+
end

src/SingleCellProjections.jl

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,10 @@ export
5858
ttest_table,
5959
mannwhitney!,
6060
mannwhitney,
61-
mannwhitney_table
61+
mannwhitney_table,
62+
create_datamatrix,
63+
create_var,
64+
create_obs
6265

6366
using LinearAlgebra
6467
import LinearAlgebra: svd
@@ -140,6 +143,7 @@ include("precompile.jl")
140143
@require TSne="24678dba-d5e9-5843-a4c6-250288b04835" include("../ext/SingleCellProjectionsTSneExt.jl")
141144
@require PrincipalMomentAnalysis="6a3ba550-3b7f-11e9-2734-d9178ad1e8db" include("../ext/SingleCellProjectionsPrincipalMomentAnalysisExt.jl")
142145
@require StableRNGs="860ef19b-820b-49d6-a774-d7a799459cd3" include("../ext/SingleCellProjectionsStableRNGsExt.jl")
146+
@require Muon="446846d7-b4ce-489d-bf74-72da18fe3629" include("../ext/SingleCellProjectionsMuonExt.jl")
143147
end
144148
end
145149

src/datamatrix.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ struct DataMatrix{T,Tv,To}
4444
end
4545

4646
validateunique_var(var, 1; report=duplicate_var)
47-
validateunique_var(obs, 1; report=duplicate_obs)
47+
validateunique_obs(obs, 1; report=duplicate_obs)
4848
new{T,Tv,To}(matrix, var, obs, models)
4949
end
5050
end

src/h5ad.jl

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
# Generic functions declared without any methods. Methods are added by the
# `SingleCellProjectionsMuonExt` extension (which defines
# `SingleCellProjections.create_datamatrix`, `create_var` and `create_obs` for Muon's
# `AnnData`/`AlignedMapping` types) once `Muon.jl` is loaded.
function create_datamatrix end
2+
function create_var end
3+
function create_obs end
14

25

36
_readh5ad_dataframe_string_array(g) = read(g)
@@ -54,8 +57,13 @@ end
5457
loadh5ad(filename; var_id_column=:id, obs_id_column=:id)
5558
5659
Experimental loading of .h5ad files.
60+
61+
!!! note
62+
This function is deprecated. Load `Muon.jl` and see help for `create_datamatrix`.
5763
"""
5864
function loadh5ad(filename; obs_id_column=:id, var_id_col=:id)
65+
@warn "loadh5ad is deprecated, please load Muon.jl and see help for `create_datamatrix`." maxlog=1
66+
5967
h5open(filename) do h5
6068
@assert read(attributes(h5), "encoding-type") == "anndata"
6169

0 commit comments

Comments
 (0)