diff --git a/docs/Project.toml b/docs/Project.toml index 475187f..88df471 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,3 +1,4 @@ [deps] +DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" diff --git a/docs/make.jl b/docs/make.jl index dadba7d..a3f766b 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -1,4 +1,4 @@ -using Documenter, Muon +using Documenter, Muon, DataFrames makedocs(sitename="Muon Documentation", warnonly=:cross_references) diff --git a/docs/src/objects.md b/docs/src/objects.md index 9aa67f3..7c79ef4 100644 --- a/docs/src/objects.md +++ b/docs/src/objects.md @@ -76,6 +76,21 @@ import Muon: obs_names_make_unique! # hide obs_names_make_unique!(ad) ``` +The data matrices of `AnnData` objects can be converted to a `DataFrame`, annotated with `obs` and `var` names. + +```@example 1 +using DataFrames +DataFrame(ad) +``` + +By default, the first column `obs` corresponds to the `obs_names` and the remaining columns are named according to the `var_names`. To obtain the transpose of this, pass `columns=:obs`. + +To use a different data matrix (the default is `ad.X`), pass the name of the layer: + +```julia +DataFrame(ad, layer="raw") +``` + ## MuData The basic idea behind a multimodal object is _key_ ``\rightarrow`` _value_ relationship where _keys_ represent the unique names of individual modalities and _values_ are `AnnData` objects that contain the correposnding data. Similarly to `AnnData` objects, `MuData` objects can also contain rich multimodal annotations. 
diff --git a/src/util.jl b/src/util.jl
index 82f5116..5395557 100644
--- a/src/util.jl
+++ b/src/util.jl
@@ -189,3 +189,48 @@ function duplicateindices(v::Muon.Index{T, I}) where {T <: AbstractString, I <:
     filter!(x -> length(last(x)) > 1, varnames)
     varnames
 end
+
+"""
+    DataFrame(A::AnnData; layer=nothing, columns=:var)
+
+Return a `DataFrame` containing the data matrix `A.X`, or the layer
+`A.layers[layer]` when `layer="layername"` is passed.
+
+By default (`columns=:var`) the first column, named `obs`, contains
+`A.obs_names` and the remaining columns are named according to
+`A.var_names`. To obtain the transpose (first column `var`, remaining
+columns named according to `A.obs_names`), pass `columns=:obs`.
+
+Throws an `ArgumentError` if `columns` is neither `:obs` nor `:var`, if
+the names used as column names are not unique or clash with the name of
+the first column, or if `layer` is not a layer of `A`.
+"""
+function DataFrames.DataFrame(A::AnnData; layer::Union{String, Nothing}=nothing, columns=:var)
+    if columns ∉ (:obs, :var)
+        throw(ArgumentError("columns must be :obs or :var (got: $columns)"))
+    end
+    rows = columns == :var ? :obs : :var
+    colnames = getproperty(A, Symbol(columns, :_names))
+    if !allunique(colnames)
+        throw(ArgumentError("duplicate column names ($(columns)_names); run $(columns)_names_make_unique!"))
+    end
+    # The row names are stored in a column called `rows`; a data column of the
+    # same name would be silently overwritten by `setproperty!` below.
+    if string(rows) in colnames
+        throw(ArgumentError("$(columns)_names must not contain \"$rows\" (reserved for the $rows names column)"))
+    end
+    rownames = getproperty(A, Symbol(rows, :_names))
+
+    M = if isnothing(layer)
+        A.X
+    elseif layer in keys(A.layers)
+        A.layers[layer]
+    else
+        throw(ArgumentError("no layer $layer in adata layers"))
+    end
+    df = DataFrame(columns == :var ? M : transpose(M), colnames)
+    setproperty!(df, rows, rownames)
+    # Move the row-name column to the front.
+    select!(df, rows, All())
+    return df
+end
diff --git a/test/anndata.jl b/test/anndata.jl
index 01ebed0..c2ac051 100644
--- a/test/anndata.jl
+++ b/test/anndata.jl
@@ -61,3 +61,22 @@ end
     @test allunique(ad2.var_names)
     @test allunique(ad2.obs_names)
 end
+
+@testset "DataFrame conversion" begin
+    using DataFrames
+    df = DataFrame(ad)
+    @test names(df) == ["obs"; ad.var_names]
+    @test df.obs == ad.obs_names
+    # Check argument validation while the names are still unique, so that each
+    # call can only fail for the reason under test.
+    @test_throws ArgumentError DataFrame(ad, columns=:foo)
+    @test_throws ArgumentError DataFrame(ad, layer="doesn't exist")
+    ad.var_names[3] = "10"
+    @test_throws ArgumentError DataFrame(ad)
+    df2 = DataFrame(ad, columns=:obs)
+    @test names(df2) == ["var"; ad.obs_names]
+    @test df2.var == ad.var_names
+    # A name equal to the row-name column must be rejected, not overwritten.
+    ad.obs_names[1] = "var"
+    @test_throws ArgumentError DataFrame(ad, columns=:obs)
+end