Skip to content

Commit fb2f70c

Browse files
committed
Add method to pick the best matrix class for counts
- Avoid integer overflow when trying to convert a `matrix` with more than 2^31-1 non-zero values to a sparse matrix - Prefer the base R dense `matrix` class when fewer than 35% of the values are zeros (smaller object size than `dgCMatrix`)
1 parent c0af823 commit fb2f70c

File tree

14 files changed

+90
-5
lines changed

14 files changed

+90
-5
lines changed

DESCRIPTION

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,8 @@ Imports:
5252
scales,
5353
cli,
5454
pbapply,
55-
matrixStats
55+
matrixStats,
56+
Rcpp
5657
Suggests:
5758
SeuratData,
5859
lisi,
@@ -67,6 +68,7 @@ Collate:
6768
'Combat.R'
6869
'Harmony.R'
6970
'MNN.R'
71+
'RcppExports.R'
7072
'Scanorama.R'
7173
'SeuratIntegrate-package.R'
7274
'bbknn.R'
@@ -104,3 +106,6 @@ BugReports: https://github.com/cbib/Seurat-Integrate/issues
104106
Remotes: SeuratData=github::satijalab/seurat-data,
105107
github::theislab/kBET,
106108
lisi=github::immunogenomics/LISI
109+
LinkingTo:
110+
Rcpp,
111+
RcppArmadillo

NAMESPACE

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ importFrom(Matrix,spMatrix)
108108
importFrom(Matrix,sparse.model.matrix)
109109
importFrom(Matrix,sparseMatrix)
110110
importFrom(Matrix,t)
111+
importFrom(Rcpp,sourceCpp)
111112
importFrom(RhpcBLASctl,blas_get_num_procs)
112113
importFrom(RhpcBLASctl,blas_set_num_threads)
113114
importFrom(RhpcBLASctl,omp_get_num_procs)
@@ -338,3 +339,4 @@ importFrom(utils,object.size)
338339
importFrom(utils,setTxtProgressBar)
339340
importFrom(vctrs,vec_cast)
340341
importFrom(vctrs,vec_ptype2)
342+
useDynLib(SeuratIntegrate, .registration = TRUE)

NEWS.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
# SeuratIntegrate (development version)
22

3+
* The most suitable matrix format is now automatically chosen for corrected counts
4+
output by integration methods (should be dense matrix most of the time)
5+
36
* Add support for scGraph metric (https://doi.org/10.1101/2024.04.02.587824)
47

58
* Improved speed of `CreateIntegrationGroups` for non-SCT assay in unambiguous cases

R/Combat.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ CombatIntegration <- function(
145145
colnames(corrected.mat) <- colnames(data)
146146
output.list <- list()
147147
output.list[[reconstructed.assay]] <- CreateAssayObject(
148-
data = as(corrected.mat, "sparseMatrix"),
148+
data = choose_matrix_format(corrected.mat),
149149
key = key.assay)
150150
if(use.scaled) {
151151
output.list[[reconstructed.assay]] <- SetAssayData(

R/MNN.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ MNNIntegration <- function(
111111

112112
# Add reconstructed matrix (gene x cell)
113113
reconstructed_assay <- CreateAssayObject(
114-
data = as(object = assay(x = out), Class = "sparseMatrix"),
114+
data = choose_matrix_format(mat = assay(x = out)),
115115
)
116116
# Add variable features
117117
VariableFeatures(object = reconstructed_assay) <- features

R/RcppExports.R

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
2+
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
3+
4+
# Thin R-level wrapper: forwards `mat` unchanged to the native routine
# registered as `_SeuratIntegrate_n_zeros_mat` and returns whatever that
# routine returns.
# NOTE(review): presumably counts the zero entries of `mat` (going by the name
# and by how choose_matrix_format() uses the result) - confirm against the
# C++ source, which is not visible here.
n_zeros_mat <- function(mat) {
5+
.Call(`_SeuratIntegrate_n_zeros_mat`, mat)
6+
}
7+

R/Scanorama.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,7 @@ ScanoramaIntegration <- function(
220220
}
221221
if (cntcor) {
222222
output.list[[reconstructed.assay]] <- CreateAssayObject(
223-
data = t( as(cnts.scano, "sparseMatrix") )
223+
data = t( choose_matrix_format(cnts.scano) )
224224
)
225225
VariableFeatures(object = output.list[[reconstructed.assay]]) <- features
226226
}

R/SeuratIntegrate-package.R

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,11 @@
22
"_PACKAGE"
33

44
## usethis namespace: start
5+
#' @import rlang
6+
#' @importFrom Rcpp sourceCpp
57
#' @importFrom rlang %||% check_installed abort
68
#' @importFrom SeuratObject %iff%
7-
#' @import rlang
9+
#' @useDynLib SeuratIntegrate, .registration = TRUE
810
## usethis namespace: end
911

1012
#' @name integration-method

R/utils.R

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -741,6 +741,20 @@ could.be.connectivity.Matrix <- function(object, check.symmetry = T) {
741741
(! check.symmetry || isSymmetric(object))
742742
return(res)
743743
}
744+
#' Choose the most suitable matrix class for a (corrected) count matrix
#'
#' Returns `mat` as a base R dense `matrix` when a sparse representation would
#' be unsafe (too many entries for 32-bit indexing) or wasteful (too few
#' zeros), and as a sparse matrix built by `as.dgcmatrix()` otherwise.
#'
#' @param mat A matrix-like object supporting `nrow()`, `ncol()`, `length()`
#'   and `as.matrix()`.
#'
#' @return `as.matrix(mat)` when the zero count or the non-zero count exceeds
#'   2^31 - 1, or when fewer than 35% of the entries are zeros; otherwise
#'   `as.dgcmatrix(mat)`. `mat` is returned unchanged when it is empty or when
#'   one of its dimensions does not fit in a 32-bit integer.
#'
#' @keywords internal
#' @noRd
choose_matrix_format <- function(mat) {
  max_int_32bit <- 2^31 - 1
  n_total <- length(mat)

  # Dimensions beyond the 32-bit range should never happen; an empty matrix
  # has no defined zero fraction (0 / 0). Leave both untouched.
  dims_fit <- all(c(ncol(mat), nrow(mat)) <= max_int_32bit)
  if (!dims_fit || n_total == 0) {
    return(mat)
  }

  # NOTE(review): n_zeros_mat() is assumed to return the number of zero
  # entries of `mat` - confirm against its C++ implementation.
  n_0s <- n_zeros_mat(mat = mat)
  n_non0s <- n_total - n_0s

  # The non-zero guard protects the sparse conversion from overflowing its
  # 32-bit storage; the zero-count guard is kept from the original code.
  prefer_dense <- n_0s > max_int_32bit ||
    n_non0s > max_int_32bit ||
    n_0s / n_total < 0.35

  if (prefer_dense) {
    as.matrix(mat)
  } else {
    # The converted object must be returned (it was previously discarded).
    as.dgcmatrix(mat)
  }
}
744758

745759

746760
# Creates data.frame with cell group assignments for integration

src/.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
*.o
2+
*.so
3+
*.dll

0 commit comments

Comments
 (0)