diff --git a/src/methods/lmds_irlba_rf/config.vsh.yaml b/src/methods/lmds_irlba_rf/config.vsh.yaml deleted file mode 100644 index 8f85382..0000000 --- a/src/methods/lmds_irlba_rf/config.vsh.yaml +++ /dev/null @@ -1,39 +0,0 @@ -__merge__: ../../api/comp_method.yaml -name: lmds_irlba_rf -label: LMDS + IRLBA + RF -summary: A random forest regression using LMDS of modality 1 to predict a PCA embedding of modality 2, which is then reversed to predict the original modality 2. -description: | - A random forest regression using LMDS of modality 1 to predict a PCA embedding of modality 2, which is then reversed to predict the original modality 2. -references: - doi: 10.1101/2022.04.11.487796 -info: - documentation_url: https://github.com/openproblems-bio/openproblems-v2/tree/main/src/tasks/predict_modality/methods #/lmds_irlba_rf - repository_url: https://github.com/openproblems-bio/openproblems-v2 - preferred_normalization: log_cp10k -arguments: - - name: "--distance_method" - type: "string" - default: "pearson" - description: The distance method to use. Possible values are euclidean, pearson, spearman and others. - - name: "--n_pcs" - type: "integer" - default: 20 - description: Number of principal components to use. - - name: "--n_trees" - type: "integer" - default: 500 - description: Number of trees to use. -resources: - - type: r_script - path: script.R -engines: - - type: docker - image: openproblems/base_r:1.0.0 - setup: - - type: r - cran: [lmds, ranger, pbapply, irlba] -runners: - - type: executable - - type: nextflow - directives: - label: [hightime, highmem, highcpu] \ No newline at end of file diff --git a/src/methods/lmds_irlba_rf/script.R b/src/methods/lmds_irlba_rf/script.R deleted file mode 100644 index 00e1ac7..0000000 --- a/src/methods/lmds_irlba_rf/script.R +++ /dev/null @@ -1,93 +0,0 @@ -cat("Loading dependencies\n") -requireNamespace("anndata", quietly = TRUE) -requireNamespace("pbapply", quietly = TRUE) -library(Matrix, warn.conflicts = FALSE, quietly = TRUE) - -## VIASH START -path <- "resources_test/task_predict_modality/openproblems_neurips2021/bmmc_multiome/normal/" -par <- list( - input_train_mod1 = paste0(path, "train_mod1.h5ad"), - input_test_mod1 = paste0(path, "test_mod1.h5ad"), - input_train_mod2 = paste0(path, "train_mod2.h5ad"), - output = "output.h5ad", - n_pcs = 20L, - n_trees = 50L -) -meta <- list(name = "foo") -## VIASH END - -n_cores <- parallel::detectCores(all.tests = FALSE, logical = TRUE) - -cat("Reading mod1 files\n") -input_train_mod1 <- anndata::read_h5ad(par$input_train_mod1) -input_test_mod1 <- anndata::read_h5ad(par$input_test_mod1) - -dataset_id <- input_train_mod1$uns[["dataset_id"]] - -cat("Performing DR on the mod1 values\n") -dr <- lmds::lmds( - rbind(input_train_mod1$layers[["normalized"]], input_test_mod1$layers[["normalized"]]), - ndim = par$n_pcs, - distance_method = par$distance_method -) -# alternative: -# pr_out <- irlba::prcomp_irlba( -# rbind(input_train_mod1$layers[["normalized"]], input_test_mod1$layers[["normalized"]]), -# n = par$n_pcs -# ) -# dr <- pr_out$x - -# split up dr data -ix <- seq_len(nrow(input_train_mod1)) -dr_train <- as.data.frame(dr[ix, , drop = FALSE]) -dr_test <- as.data.frame(dr[-ix, , drop = FALSE]) -dr_train <- dr[ix, , drop = FALSE] -dr_test <- dr[-ix, , drop = FALSE] - -rm(input_train_mod1, input_test_mod1) -gc() - - -cat("Reading mod2 files\n") -X_mod2 <- anndata::read_h5ad(par$input_train_mod2)$layers[["normalized"]] -prcomp_mod2 <- irlba::prcomp_irlba(X_mod2, n = par$n_pcs) -dr_mod2 <- prcomp_mod2$x - -cat("Predicting for each column in modality 2\n") -pred_drs <- pbapply::pblapply( - seq_len(ncol(dr_mod2)), - function(i) { - y <- dr_mod2[, i] - uy <- unique(y) - if (length(uy) > 1) { - rf <- ranger::ranger( - x = dr_train, - y = y, - num.trees = par$n_trees, - num.threads = n_cores - ) - stats::predict(rf, dr_test)$prediction - } else { - rep(uy, nrow(dr_test)) - } - } -) - -cat("Creating outputs object\n") -pred_dr <- Matrix::Matrix(do.call(cbind, pred_drs), sparse = TRUE) -prediction <- pred_dr %*% t(prcomp_mod2$rotation) -rownames(prediction) <- rownames(dr_test) -colnames(prediction) <- colnames(X_mod2) - -out <- anndata::AnnData( - layers = list(normalized = as(prediction, "CsparseMatrix")), - shape = dim(prediction), - uns = list( - dataset_id = dataset_id, - method_id = meta$name - ) -) - - -cat("Writing predictions to file\n") -zzz <- out$write_h5ad(par$output, compression = "gzip") diff --git a/src/workflows/run_benchmark/config.vsh.yaml b/src/workflows/run_benchmark/config.vsh.yaml index 65f222e..54fc037 100644 --- a/src/workflows/run_benchmark/config.vsh.yaml +++ b/src/workflows/run_benchmark/config.vsh.yaml @@ -69,7 +69,6 @@ dependencies: - name: methods/knnr_py - name: methods/knnr_r - name: methods/lm - - name: methods/lmds_irlba_rf - name: methods/guanlab_dengkw_pm - name: metrics/correlation - name: metrics/mse diff --git a/src/workflows/run_benchmark/main.nf b/src/workflows/run_benchmark/main.nf index a6dba16..133d6e5 100644 --- a/src/workflows/run_benchmark/main.nf +++ b/src/workflows/run_benchmark/main.nf @@ -14,7 +14,6 @@ methods = [ knnr_py, knnr_r, lm, - lmds_irlba_rf, guanlab_dengkw_pm ]