Merge pull request #112 from stemangiola/improve-figurse

stemangiola · web-flow · commit e33313072a6b · 2023-04-24T09:39:48.000+02:00
Improve figures
diff --git a/R/dev.R b/R/dev.R
@@ -145,7 +145,7 @@ update_unharmonised <- function(unharmonised_parquet_dir, ...){
 #'     "/vast/projects/cellxgene_curated/splitted_DB2_anndata_scaled_0.2.1"
 #' )
 #' }
-hdf5_to_anndata = function(input_directory, output_directory){
+hdf5_to_anndata <- function(input_directory, output_directory){
     dir.create(output_directory, showWarnings = FALSE)
     # This is a quick utility script to convert the SCE files into AnnData format for use in Pythonlist.files("/vast/projects/RCP/human_cell_atlas/splitted_DB2_data", full.names = FALSE) |>  purrr::walk(function(dir){
     basilisk::basiliskRun(fun = function(sce) {
@@ -185,72 +185,72 @@ hdf5_to_anndata = function(input_directory, output_directory){
     }, env = zellkonverter::zellkonverterAnnDataEnv())
 }
 
-#' Converts a series of H5-serialized Seurat to AnnData
-#' @param input_directory A character scalar. The path to a directory containing one or more
-#'  directories created by [SeuratDisk::SaveH5Seurat()].
-#' @param output_directory A character scalar. The path to a directory in which to save the
-#'  created anndata files.
-#' @keywords internal
-#' @return A character vector of the newly-created anndata files
-#' @examples
-#' \donttest{
-#' h5seurat_to_anndata(
-#'     "/vast/projects/cellxgene_curated/splitted_DB2_data_0.2.1",
-#'     "/vast/projects/cellxgene_curated/splitted_DB2_anndata_0.2.1"
-#' )
-#' h5seurat_to_anndata(
-#'     "/vast/projects/cellxgene_curated/splitted_DB2_data_scaled_0.2.1",
-#'     "/vast/projects/cellxgene_curated/splitted_DB2_anndata_scaled_0.2.1"
-#' )
-#' }
-h5seurat_to_anndata = function(input_directory, output_directory, assays = "RNA"){
-  
-  # Check if package is loaded
-  if(!"SeuratDisk" %in% (.packages()))
-    stop("CuratedCellAtlas says: please manually load the SeuratDisk package first. Execute `library(SeuratDisk)`")
-    
-    
-  dir.create(output_directory, showWarnings = FALSE)
-  # This is a quick utility script to convert the SCE files into AnnData format for use in Pythonlist.files("/vast/projects/RCP/human_cell_atlas/splitted_DB2_data", full.names = FALSE) |>  purrr::walk(function(dir){
-  basilisk::basiliskRun(fun = function(sce) {
-    dir(input_directory, full.names = TRUE) |>
-      purrr::map_chr(function(seurat_file){
-        cli::cli_alert_info("Processing {seurat_file}.")
-        prefix <- basename(seurat_file)
-        out_path <- glue::glue("{prefix}.h5ad") |>
-          file.path(output_directory, name=_)
-        
-        if (file.exists(out_path)) {
-          cli::cli_alert_info("{out_path} already exists. Skipping")
-        }
-        else {
-          sce <- 
-            LoadH5Seurat(seurat_file, assays = assays) |> 
-            Seurat::as.SingleCellExperiment()
-          
-          single_column <- length(colnames(sce)) == 1
-          if (single_column){
-            # Hack, so that single-column SCEs will convert 
-            # correctly
-            cli::cli_alert_info(
-              "{seurat_file} has only 1 column. Duplicating column."
-            )
-            sce <- cbind(sce, sce)
-            single_column <- TRUE
-          }
-          ad <- zellkonverter::SCE2AnnData(sce)
-          if (single_column){
-            # Remove the duplicate column
-            sce$X <- sce$X[1]
-          }
-          # TODO: customize chunking here, when anndata supports it
-          # (see https://github.com/scverse/anndata/issues/961)
-          ad$write_h5ad(out_path)
-        }
-        out_path
-      }, .progress = "Converting files")
-  }, env = zellkonverter::zellkonverterAnnDataEnv())
-}
+# Converts a series of H5-serialized Seurat objects to AnnData
+# @param input_directory A character scalar. The path to a directory containing one or more
+#  directories created by [SeuratDisk::SaveH5Seurat()].
+# @param output_directory A character scalar. The path to a directory in which to save the
+# created anndata files.
+# @keywords internal
+# @return A character vector of the newly-created anndata files
+# @noRd
+# @examples
+# \donttest{
+# h5seurat_to_anndata(
+#     "/vast/projects/cellxgene_curated/splitted_DB2_data_0.2.1",
+#     "/vast/projects/cellxgene_curated/splitted_DB2_anndata_0.2.1"
+# )
+# h5seurat_to_anndata(
+#     "/vast/projects/cellxgene_curated/splitted_DB2_data_scaled_0.2.1",
+#     "/vast/projects/cellxgene_curated/splitted_DB2_anndata_scaled_0.2.1"
+# )
+# }
+# h5seurat_to_anndata <- function(input_directory, output_directory, assays = "RNA"){
+#   
+#   # Check if package is loaded
+#   if(!"SeuratDisk" %in% (.packages()))
+#     stop("CuratedCellAtlas says: please manually load the SeuratDisk package first. Execute `library(SeuratDisk)`")
+#     
+#     
+#   dir.create(output_directory, showWarnings = FALSE)
+#   # This is a quick utility script to convert the SCE files into AnnData format for use in Python. (Leftover fragment: list.files("/vast/projects/RCP/human_cell_atlas/splitted_DB2_data", full.names = FALSE) |>  purrr::walk(function(dir){ )
+#   basilisk::basiliskRun(fun = function(sce) {
+#     dir(input_directory, full.names = TRUE) |>
+#       purrr::map_chr(function(seurat_file){
+#         cli::cli_alert_info("Processing {seurat_file}.")
+#         prefix <- basename(seurat_file)
+#         out_path <- glue::glue("{prefix}.h5ad") |>
+#           file.path(output_directory, name=_)
+#         
+#         if (file.exists(out_path)) {
+#           cli::cli_alert_info("{out_path} already exists. Skipping")
+#         }
+#         else {
+#           sce <- SeuratDisk::LoadH5Seurat(seurat_file, assays = assays) |> 
+#             Seurat::as.SingleCellExperiment()
+#           
+#           single_column <- length(colnames(sce)) == 1
+#           if (single_column){
+#             # Hack, so that single-column SCEs will convert 
+#             # correctly
+#             cli::cli_alert_info(
+#               "{seurat_file} has only 1 column. Duplicating column."
+#             )
+#             sce <- cbind(sce, sce)
+#             single_column <- TRUE
+#           }
+#           ad <- zellkonverter::SCE2AnnData(sce)
+#           if (single_column){
+#             # Remove the duplicate column
+#             sce$X <- sce$X[1]
+#           }
+#           # TODO: customize chunking here, when anndata supports it
+#           # (see https://github.com/scverse/anndata/issues/961)
+#           ad$write_h5ad(out_path)
+#         }
+#         out_path
+#       }, .progress = "Converting files")
+#   }, env = zellkonverter::zellkonverterAnnDataEnv())
+# }
 
 #' Makes a "downsampled" metadata file that only contains the minimal data
 #' needed to run the vignette.
diff --git a/man/hdf5_to_anndata.Rd b/man/hdf5_to_anndata.Rd
diff --git a/vignettes/Introduction.Rmd b/vignettes/Introduction.Rmd
@@ -215,7 +215,7 @@ single_cell_counts |> HDF5Array::saveHDF5SummarizedExperiment("single_cell_count
 
 We can gather all CD14 monocytes cells and plot the distribution of HLA-A across all tissues
 
-```{r}
+```{r, results='hide'}
 suppressPackageStartupMessages({
     library(ggplot2)
 })
@@ -251,7 +251,13 @@ counts |>
   theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1)) + 
   xlab("Disease") + 
   ggtitle("HLA-A in CD14 monocytes by disease") 
+```
+
+```{r echo=FALSE}
+find_figure("HLA_A_disease_plot.png") |> knitr::include_graphics()
+```
 
+```{r, results='hide'}
 # Plot by tissue
 counts |> 
   dplyr::with_groups(tissue_harmonised, ~ .x |> dplyr::mutate(median_count = median(`HLA.A`, rm.na=TRUE))) |> 
@@ -269,16 +275,8 @@ counts |>
   ggtitle("HLA-A in CD14 monocytes by tissue") + 
   theme(legend.position = "none")
 ```
-
-```{r}
-counts |> 
-  ggplot(aes( disease, `HLA.A`,color = file_id)) +
-  geom_jitter(shape=".") + 
-  theme_bw() +
-  theme(
-      axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1),
-      legend.position = "none"
-  )
+```{r echo=FALSE}
+find_figure("HLA_A_tissue_plot.png") |> knitr::include_graphics()
 ```
 
 ```{r}
@@ -299,7 +297,9 @@ metadata |>
       axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1),
       legend.position = "none"
   ) + 
-  geom_jitter(shape=".")
+  geom_jitter(shape=".") + 
+  xlab("Tissue") + 
+  ggtitle("HLA-A in nk cells by tissue")
 ```
 
 ## Obtain Unharmonised Metadata