|
| 1 | +--- |
| 2 | +title: "CuratedAtlasQueryR" |
| 3 | +output: github_document |
| 4 | +--- |
| 5 | + |
| 6 | +```{r, include = FALSE} |
| 7 | +# Note: knit this to the repo readme file using: |
| 8 | +# rmarkdown::render("vignettes/readme.Rmd", output_format = "github_document", output_dir = getwd()) |
| 9 | +knitr::opts_chunk$set( |
| 10 | + collapse = TRUE, |
| 11 | + comment = "#>" |
| 12 | +) |
| 13 | +``` |
| 14 | + |
| 15 | +```{r, echo=FALSE, out.height = "139px", out.width = "120px"} |
| 16 | +knitr::include_graphics("inst/logo.png") |
| 17 | +``` |
| 18 | + |
| 19 | +## Load the package |
| 20 | + |
| 21 | +```{r, message=FALSE, warning=FALSE} |
| 22 | +library(CuratedAtlasQueryR) |
| 23 | +library(dplyr) |
| 24 | +library(stringr) |
| 25 | +``` |
| 26 | + |
| 27 | +## Load and explore the metadata |
| 28 | + |
| 29 | +### Load the metadata |
| 30 | + |
| 31 | +```{r} |
| 32 | +get_metadata() |
| 33 | +``` |
| 34 | + |
| 35 | +### Explore the tissue |
| 36 | + |
| 37 | +```{r, eval=FALSE} |
| 38 | +get_metadata() |> |
| 39 | + dplyr::distinct(tissue, file_id) |
| 40 | +``` |
| 41 | + |
| 42 | +```{r} |
| 43 | +#> # Source: SQL [?? x 2] |
| 44 | +#> # Database: sqlite 3.40.0 [[email protected]:5432/metadata] |
| 45 | +#> # Ordered by: desc(n) |
| 46 | +#> tissue n |
| 47 | +#> <chr> <int64> |
| 48 | +#> 1 blood 47 |
| 49 | +#> 2 heart left ventricle 46 |
| 50 | +#> 3 cortex of kidney 31 |
| 51 | +#> 4 renal medulla 29 |
| 52 | +#> 5 lung 27 |
| 53 | +#> 6 liver 24 |
| 54 | +#> 7 middle temporal gyrus 24 |
| 55 | +#> 8 kidney 19 |
| 56 | +#> 9 intestine 18 |
| 57 | +#> 10 thymus 17 |
| 58 | +#> # … with more rows |
| 59 | +``` |
| 60 | + |
| 61 | + |
| 62 | +## Download single-cell RNA sequencing counts |
| 63 | + |
| 64 | +### Query raw counts |
| 65 | + |
| 66 | +```{r} |
| 67 | +
|
| 68 | +single_cell_counts = |
| 69 | + get_metadata() |> |
| 70 | + dplyr::filter( |
| 71 | + ethnicity == "African" & |
| 72 | + stringr::str_like(assay, "%10x%") & |
| 73 | + tissue == "lung parenchyma" & |
| 74 | + stringr::str_like(cell_type, "%CD4%") |
| 75 | + ) |> |
| 76 | + get_SingleCellExperiment() |
| 77 | +
|
| 78 | +single_cell_counts |
| 79 | +``` |
| 80 | + |
| 81 | +### Query counts scaled per million |
| 82 | + |
| 83 | +This is helpful if just a few genes are of interest, as they can be compared across samples. |
| 84 | + |
| 85 | +```{r} |
| 86 | +single_cell_counts = |
| 87 | + get_metadata() |> |
| 88 | + dplyr::filter( |
| 89 | + ethnicity == "African" & |
| 90 | + stringr::str_like(assay, "%10x%") & |
| 91 | + tissue == "lung parenchyma" & |
| 92 | + stringr::str_like(cell_type, "%CD4%") |
| 93 | + ) |> |
| 94 | + get_SingleCellExperiment(assays = "cpm") |
| 95 | +
|
| 96 | +single_cell_counts |
| 97 | +``` |
| 98 | + |
| 99 | +### Extract only a subset of genes |
| 100 | + |
| 101 | +```{r} |
| 102 | +single_cell_counts = |
| 103 | + get_metadata() |> |
| 104 | + dplyr::filter( |
| 105 | + ethnicity == "African" & |
| 106 | + stringr::str_like(assay, "%10x%") & |
| 107 | + tissue == "lung parenchyma" & |
| 108 | + stringr::str_like(cell_type, "%CD4%") |
| 109 | + ) |> |
| 110 | + get_SingleCellExperiment(assays = "cpm", features = "PUM1") |
| 111 | +
|
| 112 | +single_cell_counts |
| 113 | +``` |
| 114 | + |
| 115 | +### Extract the counts as a Seurat object |
| 116 | + |
| 117 | +This converts the H5 SingleCellExperiment to Seurat, so it might take a long time and occupy a lot of memory depending on how many cells you are requesting. |
| 118 | + |
| 119 | + |
| 120 | +```{r} |
| 121 | +single_cell_counts = |
| 122 | + get_metadata() |> |
| 123 | + dplyr::filter( |
| 124 | + ethnicity == "African" & |
| 125 | + stringr::str_like(assay, "%10x%") & |
| 126 | + tissue == "lung parenchyma" & |
| 127 | + stringr::str_like(cell_type, "%CD4%") |
| 128 | + ) |> |
| 129 | + get_seurat() |
| 130 | +
|
| 131 | +single_cell_counts |
| 132 | +``` |
| 133 | + |
| 134 | +## Visualise gene transcription |
| 135 | + |
| 136 | +We can gather all natural killer cells and plot the distribution of CD56 (encoded by the NCAM1 gene) across all tissues. |
| 137 | + |
| 138 | +```{r, eval=FALSE} |
| 139 | +library(tidySingleCellExperiment) |
| 140 | +library(ggplot2) |
| 141 | +
|
| 142 | +get_metadata() |> |
| 143 | + |
| 144 | + # Filter and subset |
| 145 | + filter(cell_type_harmonised=="nk") |> |
| 146 | + select(.cell, file_id_db, disease, file_id, tissue_harmonised) |> |
| 147 | + |
| 148 | + # Get counts per million for NCAM1 gene |
| 149 | + get_SingleCellExperiment(assays = "cpm", features = "NCAM1") |> |
| 150 | +
|
| 151 | + # Get transcriptional abundance for plotting with `tidySingleCellExperiment` |
| 152 | + join_features("NCAM1", shape = "wide") |> |
| 153 | + |
| 154 | + # Plot |
| 155 | + ggplot(aes( tissue_harmonised, NCAM1,color = file_id)) + |
| 156 | + geom_jitter(shape=".") + |
| 157 | + |
| 158 | + # Style |
| 159 | + guides(color="none") + |
| 160 | + scale_y_log10() + |
| 161 | + theme_bw() + |
| 162 | + theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1)) |
| 163 | +
|
| 164 | +``` |
| 165 | + |
| 166 | +```{r, echo=FALSE, message=FALSE, warning=FALSE} |
| 167 | +knitr::include_graphics("inst/NCAM1_figure.png") |
| 168 | +``` |
| 169 | + |
0 commit comments