@@ -16,7 +16,7 @@ knitr::opts_chunk$set(
1616)
1717```
1818
19- ``` {r setup , echo=FALSE, message=FALSE}
19+ ``` {r load_packages , echo=FALSE, message=FALSE}
2020library(SuperCellCyto)
2121library(parallel)
2222library(BiocParallel)
@@ -61,7 +61,7 @@ with each sample containing 10,000 cells.
6161Hence in total, we will have a toy dataset containing 15 markers and
626230,000 cells.
6363
64- ``` {r}
64+ ``` {r simulate_data }
6565n_markers <- 15
6666n_samples <- 3
6767dat <- simCytoData(nmarkers = n_markers, ncells = rep(10000, n_samples))
@@ -71,7 +71,7 @@ head(dat)
7171For our toy dataset, we will transform our data using arcsinh transformation.
7272We will use the base R ` asinh ` function to do this:
7373
74- ``` {r}
74+ ``` {r arcsinh_transformation }
7575# Specify which columns are the markers to transform
7676marker_cols <- paste0("Marker_", seq_len(n_markers))
7777# The co-factor for arc-sinh
@@ -94,7 +94,7 @@ We will also create a column *Cell_id_dummy* which uniquely identifies each cell.
9494It will have values such as ` Cell_1, Cell_2, ` all the way until ` Cell_x `
9595where x is the number of cells in the dataset.
9696
97- ``` {r}
97+ ``` {r create_cell_id }
9898dat$Cell_id_dummy <- paste0("Cell_", seq_len(nrow(dat)))
9999head(dat$Cell_id_dummy, n = 10)
100100```
@@ -103,13 +103,13 @@ By default, the `simCytoData` function will generate cells for multiple samples,
103103and the resulting ` data.table ` object will already have a column
104104called *Sample* that denotes the sample the cells come from.
105105
106- ``` {r}
106+ ``` {r check_sample_col }
107107unique(dat$Sample)
108108```
109109
110110Let's take note of the sample and cell id column for later.
111111
112- ``` {r}
112+ ``` {r set_colnames }
113113sample_col <- "Sample"
114114cell_id_col <- "Cell_id_dummy"
115115```
@@ -131,7 +131,7 @@ your data, then make sure you specify them in a vector that you later pass to
131131For this tutorial, we will use all the arcsinh transformed markers in the
132132toy data.
133133
134- ``` {r}
134+ ``` {r run_supercellcyto }
135135supercells <- runSuperCellCyto(
136136 dt = dat,
137137 markers = marker_cols_asinh,
@@ -142,13 +142,13 @@ supercells <- runSuperCellCyto(
142142
143143Let's dig deeper into the object it created:
144144
145- ``` {r}
145+ ``` {r check_supercells_class }
146146class(supercells)
147147```
148148
149149It is a list containing 3 elements:
150150
151- ``` {r}
151+ ``` {r check_supercells_names }
152152names(supercells)
153153```
154154
@@ -171,7 +171,7 @@ supercell.
171171These are calculated by taking the average of the marker expression of
172172all the cells contained within a supercell.
173173
174- ``` {r}
174+ ``` {r show_supercell_expr_matrix }
175175head(supercells$supercell_expression_matrix)
176176```
177177
@@ -188,7 +188,7 @@ variable).
188188
189189Let's have a look at ` SuperCellId ` :
190190
191- ``` {r}
191+ ``` {r show_supercell_ids }
192192head(unique(supercells$supercell_expression_matrix$SuperCellId))
193193```
194194
@@ -198,7 +198,7 @@ a sample) used to uniquely identify each supercell in a sample.
198198Notably, you may encounter this (` SuperCell_1 ` , ` SuperCell_2 ` ) being repeated
199199across different samples, e.g.,
200200
201- ``` {r}
201+ ``` {r show_supercell_1_ids }
202202supercell_ids <- unique(supercells$supercell_expression_matrix$SuperCellId)
203203supercell_ids[grep("SuperCell_1_", supercell_ids)]
204204```
@@ -217,7 +217,7 @@ This aids in differentiating the supercells in different samples.
217217` supercell_cell_map ` maps each cell in our dataset to the supercell it
218218belongs to.
219219
220- ``` {r}
220+ ``` {r show_supercell_cell_map }
221221head(supercells$supercell_cell_map)
222222```
223223
@@ -244,7 +244,7 @@ As each sample will be processed by a parallel job, we don't want a job that
244244processes a large sample to also be assigned other smaller samples if possible.
245245If you want to know more about how this feature works, please refer to our manuscript.
246246
247- ``` {r}
247+ ``` {r run_supercellcyto_parallel }
248248supercell_par <- runSuperCellCyto(
249249 dt = dat,
250250 markers = marker_cols_asinh,
@@ -307,7 +307,7 @@ toy dataset, we will regenerate the supercells using gamma of 10 and 50.
307307The function to do this is ` recomputeSupercells ` .
308308We will store the output in a list, one element per gamma value.
309309
310- ``` {r}
310+ ``` {r recompute_supercells }
311311addt_gamma_vals <- c(10, 50)
312312supercells_addt_gamma <- lapply(addt_gamma_vals, function(gam) {
313313 recomputeSupercells(
@@ -325,7 +325,7 @@ We should end up with a list containing 2 elements.
325325The 1st element contains supercells generated using gamma = 10,
326326and the 2nd contains supercells generated using gamma = 50.
327327
328- ``` {r}
328+ ``` {r show_supercells_gamma10 }
329329supercells_addt_gamma[[1]]
330330```
331331
@@ -341,7 +341,7 @@ Compared to the previous run where gamma was set to 20, we should get more
341341supercells for gamma = 10, and fewer for gamma = 50.
342342Let's see if that's the case.
343343
344- ``` {r}
344+ ``` {r count_supercells }
345345n_supercells_gamma20 <- nrow(supercells$supercell_expression_matrix)
346346n_supercells_gamma10 <- nrow(
347347 supercells_addt_gamma[[1]]$supercell_expression_matrix
@@ -351,11 +351,11 @@ n_supercells_gamma50 <- nrow(
351351)
352352```
353353
354- ``` {r}
354+ ``` {r gamma10_gt_gamma20 }
355355n_supercells_gamma10 > n_supercells_gamma20
356356```
357357
358- ``` {r}
358+ ``` {r gamma50_lt_gamma20 }
359359n_supercells_gamma50 < n_supercells_gamma20
360360```
361361
@@ -369,7 +369,7 @@ and run `runSuperCellCyto`
369369function on each of them with different ` gam ` parameter value.
370370Something like the following:
371371
372- ``` {r}
372+ ``` {r diff_gamma_per_sample }
373373n_markers <- 10
374374dat <- simCytoData(nmarkers = n_markers)
375375markers_col <- paste0("Marker_", seq_len(n_markers))
@@ -397,7 +397,7 @@ supercells_diff_gam <- lapply(seq_len(length(samples)), function(i) {
397397Subsequently, to extract and combine the ` supercell_expression_matrix ` and
398398` supercell_cell_map ` , we will need to use ` rbind ` :
399399
400- ``` {r}
400+ ``` {r combine_supercell_results }
401401supercell_expression_matrix <- do.call(
402402 "rbind", lapply(
403403 supercells_diff_gam, function(x) x[["supercell_expression_matrix"]]
@@ -411,14 +411,14 @@ supercell_cell_map <- do.call(
411411)
412412```
413413
414- ``` {r}
414+ ``` {r show_combined_expr_matrix }
415415rbind(
416416 head(supercell_expression_matrix, n = 3),
417417 tail(supercell_expression_matrix, n = 3)
418418)
419419```
420420
421- ``` {r}
421+ ``` {r show_combined_cell_map }
422422rbind(head(supercell_cell_map, n = 3), tail(supercell_cell_map, n = 3))
423423```
424424
@@ -463,6 +463,6 @@ load the relevant output saved using the qs package and the relevant data
463463` recomputeSupercells ` function.
464464
465465## Session information
466- ``` {r}
466+ ``` {r session_info }
467467sessionInfo()
468468```
0 commit comments