Skip to content
This repository was archived by the owner on Oct 14, 2025. It is now read-only.

Commit 77dc3f7

Browse files
committed
update gene figures
1 parent 2635903 commit 77dc3f7

File tree

5 files changed

+243
-121
lines changed

5 files changed

+243
-121
lines changed

README.Rmd

Lines changed: 89 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -59,31 +59,14 @@ library(stringr)
5959
get_metadata()
6060
```
6161

62-
### Explore the tissue
62+
### Explore the number of datasets per tissue
6363

6464
```{r}
6565
get_metadata() |>
66-
dplyr::distinct(tissue, file_id)
66+
dplyr::distinct(tissue, dataset_id) |>
67+
dplyr::count(tissue)
6768
```
6869

69-
```{r}
70-
#> # Source: SQL [?? x 2]
71-
#> # Database: sqlite 3.40.0 [[email protected]:5432/metadata]
72-
#> # Ordered by: desc(n)
73-
#> tissue n
74-
#> <chr> <int64>
75-
#> 1 blood 47
76-
#> 2 heart left ventricle 46
77-
#> 3 cortex of kidney 31
78-
#> 4 renal medulla 29
79-
#> 5 lung 27
80-
#> 6 liver 24
81-
#> 7 middle temporal gyrus 24
82-
#> 8 kidney 19
83-
#> 9 intestine 18
84-
#> 10 thymus 17
85-
#> # … with more rows
86-
```
8770

8871

8972
## Download single-cell RNA sequencing counts
@@ -162,34 +145,110 @@ single_cell_counts
162145

163146
We can gather all natural killer cells and plot the distribution of CD56 (NCAM1) across all tissues
164147

148+
```{r, eval=FALSE, echo=FALSE}
149+
library(tidySingleCellExperiment)
150+
library(ggplot2)
151+
152+
# Plots with styling
153+
154+
# Plot by disease
155+
get_metadata() |>
156+
# Filter and subset
157+
filter(cell_type_harmonised=="cd14 mono") |>
158+
filter(file_id_db != "c5a05f23f9784a3be3bfa651198a48eb") |>
159+
160+
# Get counts per million for the HLA-A gene
161+
get_SingleCellExperiment(assays = "cpm", features = "HLA-A") |>
162+
163+
# Add feature to table
164+
join_features("HLA-A", shape = "wide") |>
165+
166+
# Compute the per-disease median used to order the x axis
167+
as_tibble() |>
168+
with_groups(disease, ~ .x |> mutate(median_count = median(`HLA.A`, na.rm=TRUE))) |>
169+
170+
# Plot
171+
ggplot(aes( fct_reorder(disease, median_count,.desc = TRUE), `HLA.A`,color = file_id)) +
172+
geom_jitter(shape=".") +
173+
174+
# Style
175+
guides(color="none") +
176+
scale_y_log10() +
177+
theme_bw() +
178+
theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1)) +
179+
xlab("Disease") +
180+
ggtitle("HLA-A in CD14 monocytes. Coloured by dataset")
181+
182+
# Plot by tissue
183+
get_metadata() |>
184+
# Filter and subset
185+
filter(cell_type_harmonised=="cd14 mono") |>
186+
filter(file_id_db != "c5a05f23f9784a3be3bfa651198a48eb") |>
187+
188+
# Get counts per million for the HLA-A gene
189+
get_SingleCellExperiment(assays = "cpm", features = "HLA-A") |>
190+
191+
# Add feature to table
192+
join_features("HLA-A", shape = "wide") |>
193+
194+
# Compute the per-tissue median used to order the x axis
195+
as_tibble() |>
196+
with_groups(tissue_harmonised, ~ .x |> mutate(median_count = median(`HLA.A`, na.rm=TRUE))) |>
197+
198+
# Plot
199+
ggplot(aes( fct_reorder(tissue_harmonised, median_count,.desc = TRUE), `HLA.A`,color = file_id)) +
200+
geom_jitter(shape=".") +
201+
202+
# Style
203+
guides(color="none") +
204+
scale_y_log10() +
205+
theme_bw() +
206+
theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1)) +
207+
xlab("Tissue") +
208+
ggtitle("HLA-A in CD14 monocytes. Coloured by dataset")
209+
210+
```
211+
165212
```{r, eval=FALSE}
166213
library(tidySingleCellExperiment)
167214
library(ggplot2)
168215
216+
get_metadata() |>
217+
# Filter and subset
218+
filter(cell_type_harmonised=="cd14 mono") |>
219+
220+
# Get counts per million for the HLA-A gene
221+
get_SingleCellExperiment(assays = "cpm", features = "HLA-A") |>
222+
223+
# Plot
224+
join_features("HLA-A", shape = "wide") |>
225+
ggplot(aes( disease, `HLA.A`,color = file_id)) +
226+
geom_jitter(shape=".")
227+
```
228+
229+
```{r, echo=FALSE, message=FALSE, warning=FALSE}
230+
knitr::include_graphics("man/figures/HLA_A_disease_plot.png")
231+
```
232+
233+
```{r, eval=FALSE}
234+
169235
get_metadata() |>
170236
171237
# Filter and subset
172238
filter(cell_type_harmonised=="nk") |>
173-
select(.cell, file_id_db, dataset_id, tissue_harmonised) |>
174-
239+
175240
# Get counts per million for NCAM1 gene
176241
get_SingleCellExperiment(assays = "cpm", features = "NCAM1") |>
177242
178243
# Plot
179244
join_features("NCAM1", shape = "wide") |>
180245
ggplot(aes( tissue_harmonised, NCAM1,color = file_id)) +
181-
geom_jitter(shape=".") +
182-
183-
# Style
184-
guides(color="none") +
185-
scale_y_log10() +
186-
theme_bw() +
187-
theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1))
246+
geom_jitter(shape=".")
188247
189248
```
190249

191250
```{r, echo=FALSE, message=FALSE, warning=FALSE}
192-
knitr::include_graphics("man/figures/NCAM1_figure.png")
251+
knitr::include_graphics("man/figures/HLA_A_tissue_plot.png")
193252
```
194253

195254
# Cell metadata

README.md

Lines changed: 66 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -39,67 +39,49 @@ library(stringr)
3939

4040
``` r
4141
get_metadata()
42-
#> # Source: table</stornext/Home/data/allstaff/m/mangiola.s/.cache/R/CuratedAtlasQueryR/metadata.parquet> [?? x 56]
43-
#> # Database: DuckDB 0.6.2-dev1166 [unknown@Linux 3.10.0-1160.81.1.el7.x86_64:R 4.2.0/:memory:]
44-
#> .cell sampl…¹ .sample .samp…² assay assay…³ file_…⁴ cell_…⁵ cell_…⁶ devel…⁷
45-
#> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
46-
#> 1 AAACCT… 8a0fe0… 5f20d7… D17PrP… 10x … EFO:00… 1e334b… basal … CL:000… 31-yea…
47-
#> 2 AAACCT… 8a0fe0… 5f20d7… D17PrP… 10x … EFO:00… 1e334b… basal … CL:000… 31-yea…
48-
#> 3 AAACCT… 02eb2e… 5f20d7… D17PrP… 10x … EFO:00… 30f754… lumina… CL:000… 31-yea…
49-
#> 4 AAACCT… 02eb2e… 5f20d7… D17PrP… 10x … EFO:00… 30f754… lumina… CL:000… 31-yea…
50-
#> 5 AAACCT… 8a0fe0… 5f20d7… D17PrP… 10x … EFO:00… 1e334b… basal … CL:000… 31-yea…
51-
#> 6 AAACCT… 8a0fe0… 5f20d7… D17PrP… 10x … EFO:00… 1e334b… basal … CL:000… 31-yea…
52-
#> 7 AAACCT… 8a0fe0… 5f20d7… D17PrP… 10x … EFO:00… 1e334b… basal … CL:000… 31-yea…
53-
#> 8 AAACGG… 8a0fe0… 5f20d7… D17PrP… 10x … EFO:00… 1e334b… basal … CL:000… 31-yea…
54-
#> 9 AAACGG… 02eb2e… 5f20d7… D17PrP… 10x … EFO:00… 30f754… lumina… CL:000… 31-yea…
55-
#> 10 AAACGG… 8a0fe0… 5f20d7… D17PrP… 10x … EFO:00… 1e334b… basal … CL:000… 31-yea…
56-
#> # … with more rows, 46 more variables:
42+
#> # Source: table</stornext/Home/data/allstaff/m/mangiola.s/.cache/R/CuratedAtlasQueryR/metadata.0.2.2.parquet> [?? x 56]
43+
#> # Database: DuckDB 0.7.0 [unknown@Linux 3.10.0-1160.81.1.el7.x86_64:R 4.2.0/:memory:]
44+
#> `_cell` _samp…¹ cell_…² cell_…³ confi…⁴ cell_…⁵ cell_…⁶ cell_…⁷ sampl…⁸
45+
#> <chr> <chr> <chr> <chr> <dbl> <chr> <chr> <chr> <chr>
46+
#> 1 AAACCTGAGAGA… 689e2f… basal … basal_… 1 <NA> <NA> <NA> f297c7…
47+
#> 2 AAACCTGAGTTG… 689e2f… basal … basal_… 1 <NA> <NA> <NA> f297c7…
48+
#> 3 AAACCTGCAGTC… 689e2f… lumina… lumina… 1 <NA> <NA> <NA> 930938…
49+
#> 4 AAACCTGCAGTT… 689e2f… lumina… lumina… 1 <NA> <NA> <NA> 930938…
50+
#> 5 AAACCTGGTCTA… 689e2f… basal … basal_… 1 <NA> <NA> <NA> f297c7…
51+
#> 6 AAACCTGTCGTA… 689e2f… basal … basal_… 1 <NA> <NA> <NA> f297c7…
52+
#> 7 AAACCTGTCTTG… 689e2f… basal … basal_… 1 <NA> <NA> <NA> f297c7…
53+
#> 8 AAACGGGAGTAC… 689e2f… basal … basal_… 1 <NA> <NA> <NA> f297c7…
54+
#> 9 AAACGGGAGTAG… 689e2f… lumina… lumina… 1 <NA> <NA> <NA> 930938…
55+
#> 10 AAACGGGAGTGG… 689e2f… basal … basal_… 1 <NA> <NA> <NA> f297c7…
56+
#> # … with more rows, 47 more variables: `_sample_name` <chr>, assay <chr>,
57+
#> # assay_ontology_term_id <chr>, file_id_db <chr>,
58+
#> # cell_type_ontology_term_id <chr>, development_stage <chr>,
5759
#> # development_stage_ontology_term_id <chr>, disease <chr>,
5860
#> # disease_ontology_term_id <chr>, ethnicity <chr>,
59-
#> # ethnicity_ontology_term_id <chr>, file_id <chr>, is_primary_data.x <chr>,
60-
#> # organism <chr>, organism_ontology_term_id <chr>, sample_placeholder <chr>,
61-
#> # sex <chr>, sex_ontology_term_id <chr>, tissue <chr>,
62-
#> # tissue_ontology_term_id <chr>, tissue_harmonised <chr>, age_days <dbl>, …
61+
#> # ethnicity_ontology_term_id <chr>, experiment___ <chr>, file_id <chr>,
62+
#> # is_primary_data_x <chr>, organism <chr>, organism_ontology_term_id <chr>, …
6363
```
6464

65-
### Explore the tissue
65+
### Explore the number of datasets per tissue
6666

6767
``` r
6868
get_metadata() |>
69-
dplyr::distinct(tissue, file_id)
69+
dplyr::distinct(tissue, dataset_id) |>
70+
dplyr::count(tissue)
7071
#> # Source: SQL [?? x 2]
71-
#> # Database: DuckDB 0.6.2-dev1166 [unknown@Linux 3.10.0-1160.81.1.el7.x86_64:R 4.2.0/:memory:]
72-
#> tissue file_id
73-
#> <chr> <chr>
74-
#> 1 blood 07beec85-51be-4d73-bb80-8f85b7b643d5
75-
#> 2 lymph node 39b6cc45-8c5c-4f7b-944c-58f66da5efb1
76-
#> 3 middle temporal gyrus 39bbfb98-79d8-41a3-89f4-d82fde8eade1
77-
#> 4 middle temporal gyrus 3ac0ac73-ef77-416c-8e04-27d083bb7cd9
78-
#> 5 liver 3ae36927-c188-4511-88cc-572ee1edf906
79-
#> 6 cerebellum 5c1cc788-2645-45fb-b1d9-2f43d368bba8
80-
#> 7 interventricular septum 967fda08-9a6d-4ad7-aeac-ec9dd3bd8cfa
81-
#> 8 apex of heart 967fda08-9a6d-4ad7-aeac-ec9dd3bd8cfa
82-
#> 9 left cardiac atrium 967fda08-9a6d-4ad7-aeac-ec9dd3bd8cfa
83-
#> 10 heart left ventricle 967fda08-9a6d-4ad7-aeac-ec9dd3bd8cfa
84-
#> # … with more rows
85-
```
86-
87-
``` r
88-
#> # Source: SQL [?? x 2]
89-
#> # Database: sqlite 3.40.0 [[email protected]:5432/metadata]
90-
#> # Ordered by: desc(n)
91-
#> tissue n
92-
#> <chr> <int64>
93-
#> 1 blood 47
94-
#> 2 heart left ventricle 46
95-
#> 3 cortex of kidney 31
96-
#> 4 renal medulla 29
97-
#> 5 lung 27
98-
#> 6 liver 24
99-
#> 7 middle temporal gyrus 24
100-
#> 8 kidney 19
101-
#> 9 intestine 18
102-
#> 10 thymus 17
72+
#> # Database: DuckDB 0.7.0 [unknown@Linux 3.10.0-1160.81.1.el7.x86_64:R 4.2.0/:memory:]
73+
#> tissue n
74+
#> <chr> <dbl>
75+
#> 1 peripheral zone of prostate 10
76+
#> 2 transition zone of prostate 10
77+
#> 3 blood 47
78+
#> 4 intestine 18
79+
#> 5 middle temporal gyrus 24
80+
#> 6 heart left ventricle 46
81+
#> 7 apex of heart 16
82+
#> 8 heart right ventricle 16
83+
#> 9 left cardiac atrium 7
84+
#> 10 interventricular septum 16
10385
#> # … with more rows
10486
```
10587

@@ -125,15 +107,14 @@ single_cell_counts =
125107

126108
single_cell_counts
127109
#> class: SingleCellExperiment
128-
#> dim: 60661 1571
110+
#> dim: 35615 1571
129111
#> metadata(0):
130112
#> assays(2): counts cpm
131-
#> rownames(60661): TSPAN6 TNMD ... RP11-175I6.6 PRSS43P
113+
#> rownames(35615): TSPAN6 TNMD ... LNCDAT HRURF
132114
#> rowData names(0):
133115
#> colnames(1571): ACAGCCGGTCCGTTAA_F02526_1 GGGAATGAGCCCAGCT_F02526_1 ...
134116
#> TACAACGTCAGCATTG_SC84_1 CATTCGCTCAATACCG_F02526_1
135-
#> colData names(56): sample_id_db .sample ... n_tissue_in_cell_type
136-
#> original_cell_id
117+
#> colData names(56): _sample cell_type ... updated_at_y original_cell_id
137118
#> reducedDimNames(0):
138119
#> mainExpName: NULL
139120
#> altExpNames(0):
@@ -161,15 +142,14 @@ single_cell_counts =
161142

162143
single_cell_counts
163144
#> class: SingleCellExperiment
164-
#> dim: 60661 1571
145+
#> dim: 35615 1571
165146
#> metadata(0):
166147
#> assays(1): cpm
167-
#> rownames(60661): TSPAN6 TNMD ... RP11-175I6.6 PRSS43P
148+
#> rownames(35615): TSPAN6 TNMD ... LNCDAT HRURF
168149
#> rowData names(0):
169150
#> colnames(1571): ACAGCCGGTCCGTTAA_F02526_1 GGGAATGAGCCCAGCT_F02526_1 ...
170151
#> TACAACGTCAGCATTG_SC84_1 CATTCGCTCAATACCG_F02526_1
171-
#> colData names(56): sample_id_db .sample ... n_tissue_in_cell_type
172-
#> original_cell_id
152+
#> colData names(56): _sample cell_type ... updated_at_y original_cell_id
173153
#> reducedDimNames(0):
174154
#> mainExpName: NULL
175155
#> altExpNames(0):
@@ -201,8 +181,7 @@ single_cell_counts
201181
#> rowData names(0):
202182
#> colnames(1571): ACAGCCGGTCCGTTAA_F02526_1 GGGAATGAGCCCAGCT_F02526_1 ...
203183
#> TACAACGTCAGCATTG_SC84_1 CATTCGCTCAATACCG_F02526_1
204-
#> colData names(56): sample_id_db .sample ... n_tissue_in_cell_type
205-
#> original_cell_id
184+
#> colData names(56): _sample cell_type ... updated_at_y original_cell_id
206185
#> reducedDimNames(0):
207186
#> mainExpName: NULL
208187
#> altExpNames(0):
@@ -228,13 +207,13 @@ single_cell_counts =
228207
#> ℹ Synchronising files
229208
#> ℹ Reading files.
230209
#> ℹ Compiling Single Cell Experiment.
231-
#> Warning: Feature names cannot have underscores ('_'), replacing with dashes
232-
#> ('-')
210+
#> Warning: Non-unique features (rownames) present in the input matrix, making
211+
#> unique
233212

234213
single_cell_counts
235214
#> An object of class Seurat
236-
#> 60661 features across 1571 samples within 1 assay
237-
#> Active assay: originalexp (60661 features, 0 variable features)
215+
#> 35615 features across 1571 samples within 1 assay
216+
#> Active assay: originalexp (35615 features, 0 variable features)
238217
```
239218

240219
## Visualise gene transcription
@@ -246,28 +225,38 @@ We can gather all natural killer cells and plot the distribution of CD56
246225
library(tidySingleCellExperiment)
247226
library(ggplot2)
248227

228+
get_metadata() |>
229+
# Filter and subset
230+
filter(cell_type_harmonised=="cd14 mono") |>
231+
232+
# Get counts per million for the HLA-A gene
233+
get_SingleCellExperiment(assays = "cpm", features = "HLA-A") |>
234+
235+
# Plot
236+
join_features("HLA-A", shape = "wide") |>
237+
ggplot(aes( disease, `HLA.A`,color = file_id)) +
238+
geom_jitter(shape=".")
239+
```
240+
241+
<img src="man/figures/HLA_A_disease_plot.png" width="497" />
242+
243+
``` r
244+
249245
get_metadata() |>
250246

251247
# Filter and subset
252248
filter(cell_type_harmonised=="nk") |>
253-
select(.cell, file_id_db, dataset_id, tissue_harmonised) |>
254-
249+
255250
# Get counts per million for NCAM1 gene
256251
get_SingleCellExperiment(assays = "cpm", features = "NCAM1") |>
257252

258253
# Plot
259254
join_features("NCAM1", shape = "wide") |>
260255
ggplot(aes( tissue_harmonised, NCAM1,color = file_id)) +
261-
geom_jitter(shape=".") +
262-
263-
# Style
264-
guides(color="none") +
265-
scale_y_log10() +
266-
theme_bw() +
267-
theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1))
256+
geom_jitter(shape=".")
268257
```
269258

270-
<img src="man/figures/NCAM1_figure.png" width="629" />
259+
<img src="man/figures/HLA_A_tissue_plot.png" width="499" />
271260

272261
# Cell metadata
273262

man/figures/HLA_A_disease_plot.png

61.7 KB
Loading

man/figures/HLA_A_tissue_plot.png

56 KB
Loading

0 commit comments

Comments
 (0)