@@ -38,22 +38,23 @@ library(CuratedAtlasQueryR)
3838### Load the metadata
3939
4040``` r
41- metadata <- get_metadata(cache_directory = " /stornext/Home/data/allstaff/m/milton.m/HCAquery/fake_cache" )
41+ # Note: in real applications you should use the default value of remote_url
42+ metadata <- get_metadata(remote_url = METADATA_URL )
4243metadata
43- # > # Source: table</stornext/Home/data/allstaff/m/ milton.m/HCAquery/fake_cache /metadata.0.2.3.parquet> [?? x 56]
44+ # > # Source: table</vast/scratch/users/ milton.m/cache/R/CuratedAtlasQueryR /metadata.0.2.3.parquet> [?? x 56]
4445# > # Database: DuckDB 0.7.1 [unknown@Linux 3.10.0-1160.88.1.el7.x86_64:R 4.2.1/:memory:]
4546# > cell_ sample_ cell_…¹ cell_…² confi…³ cell_…⁴ cell_…⁵ cell_…⁶ sampl…⁷ _samp…⁸
4647# > <chr> <chr> <chr> <chr> <dbl> <chr> <chr> <chr> <chr> <chr>
47- # > 1 ACGC… 188e17… classi… cd14 m … 1 cd14 m… monocy… classi… d035c6… HGR000 …
48- # > 2 GACT… 188e17… classi… cd14 m … 1 cd14 m… monocy… classi… d035c6… HGR000 …
49- # > 3 TGTC… 188e17… classi… cd14 m … 1 cd14 m… monocy… classi… d035c6… HGR000 …
50- # > 4 GTTA… 188e17… classi… cd14 m … 1 cd14 m… monocy… classi… d035c6… HGR000 …
51- # > 5 CTCG… 188e17… classi… cd14 m … 1 cd14 m… monocy… classi… d035c6… HGR000 …
52- # > 6 CAAC… 188e17… classi… cd14 m … 1 cd14 m… monocy… classi… d035c6… HGR000 …
53- # > 7 GGTG… 188e17… classi… cd14 m… 1 cd14 m… monocy… classi… d035c6… HGR000 …
54- # > 8 GGTG… 188e17… classi… cd14 m … 1 cd14 m… monocy… classi… d035c6… HGR000 …
55- # > 9 GCTG… 188e17… classi… cd14 m … 1 cd14 m… monocy… interm… d035c6… HGR000 …
56- # > 10 ACCT… 188e17… classi… cd14 m… 1 cd14 m… monocy… classi… d035c6… HGR000 …
48+ # > 1 8387… 7bd7b8… natura… immune … 5 cd8 tem gmp natura… 842ce7… Q59___ …
49+ # > 2 1768… 7bd7b8… natura… immune … 5 cd8 tem cd8 tcm natura… 842ce7… Q59___ …
50+ # > 3 6329… 7bd7b8… natura… immune … 5 cd8 tem clp termin… 842ce7… Q59___ …
51+ # > 4 5027… 7bd7b8… natura… immune … 5 cd8 tem clp natura… 842ce7… Q59___ …
52+ # > 5 7956… 7bd7b8… natura… immune … 5 cd8 tem clp natura… 842ce7… Q59___ …
53+ # > 6 4305… 7bd7b8… natura… immune … 5 cd8 tem clp termin… 842ce7… Q59___ …
54+ # > 7 2126… 933f96… natura… ilc 1 nk nk natura… c250bf… AML3__ …
55+ # > 8 3114… 933f96… natura… immune … 5 mait nk natura… c250bf… AML3__ …
56+ # > 9 1407… 933f96… natura… immune … 5 mait clp natura… c250bf… AML3__ …
57+ # > 10 2911… 933f96… natura… nk 5 nk clp natura… c250bf… AML3__ …
5758# > # … with more rows, 46 more variables: assay <chr>,
5859# > # assay_ontology_term_id <chr>, file_id_db <chr>,
5960# > # cell_type_ontology_term_id <chr>, development_stage <chr>,
@@ -70,14 +71,20 @@ The `metadata` variable can then be re-used for all subsequent queries.
7071``` r
7172metadata | >
7273 dplyr :: distinct(tissue , file_id )
73- # > # Source: SQL [4 x 2]
74+ # > # Source: SQL [10 x 2]
7475# > # Database: DuckDB 0.7.1 [unknown@Linux 3.10.0-1160.88.1.el7.x86_64:R 4.2.1/:memory:]
75- # > tissue file_id
76- # > <chr> <chr>
77- # > 1 blood 1042ba0a-98c5-4816-897d-e192eb9303e3
78- # > 2 lung parenchyma 6661ab3a-792a-4682-b58c-4afb98b2c016
79- # > 3 respiratory airway 6661ab3a-792a-4682-b58c-4afb98b2c016
80- # > 4 nose 6661ab3a-792a-4682-b58c-4afb98b2c016
76+ # > tissue file_id
77+ # > <chr> <chr>
78+ # > 1 bone marrow 1ff5cbda-4d41-4f50-8c7e-cbe4a90e38db
79+ # > 2 lung parenchyma 6661ab3a-792a-4682-b58c-4afb98b2c016
80+ # > 3 respiratory airway 6661ab3a-792a-4682-b58c-4afb98b2c016
81+ # > 4 nose 6661ab3a-792a-4682-b58c-4afb98b2c016
82+ # > 5 renal pelvis dc9d8cdd-29ee-4c44-830c-6559cb3d0af6
83+ # > 6 kidney dc9d8cdd-29ee-4c44-830c-6559cb3d0af6
84+ # > 7 renal medulla dc9d8cdd-29ee-4c44-830c-6559cb3d0af6
85+ # > 8 cortex of kidney dc9d8cdd-29ee-4c44-830c-6559cb3d0af6
86+ # > 9 kidney blood vessel dc9d8cdd-29ee-4c44-830c-6559cb3d0af6
87+ # > 10 lung a2796032-d015-40c4-b9db-835207e5bd5b
8188```
8289
8390## Download single-cell RNA sequencing counts
@@ -287,10 +294,14 @@ HLA-A across all tissues
287294 #> ℹ Reading files.
288295 #> ℹ Compiling Single Cell Experiment.
289296 #> Warning: Transformation introduced infinite values in continuous y-axis
297+ #> Warning in min(x): no non-missing arguments to min; returning Inf
298+ #> Warning in max(x): no non-missing arguments to max; returning -Inf
290299
291300![](README_files/figure-gfm/unnamed-chunk-14-1.png)<!-- -->
292301
293302 #> Warning: Transformation introduced infinite values in continuous y-axis
303+ #> Warning in min(x): no non-missing arguments to min; returning Inf
304+ #> Warning in max(x): no non-missing arguments to max; returning -Inf
294305
295306![](README_files/figure-gfm/unnamed-chunk-14-2.png)<!-- -->
296307
@@ -311,14 +322,14 @@ counts |>
311322metadata | >
312323
313324 # Filter and subset
314- filter(cell_type_harmonised == " nk" ) | >
325+ dplyr :: filter(cell_type_harmonised == " nk" ) | >
315326
316327 # Get counts per million for HLA-A gene
317328 get_single_cell_experiment(assays = " cpm" , features = " HLA-A" ) | >
318329
319- # Plot (styling code have been omitted)
320- join_features(" HLA-A" , shape = " wide" ) | >
321- ggplot(aes( tissue_harmonised , `HLA.A` ,color = file_id )) +
330+ # Plot (styling code has been omitted)
331+ tidySingleCellExperiment :: join_features(" HLA-A" , shape = " wide" ) | >
332+ ggplot(aes(tissue_harmonised , `HLA.A` ,color = file_id )) +
322333 geom_jitter(shape = " ." )
323334# > ℹ Realising metadata.
324335# > ℹ Synchronising files
@@ -341,62 +352,39 @@ function returns a data frame with one row per dataset, including the
341352data frame.
342353
343354``` r
344- harmonised <- get_metadata() | > dplyr :: filter(tissue == " kidney blood vessel" )
355+ harmonised <- metadata | > dplyr :: filter(tissue == " kidney blood vessel" )
345356unharmonised <- get_unharmonised_metadata(harmonised )
346357unharmonised
347- # > # A tibble: 4 × 2
358+ # > # A tibble: 1 × 2
348359# > file_id unharmonised
349360# > <chr> <list>
350- # > 1 63523aa3-0d04-4fc6-ac59-5cadd3e73a14 <tbl_dck_[,17]>
351- # > 2 8fee7b82-178b-4c04-bf23-04689415690d <tbl_dck_[,12]>
352- # > 3 dc9d8cdd-29ee-4c44-830c-6559cb3d0af6 <tbl_dck_[,14]>
353- # > 4 f7e94dbb-8638-4616-aaf9-16e2212c369f <tbl_dck_[,14]>
361+ # > 1 dc9d8cdd-29ee-4c44-830c-6559cb3d0af6 <tbl_dck_[,14]>
354362```
355363
356364Notice that the columns differ between each dataset’s data frame (only one dataset matches in the sample metadata used here, so a single data frame is shown):
357365
358366``` r
359367dplyr :: pull(unharmonised ) | > head(2 )
360368# > [[1]]
361- # > # Source: SQL [?? x 17 ]
369+ # > # Source: SQL [?? x 14 ]
362370# > # Database: DuckDB 0.7.1 [unknown@Linux 3.10.0-1160.88.1.el7.x86_64:R 4.2.1/:memory:]
363371# > cell_ file_id donor…¹ donor…² libra…³ mappe…⁴ sampl…⁵ suspe…⁶ suspe…⁷ autho…⁸
364372# > <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
365- # > 1 4602… 63523a… 27 mon… a8536b… 5ddaea… GENCOD… 61bf84… cell d8a44f… Pelvic…
366- # > 2 4602… 63523a… 27 mon… a8536b… 5ddaea… GENCOD… 61bf84… cell d8a44f… Pelvic…
367- # > 3 4602… 63523a… 27 mon… a8536b… 5ddaea… GENCOD… 61bf84… cell d8a44f… Pelvic…
368- # > 4 4602… 63523a… 27 mon… a8536b… 5ddaea… GENCOD… 61bf84… cell d8a44f… Pelvic…
369- # > 5 4602… 63523a… 27 mon… a8536b… 5ddaea… GENCOD… 61bf84… cell d8a44f… Pelvic…
370- # > 6 4602… 63523a… 27 mon… a8536b… 5ddaea… GENCOD… 61bf84… cell d8a44f… Pelvic…
371- # > 7 4602… 63523a… 27 mon… a8536b… 5ddaea… GENCOD… 61bf84… cell d8a44f… Pelvic…
372- # > 8 4602… 63523a… 27 mon… a8536b… 5ddaea… GENCOD… 61bf84… cell d8a44f… Pelvic…
373- # > 9 4602… 63523a… 19 mon… 463181… 671785… GENCOD… 125234… cell c7485e… CD4 T …
374- # > 10 4602… 63523a… 27 mon… a8536b… 5ddaea… GENCOD… 61bf84… cell d8a44f… Pelvic…
375- # > # … with more rows, 7 more variables: cell_state <chr>,
376- # > # reported_diseases <chr>, Short_Sample <chr>, Project <chr>,
377- # > # Experiment <chr>, compartment <chr>, broad_celltype <chr>, and abbreviated
373+ # > 1 4602… dc9d8c… 27 mon… a8536b… 5ddaea… GENCOD… 61bf84… cell d8a44f… Pelvic…
374+ # > 2 4602… dc9d8c… 27 mon… a8536b… 5ddaea… GENCOD… 61bf84… cell d8a44f… Pelvic…
375+ # > 3 4602… dc9d8c… 27 mon… a8536b… 5ddaea… GENCOD… 61bf84… cell d8a44f… Pelvic…
376+ # > 4 4602… dc9d8c… 27 mon… a8536b… 5ddaea… GENCOD… 61bf84… cell d8a44f… Pelvic…
377+ # > 5 4602… dc9d8c… 27 mon… a8536b… 5ddaea… GENCOD… 61bf84… cell d8a44f… Pelvic…
378+ # > 6 4602… dc9d8c… 27 mon… a8536b… 5ddaea… GENCOD… 61bf84… cell d8a44f… Pelvic…
379+ # > 7 4602… dc9d8c… 27 mon… a8536b… 5ddaea… GENCOD… 61bf84… cell d8a44f… Pelvic…
380+ # > 8 4602… dc9d8c… 27 mon… a8536b… 5ddaea… GENCOD… 61bf84… cell d8a44f… Pelvic…
381+ # > 9 4602… dc9d8c… 27 mon… a8536b… 5ddaea… GENCOD… 61bf84… cell d8a44f… Pelvic…
382+ # > 10 4602… dc9d8c… 27 mon… a8536b… 5ddaea… GENCOD… 61bf84… cell d8a44f… Pelvic…
383+ # > # … with more rows, 4 more variables: reported_diseases <chr>,
384+ # > # Experiment <chr>, Project <chr>, broad_celltype <chr>, and abbreviated
378385# > # variable names ¹donor_age, ²donor_uuid, ³library_uuid,
379386# > # ⁴mapped_reference_annotation, ⁵sample_uuid, ⁶suspension_type,
380387# > # ⁷suspension_uuid, ⁸author_cell_type
381- # >
382- # > [[2]]
383- # > # Source: SQL [?? x 12]
384- # > # Database: DuckDB 0.7.1 [unknown@Linux 3.10.0-1160.88.1.el7.x86_64:R 4.2.1/:memory:]
385- # > cell_ file_id orig.…¹ nCoun…² nFeat…³ seura…⁴ Project donor…⁵ compa…⁶ broad…⁷
386- # > <chr> <chr> <chr> <dbl> <chr> <chr> <chr> <chr> <chr> <chr>
387- # > 1 1069 8fee7b… 4602ST… 16082 3997 25 Experi… Wilms3 non_PT Pelvic…
388- # > 2 1214 8fee7b… 4602ST… 1037 606 25 Experi… Wilms3 non_PT Pelvic…
389- # > 3 2583 8fee7b… 4602ST… 3028 1361 25 Experi… Wilms3 non_PT Pelvic…
390- # > 4 2655 8fee7b… 4602ST… 1605 859 25 Experi… Wilms3 non_PT Pelvic…
391- # > 5 3609 8fee7b… 4602ST… 1144 682 25 Experi… Wilms3 non_PT Pelvic…
392- # > 6 3624 8fee7b… 4602ST… 1874 963 25 Experi… Wilms3 non_PT Pelvic…
393- # > 7 3946 8fee7b… 4602ST… 1296 755 25 Experi… Wilms3 non_PT Pelvic…
394- # > 8 5163 8fee7b… 4602ST… 11417 3255 25 Experi… Wilms3 non_PT Pelvic…
395- # > 9 5446 8fee7b… 4602ST… 1769 946 19 Experi… Wilms2 lympho… CD4 T …
396- # > 10 6275 8fee7b… 4602ST… 3750 1559 25 Experi… Wilms3 non_PT Pelvic…
397- # > # … with more rows, 2 more variables: author_cell_type <chr>, Sample <chr>, and
398- # > # abbreviated variable names ¹orig.ident, ²nCount_RNA, ³nFeature_RNA,
399- # > # ⁴seurat_clusters, ⁵donor_id, ⁶compartment, ⁷broad_celltype
400388```
401389
402390# Cell metadata
@@ -484,13 +472,13 @@ sessionInfo()
484472# > [8] base
485473# >
486474# > other attached packages:
487- # > [1] ggplot2_3.4. 1 tidySingleCellExperiment_1.6.3
488- # > [3] SingleCellExperiment_1.18 .1 SummarizedExperiment_1.26.1
489- # > [5] Biobase_2.56 .0 GenomicRanges_1.48.0
490- # > [7] GenomeInfoDb_1.32.4 IRanges_2.30.1
491- # > [9] S4Vectors_0.34 .0 BiocGenerics_0.42.0
492- # > [11] MatrixGenerics_1.8.1 matrixStats_0.63.0
493- # > [13] ttservice_0.2.2 CuratedAtlasQueryR_0.99.1
475+ # > [1] tidySingleCellExperiment_1.6.3 SingleCellExperiment_1.18. 1
476+ # > [3] SummarizedExperiment_1.26 .1 Biobase_2.56.0
477+ # > [5] GenomicRanges_1.48 .0 GenomeInfoDb_1.32.4
478+ # > [7] IRanges_2.30.1 S4Vectors_0.34.0
479+ # > [9] BiocGenerics_0.42 .0 MatrixGenerics_1.8.1
480+ # > [11] matrixStats_0.63.0 ttservice_0.2.2
481+ # > [13] ggplot2_3.4.1 CuratedAtlasQueryR_0.99.1
494482# >
495483# > loaded via a namespace (and not attached):
496484# > [1] plyr_1.8.8 igraph_1.4.1 lazyeval_0.2.2
@@ -521,20 +509,20 @@ sessionInfo()
521509# > [76] goftest_1.2-3 knitr_1.42 fitdistrplus_1.1-8
522510# > [79] purrr_1.0.1 RANN_2.6.1 pbapply_1.6-0
523511# > [82] future_1.30.0 nlme_3.1-157 mime_0.12
524- # > [85] compiler_4.2.1 rstudioapi_0.14 plotly_4.10.1
525- # > [88] png_0.1-8 spatstat.utils_3.0-1 tibble_3.1.8
526- # > [91] bslib_0.4.2 stringi_1.7.12 highr_0.10
527- # > [94] forcats_1.0.0 lattice_0.20-45 Matrix_1.5-3
528- # > [97] vctrs_0.5.2 pillar_1.8.1 lifecycle_1.0.3
529- # > [100] rhdf5filters_1.8.0 spatstat.geom_3.0-3 lmtest_0.9-40
530- # > [103] jquerylib_0.1.4 RcppAnnoy_0.0.20 data.table_1.14.6
531- # > [106] cowplot_1.1.1 bitops_1.0-7 irlba_2.3.5.1
532- # > [109] httpuv_1.6.7 patchwork_1.1.2 R6_2.5.1
533- # > [112] promises_1.2.0. 1 KernSmooth_2.23-20 gridExtra_2.3
534- # > [115] parallelly_1.33.0 codetools_0.2-18 assertthat_0.2.1
535- # > [118] MASS_7.3-57 rhdf5_2.40.0 rprojroot_2.0.3
536- # > [121] withr_2.5.0 SeuratObject_4.1.3 sctransform_0.3.5
537- # > [124] GenomeInfoDbData_1.2.8 parallel_4.2.1 grid_4 .2.1
538- # > [127] tidyr_1.3.0 rmarkdown_2.20 Rtsne_0.16
539- # > [130] spatstat.explore_3.0-5 shiny_1.7.4
512+ # > [85] compiler_4.2.1 rstudioapi_0.14 curl_4.3.3
513+ # > [88] plotly_4.10.1 png_0.1-8 spatstat.utils_3.0-1
514+ # > [91] tibble_3.1.8 bslib_0.4.2 stringi_1.7.12
515+ # > [94] highr_0.10 forcats_1.0.0 lattice_0.20-45
516+ # > [97] Matrix_1.5-3 vctrs_0.5.2 pillar_1.8.1
517+ # > [100] lifecycle_1.0.3 rhdf5filters_1.8.0 spatstat.geom_3.0-3
518+ # > [103] lmtest_0.9-40 jquerylib_0.1.4 RcppAnnoy_0.0.20
519+ # > [106] data.table_1.14.6 cowplot_1.1.1 bitops_1.0-7
520+ # > [109] irlba_2.3.5.1 httpuv_1.6.7 patchwork_1.1.2
521+ # > [112] R6_2.5. 1 promises_1.2.0.1 KernSmooth_2.23-20
522+ # > [115] gridExtra_2.3 parallelly_1.33.0 codetools_0.2-18
523+ # > [118] assertthat_0.2.1 MASS_7.3-57 rhdf5_2.40.0
524+ # > [121] rprojroot_2.0.3 withr_2.5.0 SeuratObject_4.1.3
525+ # > [124] sctransform_0.3.5 GenomeInfoDbData_1.2.8 parallel_4 .2.1
526+ # > [127] grid_4.2.1 tidyr_1.3.0 rmarkdown_2.20
527+ # > [130] Rtsne_0.16 spatstat.explore_3.0-5 shiny_1.7.4
540528```
0 commit comments