@@ -39,27 +39,27 @@ library(stringr)
3939
4040``` r
4141get_metadata()
42- # > # Source: table</stornext/Home/data/allstaff/m/mangiola.s/.cache/R/CuratedAtlasQueryR/metadata.parquet> [?? x 56]
43- # > # Database: DuckDB 0.6.2-dev1166 [unknown@Linux 3.10.0-1160.81.1.el7.x86_64:R 4.2.0/:memory:]
44- # > .cell sampl…¹ .sample .samp…² assay assay…³ file_…⁴ cell_…⁵ cell_…⁶ devel…⁷
45- # > <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
46- # > 1 AAACCT… 8a0fe0… 5f20d7… D17PrP… 10x … EFO:00… 1e334b… basal … CL:000… 31-yea…
47- # > 2 AAACCT… 8a0fe0… 5f20d7… D17PrP… 10x … EFO:00… 1e334b… basal … CL:000… 31-yea…
48- # > 3 AAACCT… 02eb2e… 5f20d7… D17PrP… 10x … EFO:00… 30f754… lumina… CL:000… 31-yea…
49- # > 4 AAACCT… 02eb2e… 5f20d7… D17PrP… 10x … EFO:00… 30f754… lumina… CL:000… 31-yea…
50- # > 5 AAACCT… 8a0fe0… 5f20d7… D17PrP… 10x … EFO:00… 1e334b… basal … CL:000… 31-yea…
51- # > 6 AAACCT… 8a0fe0… 5f20d7… D17PrP… 10x … EFO:00… 1e334b… basal … CL:000… 31-yea…
52- # > 7 AAACCT… 8a0fe0… 5f20d7… D17PrP… 10x … EFO:00… 1e334b… basal … CL:000… 31-yea…
53- # > 8 AAACGG… 8a0fe0… 5f20d7… D17PrP… 10x … EFO:00… 1e334b… basal … CL:000… 31-yea…
54- # > 9 AAACGG… 02eb2e… 5f20d7… D17PrP… 10x … EFO:00… 30f754… lumina… CL:000… 31-yea…
55- # > 10 AAACGG… 8a0fe0… 5f20d7… D17PrP… 10x … EFO:00… 1e334b… basal … CL:000… 31-yea…
56- # > # … with more rows, 46 more variables:
42+ # > # Source: table</stornext/Home/data/allstaff/m/mangiola.s/.cache/R/CuratedAtlasQueryR/metadata.0.2.2.parquet> [?? x 56]
43+ # > # Database: DuckDB 0.7.0 [unknown@Linux 3.10.0-1160.81.1.el7.x86_64:R 4.2.0/:memory:]
44+ # > `_cell` _samp…¹ cell_…² cell_…³ confi…⁴ cell_…⁵ cell_…⁶ cell_…⁷ sampl…⁸
45+ # > <chr> <chr> <chr> <chr> <dbl> <chr> <chr> <chr> <chr>
46+ # > 1 AAACCTGAGAGA… 689e2f… basal … basal_… 1 <NA> <NA> <NA> f297c7…
47+ # > 2 AAACCTGAGTTG… 689e2f… basal … basal_… 1 <NA> <NA> <NA> f297c7…
48+ # > 3 AAACCTGCAGTC… 689e2f… lumina… lumina… 1 <NA> <NA> <NA> 930938…
49+ # > 4 AAACCTGCAGTT… 689e2f… lumina… lumina… 1 <NA> <NA> <NA> 930938…
50+ # > 5 AAACCTGGTCTA… 689e2f… basal … basal_… 1 <NA> <NA> <NA> f297c7…
51+ # > 6 AAACCTGTCGTA… 689e2f… basal … basal_… 1 <NA> <NA> <NA> f297c7…
52+ # > 7 AAACCTGTCTTG… 689e2f… basal … basal_… 1 <NA> <NA> <NA> f297c7…
53+ # > 8 AAACGGGAGTAC… 689e2f… basal … basal_… 1 <NA> <NA> <NA> f297c7…
54+ # > 9 AAACGGGAGTAG… 689e2f… lumina… lumina… 1 <NA> <NA> <NA> 930938…
55+ # > 10 AAACGGGAGTGG… 689e2f… basal … basal_… 1 <NA> <NA> <NA> f297c7…
56+ # > # … with more rows, 47 more variables: `_sample_name` <chr>, assay <chr>,
57+ # > # assay_ontology_term_id <chr>, file_id_db <chr>,
58+ # > # cell_type_ontology_term_id <chr>, development_stage <chr>,
5759# > # development_stage_ontology_term_id <chr>, disease <chr>,
5860# > # disease_ontology_term_id <chr>, ethnicity <chr>,
59- # > # ethnicity_ontology_term_id <chr>, file_id <chr>, is_primary_data.x <chr>,
60- # > # organism <chr>, organism_ontology_term_id <chr>, sample_placeholder <chr>,
61- # > # sex <chr>, sex_ontology_term_id <chr>, tissue <chr>,
62- # > # tissue_ontology_term_id <chr>, tissue_harmonised <chr>, age_days <dbl>, …
61+ # > # ethnicity_ontology_term_id <chr>, experiment___ <chr>, file_id <chr>,
62+ # > # is_primary_data_x <chr>, organism <chr>, organism_ontology_term_id <chr>, …
6363```
6464
6565### Explore the tissue
@@ -68,19 +68,19 @@ get_metadata()
6868get_metadata() | >
6969 dplyr :: distinct(tissue , file_id )
7070# > # Source: SQL [?? x 2]
71- # > # Database: DuckDB 0.6.2-dev1166 [unknown@Linux 3.10.0-1160.81.1.el7.x86_64:R 4.2.0/:memory:]
72- # > tissue file_id
73- # > <chr> <chr>
74- # > 1 cortex of kidney 2977b3fa-e4d6-4929-8540-ae12d33a3c53
75- # > 2 entorhinal cortex 29d5d028-6f90-4943-91f7-fa3f93731de8
76- # > 3 middle temporal gyrus 2a689fda-d335-4ac0-81b1-a356fdf939db
77- # > 4 respiratory airway 2c2d5bea-8be7-4227-8a56-f2a85d57fa56
78- # > 5 thymus 2ec94470-8171-4825-8346-34d77383438b
79- # > 6 ileum 2f893c30-c91b-487b-b1f8-b386a4ef91a3
80- # > 7 cortex of kidney 2fe625b6-8c85-4818-a9c5-1189303f6e2b
81- # > 8 fimbria of uterine tube 3044b5dd-a499-456e-86d9-94769bc3b63e
82- # > 9 ampulla of uterine tube 3044b5dd-a499-456e-86d9-94769bc3b63e
83- # > 10 temporal cortex 4e4bbb2d-f341-4523-a5a0-5407d8b03e0e
71+ # > # Database: DuckDB 0.7.0 [unknown@Linux 3.10.0-1160.81.1.el7.x86_64:R 4.2.0/:memory:]
72+ # > tissue file_id
73+ # > <chr> <chr>
74+ # > 1 renal medulla 52cb5191-2976-4077-ba88-47c76692bef0
75+ # > 2 pancreas 53329245-06f3-45a4-bf15-ed61f628ff83
76+ # > 3 blood 5500774a-6ebe-4ddf-adce-90302b7cd007
77+ # > 4 blood 550760cb-ede9-4e6b-b6ab-7152f2ce29e1
78+ # > 5 intestine 556bb449-bbef-43d3-9487-87031fc0decb
79+ # > 6 lung 56e0359f-ee8d-4ba5-a51d-159a183643e5
80+ # > 7 adrenal gland 56e0359f-ee8d-4ba5-a51d-159a183643e5
81+ # > 8 pleural effusion 56e0359f-ee8d-4ba5-a51d-159a183643e5
82+ # > 9 liver 56e0359f-ee8d-4ba5-a51d-159a183643e5
83+ # > 10 lymph node 56e0359f-ee8d-4ba5-a51d-159a183643e5
8484# > # … with more rows
8585```
8686
@@ -125,15 +125,14 @@ single_cell_counts =
125125
126126single_cell_counts
127127# > class: SingleCellExperiment
128- # > dim: 60661 1571
128+ # > dim: 35615 1571
129129# > metadata(0):
130130# > assays(2): counts cpm
131- # > rownames(60661 ): TSPAN6 TNMD ... RP11-175I6.6 PRSS43P
131+ # > rownames(35615 ): TSPAN6 TNMD ... LNCDAT HRURF
132132# > rowData names(0):
133133# > colnames(1571): ACAGCCGGTCCGTTAA_F02526_1 GGGAATGAGCCCAGCT_F02526_1 ...
134134# > TACAACGTCAGCATTG_SC84_1 CATTCGCTCAATACCG_F02526_1
135- # > colData names(56): sample_id_db .sample ... n_tissue_in_cell_type
136- # > original_cell_id
135+ # > colData names(56): _sample cell_type ... updated_at_y original_cell_id
137136# > reducedDimNames(0):
138137# > mainExpName: NULL
139138# > altExpNames(0):
@@ -161,15 +160,14 @@ single_cell_counts =
161160
162161single_cell_counts
163162# > class: SingleCellExperiment
164- # > dim: 60661 1571
163+ # > dim: 35615 1571
165164# > metadata(0):
166165# > assays(1): cpm
167- # > rownames(60661 ): TSPAN6 TNMD ... RP11-175I6.6 PRSS43P
166+ # > rownames(35615 ): TSPAN6 TNMD ... LNCDAT HRURF
168167# > rowData names(0):
169168# > colnames(1571): ACAGCCGGTCCGTTAA_F02526_1 GGGAATGAGCCCAGCT_F02526_1 ...
170169# > TACAACGTCAGCATTG_SC84_1 CATTCGCTCAATACCG_F02526_1
171- # > colData names(56): sample_id_db .sample ... n_tissue_in_cell_type
172- # > original_cell_id
170+ # > colData names(56): _sample cell_type ... updated_at_y original_cell_id
173171# > reducedDimNames(0):
174172# > mainExpName: NULL
175173# > altExpNames(0):
@@ -201,8 +199,7 @@ single_cell_counts
201199# > rowData names(0):
202200# > colnames(1571): ACAGCCGGTCCGTTAA_F02526_1 GGGAATGAGCCCAGCT_F02526_1 ...
203201# > TACAACGTCAGCATTG_SC84_1 CATTCGCTCAATACCG_F02526_1
204- # > colData names(56): sample_id_db .sample ... n_tissue_in_cell_type
205- # > original_cell_id
202+ # > colData names(56): _sample cell_type ... updated_at_y original_cell_id
206203# > reducedDimNames(0):
207204# > mainExpName: NULL
208205# > altExpNames(0):
@@ -228,13 +225,13 @@ single_cell_counts =
228225# > ℹ Synchronising files
229226# > ℹ Reading files.
230227# > ℹ Compiling Single Cell Experiment.
231- # > Warning: Feature names cannot have underscores ('_'), replacing with dashes
232- # > ('-')
228+ # > Warning: Non-unique features (rownames) present in the input matrix, making
229+ # > unique
233230
234231single_cell_counts
235232# > An object of class Seurat
236- # > 60661 features across 1571 samples within 1 assay
237- # > Active assay: originalexp (60661 features, 0 variable features)
233+ # > 35615 features across 1571 samples within 1 assay
234+ # > Active assay: originalexp (35615 features, 0 variable features)
238235```
239236
240237## Visualise gene transcription
@@ -250,7 +247,7 @@ get_metadata() |>
250247
251248 # Filter and subset
252249 filter(cell_type_harmonised == " nk" ) | >
253- select(.cell , file_id_db , disease , file_id , tissue_harmonised ) | >
250+ select(cell_ , file_id_db , disease , file_id , tissue_harmonised ) | >
254251
255252 # Get counts per million for NCAM1 gene
256253 get_SingleCellExperiment(assays = " cpm" , features = " NCAM1" ) | >
@@ -286,9 +283,8 @@ cellxgene.cziscience.com)
286283Sample-specific columns (definitions available at
287284cellxgene.cziscience.com)
288285
289- ` .sample ` , ` .sample_name ` , ` age_days ` , ` assay ` ,
290- ` assay_ontology_term_id ` , ` development_stage ` ,
291- ` development_stage_ontology_term_id ` , ` ethnicity ` ,
286+ ` sample_ ` , ` sample_name ` , ` age_days ` , ` assay ` , ` assay_ontology_term_id ` ,
287+ ` development_stage ` , ` development_stage_ontology_term_id ` , ` ethnicity ` ,
292288` ethnicity_ontology_term_id ` , ` experiment___ ` , ` organism ` ,
293289` organism_ontology_term_id ` , ` sample_placeholder ` , ` sex ` ,
294290` sex_ontology_term_id ` , ` tissue ` , ` tissue_harmonised ` ,
@@ -298,7 +294,7 @@ cellxgene.cziscience.com)
298294Cell-specific columns (definitions available at
299295cellxgene.cziscience.com)
300296
301- ` .cell ` , ` cell_type ` , ` cell_type_ontology_term_idm ` ,
297+ ` cell_ ` , ` cell_type ` , ` cell_type_ontology_term_id ` ,
302298` cell_type_harmonised ` , ` confidence_class ` ,
303299` cell_annotation_azimuth_l2 ` , ` cell_annotation_blueprint_singler `
304300
@@ -320,8 +316,8 @@ present in the original CELLxGENE metadata
320316 Monaco reference
321317- ` sample_id_db ` : Sample subdivision for internal use
322318- ` file_id_db ` : File subdivision for internal use
323- - ` .sample ` : Sample ID
324- - ` . sample_name` : How samples were defined
319+ - ` sample_ ` : Sample ID
320+ - ` sample_name ` : How samples were defined
325321
326322# RNA abundance
327323
0 commit comments