1111from nomenclature_tools import nomenclature_2_nodes_n_edges
1212from pcl_id_factory import get_class_id , get_individual_id , get_taxonomy_id , get_dataset_id , get_marker_gene_set_id
1313
14-
1514log = logging .getLogger (__name__ )
1615
1716PCL_BASE = 'http://purl.obolibrary.org/obo/PCL_'
2423ENSEMBLE_PATH = os .path .join (os .path .dirname (os .path .realpath (__file__ )), "../templates/{}.tsv" )
2524CROSS_SPECIES_PATH = os .path .join (os .path .dirname (os .path .realpath (__file__ )),
2625 "../dendrograms/nomenclature_table_CCN202002270.csv" )
27- ALLEN_DESCRIPTIONS_PATH = os .path .join (os .path .dirname (os .path .realpath (__file__ )),
28- '../dendrograms/MOp_taxonomies_ontology/All Descriptions_{}.json' )
26+
27+ # centralized data files
28+ ALLEN_DESCRIPTIONS_PATH = "{}/{}/All Descriptions_{}.json"
29+ DATASET_INFO_CSV = "{}/{}/{}_landingpage_dataset_info.csv"
30+ TAXONOMY_INFO_CSV = "{}/{}/{}_Taxonomy_Info_Panel.csv"
2931
3032EXPRESSION_SEPARATOR = "|"
3133
3234
33- def generate_ind_template (taxonomy_file_path , output_filepath ):
35+ def generate_ind_template (taxonomy_file_path , centralized_data_folder , output_filepath ):
3436 path_parts = taxonomy_file_path .split (os .path .sep )
3537 taxon = path_parts [len (path_parts ) - 1 ].split ("." )[0 ]
3638
@@ -42,7 +44,11 @@ def generate_ind_template(taxonomy_file_path, output_filepath):
4244
4345 dend_tree = generate_dendrogram_tree (dend )
4446 taxonomy_config = read_taxonomy_config (taxon )
45- allen_descriptions = read_allen_descriptions (ALLEN_DESCRIPTIONS_PATH , taxonomy_config ['Species_abbv' ][0 ])
47+
48+ taxonomy_folder_name = get_centralized_taxonomy_folder (taxonomy_config )
49+ allen_desc_file = ALLEN_DESCRIPTIONS_PATH .format (centralized_data_folder , taxonomy_folder_name ,
50+ taxonomy_config ['Species_abbv' ][0 ])
51+ allen_descriptions = read_allen_descriptions (allen_desc_file )
4652
4753 subtrees = get_subtrees (dend_tree , taxonomy_config )
4854
@@ -108,7 +114,6 @@ def generate_ind_template(taxonomy_file_path, output_filepath):
108114 if allen_data ["aliases" ][0 ]:
109115 d ['Aliases' ] = '|' .join ([alias .strip () for alias in str (allen_data ["aliases" ][0 ]).split ("|" )])
110116
111- # There should only be one!
112117 dl .append (d )
113118 robot_template = pd .DataFrame .from_records (dl )
114119 robot_template .to_csv (output_filepath , sep = "\t " , index = False )
@@ -382,9 +387,8 @@ def generate_cross_species_template(taxonomy_file_path, output_filepath):
382387 class_robot_template .to_csv (output_filepath , sep = "\t " , index = False )
383388
384389
385- def generate_taxonomies_template (taxonomy_metadata_path , output_filepath ):
390+ def generate_taxonomies_template (centralized_data_folder , output_filepath ):
386391 taxon_configs = read_taxonomy_details_yaml ()
387- headers , taxonomies_metadata = read_csv_to_dict (taxonomy_metadata_path )
388392
389393 robot_template_seed = {'ID' : 'ID' ,
390394 'TYPE' : 'TYPE' ,
@@ -397,7 +401,12 @@ def generate_taxonomies_template(taxonomy_metadata_path, output_filepath):
397401 'Species Label' : "A skos:prefLabel" ,
398402 'Age' : "A 'has_age'" ,
399403 'Sex' : "A 'has_sex'" ,
400- 'Primary Citation' : "A oboInOwl:hasDbXref"
404+ 'Primary Citation' : "A oboInOwl:hasDbXref" ,
405+ 'Title' : "A dcterms:title" ,
406+ 'Description' : "A rdfs:comment" ,
407+ 'Attribution' : "A rdfs:provenance" ,
408+ 'SubDescription' : "A rdfs:description" ,
409+ 'Anatomy' : "A rdfs:subject"
401410 }
402411 dl = [robot_template_seed ]
403412
@@ -409,60 +418,89 @@ def generate_taxonomies_template(taxonomy_metadata_path, output_filepath):
409418 d ['Label' ] = taxon_config ["Taxonomy_id" ]
410419 d ['Anatomic Region' ] = taxon_config ['Brain_region' ][0 ]
411420 d ['Primary Citation' ] = taxon_config ['PMID' ][0 ]
412- if taxon_config ["Taxonomy_id" ] in taxonomies_metadata :
413- taxonomy_metadata = taxonomies_metadata [taxon_config ["Taxonomy_id" ]]
414- d ['Number of Cell Types' ] = taxonomy_metadata ["Cell Types" ]
415- d ['Number of Cell Subclasses' ] = taxonomy_metadata ["Cell Subclasses" ]
416- d ['Number of Cell Classes' ] = taxonomy_metadata ["Cell Classes" ]
417- d ['Species Label' ] = taxonomy_metadata ["Species" ]
418- d ['Age' ] = taxonomy_metadata ["Age" ]
419- d ['Sex' ] = taxonomy_metadata ["Sex" ]
421+
422+ add_taxonomy_info_panel_properties (centralized_data_folder , d , taxon_config )
420423
421424 dl .append (d )
422425 robot_template = pd .DataFrame .from_records (dl )
423426 robot_template .to_csv (output_filepath , sep = "\t " , index = False )
424427
425428
426- def generate_datasets_template (dataset_metadata_path , output_filepath ):
427- path_parts = dataset_metadata_path .split (os .path .sep )
428- taxonomy_id = path_parts [len (path_parts ) - 1 ].split ("_" )[0 ]
429-
430- headers , dataset_metadata = read_csv_to_dict (dataset_metadata_path , generated_ids = True )
431-
432- robot_template_seed = {'ID' : 'ID' ,
433- 'TYPE' : 'TYPE' ,
434- 'Entity Type' : 'TI %' ,
435- 'Label' : 'LABEL' ,
436- 'Taxonomy' : 'AI schema:includedInDataCatalog' ,
437- 'Cell Count' : "AT 'cell_count'^^xsd:integer" ,
438- 'Nuclei Count' : "AT 'nuclei_count'^^xsd:integer" ,
439- 'Description' : "A rdfs:comment" ,
440- 'Download Link' : "A schema:archivedAt" ,
441- 'Explore Link' : "A schema:discussionUrl"
442- }
443- dl = [robot_template_seed ]
444-
445- dataset_index = 0
446- for dataset in dataset_metadata :
447- d = dict ()
448- d ['ID' ] = 'PCL:' + get_dataset_id (taxonomy_id , dataset_index )
449- d ['TYPE' ] = 'owl:NamedIndividual'
450- d ['Entity Type' ] = 'schema:Dataset' # Taxonomy
451- d ['Label' ] = dataset_metadata [dataset ]['Dataset' ]
452- d ['Taxonomy' ] = 'PCL:' + get_taxonomy_id (taxonomy_id )
453- cells_nuclei = dataset_metadata [dataset ]['cells/nuclei' ]
454- if 'nuclei' in cells_nuclei :
455- d ['Nuclei Count' ] = int ('' .join (c for c in cells_nuclei if c .isdigit ()))
456- elif 'cells' in cells_nuclei :
457- d ['Cell Count' ] = int ('' .join (c for c in cells_nuclei if c .isdigit ()))
458- d ['Description' ] = dataset_metadata [dataset ]['text' ]
459- d ['Download Link' ] = dataset_metadata [dataset ]['download_link' ]
460- d ['Explore Link' ] = dataset_metadata [dataset ]['explore_link' ]
461-
462- dataset_index += 1
463- dl .append (d )
464- robot_template = pd .DataFrame .from_records (dl )
465- robot_template .to_csv (output_filepath , sep = "\t " , index = False )
def add_taxonomy_info_panel_properties(centralized_data_folder, d, taxon_config):
    """
    Copies the taxonomy's info panel metadata into the given robot template row.

    The metadata is read from the '<Taxonomy_id>_Taxonomy_Info_Panel.csv' file that is expected in the
    taxonomy's own folder under the centralized data folder.
    Args:
        centralized_data_folder: root folder of the centralized data files
        d: robot template row (dict) to populate, modified in place
        taxon_config: taxonomy configuration

    Raises:
        ValueError: if the taxonomy info panel file cannot be found at the expected location
    """
    taxonomy_id = taxon_config["Taxonomy_id"]
    taxonomy_metadata_path = TAXONOMY_INFO_CSV.format(centralized_data_folder,
                                                      get_centralized_taxonomy_folder(taxon_config),
                                                      taxonomy_id)
    if not os.path.isfile(taxonomy_metadata_path):
        # Name the file that is actually missing (the info panel, not the landing page dataset info).
        raise ValueError("Couldn't find taxonomy '{}' info panel file at: '{}'"
                         .format(taxonomy_id, taxonomy_metadata_path))

    _, taxonomies_metadata = read_csv_to_dict(taxonomy_metadata_path)
    # NOTE(review): presumably the parsed rows are keyed by taxonomy id; a missing row or column
    # surfaces as the lookup error of the parsed data - confirm against read_csv_to_dict.
    taxonomy_metadata = taxonomies_metadata[taxonomy_id]

    # (robot template column, info panel csv column) pairs, in template column order
    column_mapping = (
        ('Number of Cell Types', "Cell Types"),
        ('Number of Cell Subclasses', "Cell Subclasses"),
        ('Number of Cell Classes', "Cell Classes"),
        ('Species Label', "Species"),
        ('Age', "Age"),
        ('Sex', "Sex"),
        ('Title', "header"),
        ('Description', "mainDescription"),
        ('Attribution', "attribution"),
        ('SubDescription', "subDescription"),
        ('Anatomy', "Anatomy"),
    )
    for template_column, csv_column in column_mapping:
        d[template_column] = taxonomy_metadata[csv_column]
450+
451+
def generate_datasets_template(centralized_data_folder, output_filepath):
    """
    Generates the datasets robot template of a taxonomy and writes it as a tab separated file.

    The taxonomy id is taken from the output file name (the part before the first "_"). The dataset
    metadata is read from the '<taxonomy_id>_landingpage_dataset_info.csv' file that is expected in the
    taxonomy's own folder under the centralized data folder.
    Args:
        centralized_data_folder: root folder of the centralized data files
        output_filepath: Output file path, its file name must start with '<taxonomy_id>_'

    Raises:
        ValueError: if the landing page dataset info file cannot be found at the expected location
    """
    # basename also copes with '/' separated paths on platforms whose os.path.sep differs
    taxonomy_id = os.path.basename(output_filepath).split("_")[0]
    taxonomy_config = read_taxonomy_config(taxonomy_id)

    expected_file_name = DATASET_INFO_CSV.format(centralized_data_folder,
                                                 get_centralized_taxonomy_folder(taxonomy_config),
                                                 taxonomy_id)
    if not os.path.isfile(expected_file_name):
        raise ValueError("Couldn't find taxonomy '{}' landingpage dataset info file at: '{}'"
                         .format(taxonomy_id, expected_file_name))

    _, dataset_metadata = read_csv_to_dict(expected_file_name, generated_ids=True)

    # First record is the robot template header row (column name -> robot template string)
    robot_template_seed = {'ID': 'ID',
                           'TYPE': 'TYPE',
                           'Entity Type': 'TI %',
                           'Label': 'LABEL',
                           'PrefLabel': 'A skos:prefLabel',
                           'Symbol': 'A IAO:0000028',
                           'Taxonomy': 'AI schema:includedInDataCatalog',
                           'Cell Count': "AT 'cell_count'^^xsd:integer",
                           'Nuclei Count': "AT 'nuclei_count'^^xsd:integer",
                           'Description': "A rdfs:comment",
                           'Download Link': "A schema:archivedAt",
                           'Explore Link': "A schema:discussionUrl"
                           }
    dl = [robot_template_seed]

    # The position of the dataset in the file is used to generate its ID
    for dataset_index, dataset_key in enumerate(dataset_metadata):
        dataset = dataset_metadata[dataset_key]

        d = dict()
        d['ID'] = 'PCL:' + get_dataset_id(taxonomy_id, dataset_index)
        d['TYPE'] = 'owl:NamedIndividual'
        d['Entity Type'] = 'schema:Dataset'
        d['Label'] = dataset['Ontology Name']
        d['PrefLabel'] = dataset['Dataset']
        d['Symbol'] = dataset['Ontology Symbol']
        d['Taxonomy'] = 'PCL:' + get_taxonomy_id(taxonomy_id)

        # The 'cells/nuclei' value carries both the count and its unit; keep only the digits as the
        # count. Nuclei takes precedence when both words are present.
        cells_nuclei = dataset['cells/nuclei']
        if 'nuclei' in cells_nuclei:
            d['Nuclei Count'] = int(''.join(c for c in cells_nuclei if c.isdigit()))
        elif 'cells' in cells_nuclei:
            d['Cell Count'] = int(''.join(c for c in cells_nuclei if c.isdigit()))

        d['Description'] = dataset['text']
        d['Download Link'] = dataset['download_link']
        d['Explore Link'] = dataset['explore_link']

        dl.append(d)

    robot_template = pd.DataFrame.from_records(dl)
    robot_template.to_csv(output_filepath, sep="\t", index=False)
466504
467505
468506def generate_marker_gene_set_template (taxonomy_file_path , output_filepath ):
@@ -563,3 +601,15 @@ def merge_class_templates(base_tsv, curation_tsv, output_filepath):
563601 output_filepath: Output file path
564602 """
565603 merge_tables (base_tsv , curation_tsv , output_filepath )
604+
605+
def get_centralized_taxonomy_folder(taxonomy_config):
    """
    Builds the name of the taxonomy's folder in the centralized data location.

    Expected folder name is: lower(Species_abbv) + Brain_region_abbv + "_" + Taxonomy_id
    Args:
        taxonomy_config: taxonomy configuration

    Returns: expected centralized data folder name for the given taxonomy
    """
    # Only the first listed species / brain region abbreviation is used. Every part is rendered
    # through format so that non-string config values (e.g. a numeric id) don't break the join.
    species_abbv = str(taxonomy_config['Species_abbv'][0]).lower()
    brain_region_abbv = taxonomy_config['Brain_region_abbv'][0]
    return "{}{}_{}".format(species_abbv, brain_region_abbv, taxonomy_config["Taxonomy_id"])
0 commit comments