1+ from collections import defaultdict
12import os
23import pathlib
34import logging
@@ -11,25 +12,63 @@ def __init__(self, tool_yaml):
1112 self .yaml_data = {}
1213
1314 with open (tool_yaml , 'r' ) as f :
14- self .data = safe_load (f )
15+ self .yaml_data = safe_load (f )
1516
1617 logging .info ('Processing ' + tool_yaml )
1718
def write_yaml(self, output_dir, dry_run=False, remove_input=False):
    """Write one ``<tool>.biocontainers.yaml`` file per software entry.

    Entries under the ``softwares`` key of ``self.yaml_data`` are grouped by
    their ``extra.identifiers.biotools`` label; each entry is written to
    ``<output_dir>/<biotool id>/<tool name>.biocontainers.yaml`` and contains
    only that entry. An entry without a (non-empty) label is filed under its
    own name, unless exactly one non-empty id is present in the file, in
    which case the unlabelled entries are assumed to share that id.

    Args:
        output_dir: Root directory under which per-id directories are created.
        dry_run: When true, only log what would be done; nothing is written
            or removed.
        remove_input: When true (and not a dry run), delete ``self.yaml_path``
            after all output files have been written.

    Returns:
        True on success; False when ``softwares`` is missing/empty or when
        the file mixes several distinct non-empty biotool ids.
    """
    biotools_label = 'extra.identifiers.biotools'

    softwares = self.yaml_data.get('softwares')
    if not softwares:
        logging.error('"softwares" key not found or empty')
        return False

    # Tool names lacking an id, mapped to the id they are assumed to share.
    to_merge = {}
    if len(softwares) > 1:
        biotool_ids = set()
        unlabelled = set()
        for tool_name, soft in softwares.items():
            # A missing label and an empty/None label are treated alike.
            biotool_id = soft['labels'].get(biotools_label) or ''
            biotool_ids.add(biotool_id)
            if not biotool_id:
                unlabelled.add(tool_name)

        if len(biotool_ids) > 1:
            if len(biotool_ids) == 2 and '' in biotool_ids:
                logging.warning("Both empty and non-empty biotool id in {}. Assuming they are the same".format(self.yaml_path))
                assumed_biotool = next(x for x in biotool_ids if x)
                logging.warning("Adding {} to biotool {}".format(unlabelled, assumed_biotool))
                to_merge = dict.fromkeys(unlabelled, assumed_biotool)
            else:
                logging.error("Multiple distinct biotools in {}: stopping".format(self.yaml_path))
                return False

    # biotool id -> [(tool name, tool description), ...]
    grouped = defaultdict(list)
    for tool_name, values in softwares.items():
        # Fall back to the tool name when the id is absent *or* empty, so the
        # output never lands directly in output_dir.
        biotool_id = values['labels'].get(biotools_label) or tool_name
        if tool_name in to_merge:
            biotool_id = to_merge[tool_name]
            logging.warning("Assuming {} biotool id is {}".format(tool_name, biotool_id))
        grouped[biotool_id].append((tool_name, values))

    for biotool_id, tools in grouped.items():
        target_dir = os.path.join(output_dir, biotool_id)
        action = "Moving" if len(tools) == 1 else "Splitting"
        for tool_name, values in tools:
            output_path = os.path.join(target_dir, '{}.biocontainers.yaml'.format(tool_name))
            logging.info("{} {} to {}".format(action, self.yaml_path, output_path))

            if dry_run:
                continue

            pathlib.Path(target_dir).mkdir(parents=True, exist_ok=True)
            with open(output_path, 'w') as f:
                # Write only this tool's entry, not the whole input document.
                # NOTE(review): `dump` is defined outside this block,
                # presumably yaml.dump - confirm against the file's imports.
                dump({'softwares': {tool_name: values}}, f)

    # Remove the input once, after every output exists; doing it per tool
    # would fail on the second tool because the file is already gone.
    if remove_input and not dry_run:
        logging.info("Removing {}".format(self.yaml_path))
        os.remove(self.yaml_path)

    return True
0 commit comments