Skip to content

Commit 27f119f

Browse files
committed
More stuff
1 parent a17c1c7 commit 27f119f

File tree

3 files changed

+67
-21
lines changed

3 files changed

+67
-21
lines changed

biotools_cleaner/cleaner.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,12 @@
11
#!/usr/bin/env python
22

33
import argparse
4+
import logging
45

5-
from .tool import Tool
6+
from tool import Tool
67

8+
logging.basicConfig()
9+
logging.root.setLevel(logging.INFO)
710

811
if __name__ == '__main__':
912
parser = argparse.ArgumentParser()

biotools_cleaner/cleaner_batch.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2,8 +2,12 @@
22

33
import argparse
44
import pathlib
5+
import logging
56

6-
from .tool import Tool
7+
from tool import Tool
8+
9+
logging.basicConfig()
10+
# logging.root.setLevel(logging.INFO)
711

812
if __name__ == '__main__':
913
parser = argparse.ArgumentParser()
@@ -13,5 +17,5 @@
1317
args = parser.parse_args()
1418

1519
for path in pathlib.Path(args.rse_repo).rglob("biocontainers.yaml"):
16-
tool = Tool(path.name)
20+
tool = Tool(str(path.resolve()))
1721
tool.write_yaml(args.rse_repo, dry_run=args.dry_run, remove_input=args.cleanup)

biotools_cleaner/tool.py

Lines changed: 57 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
from collections import defaultdict
12
import os
23
import pathlib
34
import logging
@@ -11,25 +12,63 @@ def __init__(self, tool_yaml):
1112
self.yaml_data = {}
1213

1314
with open(tool_yaml, 'r') as f:
14-
self.data = safe_load(f)
15+
self.yaml_data = safe_load(f)
1516

1617
logging.info('Processing ' + tool_yaml)
1718

1819
def write_yaml(self, output_dir, dry_run=False, remove_input=False):
19-
if not self.yaml_data.get('software'):
20-
logging.error('"software" key not found or empty')
21-
if len(self.yaml_data.get('software')) > 1:
22-
logging.error('More than one software in yaml file: this should not happen')
23-
tool_name = list(self.yaml_data['software'].keys())[0]
24-
25-
output_path = os.path.join(output_dir, tool_name, '{}.biocontainers.yaml'.format(tool_name))
26-
27-
logging.info("Moving {} to {}".format(self.yaml_path, output_path))
28-
29-
if not dry_run:
30-
pathlib.Path(os.path.join(output_dir, tool_name)).mkdir(parents=True, exist_ok=True)
31-
with open(output_path, 'w') as f:
32-
dump(self.yaml_data, f)
33-
if remove_input:
34-
logging.info("Removing {}".format(self.yaml_path))
35-
os.remove(self.yaml_path)
20+
to_merge = {}
21+
if not self.yaml_data.get('softwares'):
22+
logging.error('"softwares" key not found or empty')
23+
return False
24+
if len(self.yaml_data.get('softwares')) > 1:
25+
biotool = set()
26+
non_biotool_label = set()
27+
for key, soft in self.yaml_data['softwares'].items():
28+
biotool.add(soft['labels'].get('extra.identifiers.biotools', ''))
29+
if not soft['labels'].get('extra.identifiers.biotools'):
30+
non_biotool_label.add(key)
31+
if len(biotool) > 1:
32+
if len(biotool) == 2 and '' in biotool:
33+
logging.warn("Both empty and non-empty biotool id in {}. Assuming they are the same".format(self.yaml_path))
34+
assumed_biotool = [x for x in biotool if x][0]
35+
logging.warn("Adding {} to biotool {}".format(non_biotool_label, assumed_biotool))
36+
for nbl in non_biotool_label:
37+
to_merge[nbl] = assumed_biotool
38+
else:
39+
logging.error("Multiple distinct biotools in {}: stopping".format(self.yaml_path))
40+
return False
41+
42+
data = defaultdict(list)
43+
44+
for key, values in self.yaml_data['softwares'].items():
45+
tool_name = key
46+
biotool_id = values['labels']['extra.identifiers.biotools'] if 'extra.identifiers.biotools' in values['labels'] else key
47+
48+
if tool_name in to_merge:
49+
biotool_id = to_merge[tool_name]
50+
logging.warn("Assuming {} biotool id is {}".format(tool_name, biotool_id))
51+
52+
data[biotool_id].append({"tool": tool_name, "value": values})
53+
54+
for key, values in data.items():
55+
for val in values:
56+
output_path = os.path.join(output_dir, key, '{}.biocontainers.yaml'.format(val['tool']))
57+
58+
if len(values) == 1:
59+
logging.info("Moving {} to {}".format(self.yaml_path, output_path))
60+
61+
else:
62+
logging.info("Splitting {} to {}".format(self.yaml_path, output_path))
63+
64+
if not dry_run:
65+
pathlib.Path(os.path.join(output_dir, key)).mkdir(parents=True, exist_ok=True)
66+
yaml_content = {"softwares": {}}
67+
yaml_content['softwares'][val['tool']] = val['value']
68+
69+
with open(output_path, 'w') as f:
70+
dump(self.yaml_data, f)
71+
if remove_input:
72+
logging.info("Removing {}".format(self.yaml_path))
73+
os.remove(self.yaml_path)
74+
return True

0 commit comments

Comments
 (0)