Skip to content

Commit 2a36486

Browse files
authored
Merge pull request #14 from mboudet/build_arm
Build arm
2 parents aebf7a2 + 87d4e15 commit 2a36486

File tree

6 files changed

+349
-87
lines changed

6 files changed

+349
-87
lines changed

biotools_cleaner/cleaner.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
#!/usr/bin/env python
2+
3+
import argparse
4+
import logging
5+
6+
from tool import Tool
7+
8+
# Install the default handler and let INFO-level progress messages through.
logging.basicConfig()
logging.getLogger().setLevel(logging.INFO)


if __name__ == '__main__':
    # Command line: one legacy yaml file in, one output directory out.
    cli = argparse.ArgumentParser()
    cli.add_argument('input', type=str, help="Path to yaml file")
    cli.add_argument('output', type=str, help="Output dir (ie, Research-software-ecosystem repository)")
    cli.add_argument('--dry-run', action='store_true', help="Dry run")
    cli.add_argument('--cleanup', action='store_true', help="Remove old layout files from repository")
    options = cli.parse_args()

    # Load the input file, then write it out in the per-tool layout.
    Tool(options.input).write_yaml(
        options.output,
        dry_run=options.dry_run,
        remove_input=options.cleanup,
    )

biotools_cleaner/cleaner_batch.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
#!/usr/bin/env python
2+
3+
import argparse
4+
import pathlib
5+
import logging
6+
7+
from tool import Tool
8+
9+
logging.basicConfig()
# INFO output is intentionally left off for batch runs; re-enable the line
# below to see the per-file messages emitted through logging.info.
# logging.root.setLevel(logging.INFO)


if __name__ == '__main__':
    # Command line: the repository data folder plus behaviour switches.
    parser = argparse.ArgumentParser()
    parser.add_argument('rse_repo', type=str, help="Research-software-ecosystem data folder")
    parser.add_argument('--dry-run', action='store_true', help="Dry run")
    parser.add_argument('--cleanup', action='store_true', help="Remove old layout files from repository")
    parser.add_argument('--add-label', action='store_true', help="Make sure all tools in a specific file have the same biotool label")

    args = parser.parse_args()

    # Collect every match BEFORE converting anything: write_yaml() creates
    # directories and may delete its input inside this same tree, and rglob()
    # is a lazy generator that would otherwise be walking a directory tree
    # that changes underneath it. Sorting also makes the run order stable.
    legacy_files = sorted(pathlib.Path(args.rse_repo).rglob("biocontainers.yaml"))

    for path in legacy_files:
        tool = Tool(str(path.resolve()))
        tool.write_yaml(args.rse_repo, dry_run=args.dry_run, remove_input=args.cleanup, add_biotool=args.add_label)

biotools_cleaner/tool.py

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
from collections import defaultdict
2+
import os
3+
import pathlib
4+
import logging
5+
from yaml import safe_load, dump
6+
7+
8+
class Tool:
    """One legacy ``biocontainers.yaml`` file and its conversion to per-tool files.

    The input document is expected to hold a ``softwares`` mapping of
    ``tool name -> description``. Each description may carry a ``labels``
    mapping whose ``extra.identifiers.biotools`` entry names the bio.tools id
    under which the tool is filed.
    """

    # Label holding the bio.tools identifier of a software entry.
    BIOTOOLS_LABEL = 'extra.identifiers.biotools'

    def __init__(self, tool_yaml):
        """Load the yaml document found at ``tool_yaml``.

        :param tool_yaml: path of the yaml file to read.
        :raises OSError: if the file cannot be opened.
        """
        self.yaml_path = tool_yaml

        with open(tool_yaml, 'r') as f:
            # safe_load() returns None for an empty document; normalise to a
            # dict so that write_yaml() can report it instead of crashing.
            self.yaml_data = safe_load(f) or {}

        logging.info('Processing %s', tool_yaml)

    @classmethod
    def _biotool_label(cls, entry):
        """Return the bio.tools id declared by ``entry``, or '' when absent/empty."""
        labels = entry.get('labels') if isinstance(entry, dict) else None
        if not isinstance(labels, dict):
            return ''
        value = labels.get(cls.BIOTOOLS_LABEL)
        return str(value) if value else ''

    def write_yaml(self, output_dir, dry_run=False, remove_input=False, add_biotool=False):
        """Write every software entry to ``<output_dir>/<biotool id>/<tool>.biocontainers.yaml``.

        Entries without a bio.tools label are filed under their own name,
        unless the file also contains exactly one labelled id, in which case
        that id is assumed for them.

        :param output_dir: root directory receiving the per-tool files.
        :param dry_run: only log what would be done; nothing is written or removed.
        :param remove_input: delete the input file once all outputs are written.
        :param add_biotool: store the assumed bio.tools id in the labels of
            entries that had none.
        :return: ``False`` when the document has no usable ``softwares``
            mapping or declares several distinct bio.tools ids, else ``True``.
        """
        softwares = self.yaml_data.get('softwares') if isinstance(self.yaml_data, dict) else None
        if not softwares or not isinstance(softwares, dict):
            logging.error('"softwares" key not found or empty')
            return False

        # Separate the ids that are declared from the tools that declare none.
        declared_ids = set()
        unlabeled = set()
        for name, entry in softwares.items():
            label = self._biotool_label(entry)
            if label:
                declared_ids.add(label)
            else:
                unlabeled.add(name)

        if len(declared_ids) > 1:
            logging.error("Multiple distinct biotools in %s: stopping", self.yaml_path)
            return False

        # A single declared id next to unlabeled tools: assume they belong together.
        to_merge = {}
        if declared_ids and unlabeled:
            assumed_biotool = next(iter(declared_ids))
            logging.warning("Both empty and non-empty biotool id in %s. Assuming they are the same", self.yaml_path)
            logging.warning("Adding %s to biotool %s", unlabeled, assumed_biotool)
            to_merge = dict.fromkeys(unlabeled, assumed_biotool)

        # Group the entries by the bio.tools id they will be filed under.
        grouped = defaultdict(list)
        for name, entry in softwares.items():
            biotool_id = self._biotool_label(entry) or str(name)

            if name in to_merge:
                biotool_id = to_merge[name]
                logging.warning("Assuming %s biotool id is %s", name, biotool_id)
                if add_biotool and isinstance(entry, dict):
                    logging.warning("Adding biotool label")
                    if not isinstance(entry.get('labels'), dict):
                        entry['labels'] = {}
                    entry['labels'][self.BIOTOOLS_LABEL] = biotool_id

            grouped[biotool_id].append((name, entry))

        written = set()
        for biotool_id, members in grouped.items():
            target_dir = os.path.join(output_dir, biotool_id)
            action = "Moving" if len(members) == 1 else "Splitting"

            for name, entry in members:
                output_path = os.path.join(target_dir, '{}.biocontainers.yaml'.format(name))
                logging.info("%s %s to %s", action, self.yaml_path, output_path)

                if dry_run:
                    continue

                pathlib.Path(target_dir).mkdir(parents=True, exist_ok=True)
                with open(output_path, 'w') as f:
                    # Only this tool goes into its file, not the whole input document.
                    dump({"softwares": {name: entry}}, f)
                written.add(os.path.abspath(output_path))

        # The input is removed once, after every output exists, and never
        # during a dry run.
        if remove_input:
            if dry_run:
                logging.info("Dry run: would remove %s", self.yaml_path)
            elif os.path.abspath(self.yaml_path) in written:
                # The input path doubles as one of the outputs: keep it.
                logging.warning("Not removing %s: it was just written as an output", self.yaml_path)
            else:
                logging.info("Removing %s", self.yaml_path)
                os.remove(self.yaml_path)
        return True

github-ci/src/biocontainersci/biotools.py

Lines changed: 44 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -5,21 +5,20 @@
55

66
import shutil
77
import requests
8-
import sys
98
import os
109
import logging
11-
import re
1210
import git
1311
import datetime
1412
from copy import deepcopy
15-
from yaml import load, dump
13+
from yaml import dump
1614
try:
17-
from yaml import CLoader as Loader, CDumper as Dumper
15+
from yaml import CDumper as Dumper
1816
except ImportError:
19-
from yaml import Loader, Dumper
17+
from yaml import Dumper
2018

2119
from biocontainersci.utils import BiocontainersCIException
2220

21+
2322
class Biotools:
2423

2524
GIT_REPO = 'git@github.com:bio-tools/content.git'
@@ -90,15 +89,15 @@ def create_pr(self, branch):
9089
}
9190
github_url = 'https://api.github.com/repos/%s/pulls' % ("bio-tools/content")
9291
res = requests.post(
93-
github_url,
94-
json={
95-
'title': "biocontainers-bot metadata import PR",
96-
'head': branch,
97-
"base": "master"
98-
},
99-
headers=headers
92+
github_url,
93+
json={
94+
'title': "biocontainers-bot metadata import PR",
95+
'head': branch,
96+
"base": "master"
97+
},
98+
headers=headers
10099
)
101-
if not res.status_code in [200, 201]:
100+
if res.status_code not in [200, 201]:
102101
logging.error("[biotools] Failed to create pull request: %s", res.text)
103102
return False
104103
pr = res.json()
@@ -107,13 +106,13 @@ def create_pr(self, branch):
107106
github_url = 'https://api.github.com/repos/%s/issues/%d' % ("bio-tools/content", issue)
108107

109108
res = requests.post(
110-
github_url,
111-
json={
112-
'labels': [self.BOT_LABEL],
113-
},
114-
headers=headers
109+
github_url,
110+
json={
111+
'labels': [self.BOT_LABEL],
112+
},
113+
headers=headers
115114
)
116-
if not res.status_code in [200]:
115+
if res.status_code not in [200]:
117116
logging.error("Failed to add issue label: %d" % res.status_code)
118117

119118
logging.info("Tagged issue: %d" % issue)
@@ -148,41 +147,34 @@ def run(self, f, labels, branch=None):
148147
try:
149148
(repo, branch) = self.repo_setup(branch)
150149

151-
tmpdir = self.REPO + '/data/'
152-
dirname = tmpdir + name
153-
biocontainers_file = tmpdir + name + '/biocontainers.yaml'
150+
all_tmpdir = self.REPO + '/import/biocontainers/'
151+
if not os.path.exists(all_tmpdir):
152+
os.makedirs(all_tmpdir)
153+
files_to_write = [all_tmpdir + '{}.biocontainers.yaml'.format(name)]
154154
if biotools is not None:
155-
dirname = tmpdir + biotools
156-
biocontainers_file = tmpdir + biotools + '/biocontainers.yaml'
157-
158-
if not os.path.exists(dirname):
159-
os.makedirs(dirname)
155+
biotool_tmpdir = self.REPO + '/data/{}/'.format(biotools)
156+
if not os.path.exists(biotool_tmpdir):
157+
os.makedirs(biotool_tmpdir)
158+
files_to_write.append(biotool_tmpdir + '{}.biocontainers.yaml'.format(name))
160159

161160
clabels = {}
162161
for k, v in labels.items():
163162
clabels[k] = v
164163

165164
data = {
166-
'software': name,
167-
'labels': deepcopy(clabels),
168-
'versions': []
169-
}
165+
'software': name,
166+
'labels': deepcopy(clabels),
167+
'versions': []
168+
}
169+
170170
softwares = {'softwares': {}}
171171
softwares["softwares"][name] = data
172-
if os.path.exists(biocontainers_file):
173-
with open(biocontainers_file) as fp:
174-
softwares = load(fp, Loader=Loader)
175172

176-
if name not in softwares["softwares"]:
177-
softwares["softwares"][name] = data
173+
for file_path in files_to_write:
178174

179-
exists = False
180-
for download in softwares["softwares"][name]["versions"]:
181-
if download["version"] == container_version:
182-
exists = True
183-
break
175+
if name not in softwares["softwares"]:
176+
softwares["softwares"][name] = data
184177

185-
if not exists:
186178
new_download = {
187179
"url": "biocontainers/" + name + ":" + container_version,
188180
"version": container_version,
@@ -191,14 +183,18 @@ def run(self, f, labels, branch=None):
191183
}
192184
softwares["softwares"][name]["versions"].append(new_download)
193185

194-
with open(biocontainers_file, 'w') as fp:
186+
with open(file_path, 'w') as fp:
195187
dump(softwares, fp, Dumper=Dumper)
196188

197-
repo.index.add([biocontainers_file])
198-
if biotools is not None:
199-
repo.index.commit("Add version for %s:%s" % (biotools, container_version))
200-
else:
201-
repo.index.commit("Add version for %s:%s" % (name, container_version))
189+
changed = False
190+
changed_files = [item.a_path for item in repo.index.diff(None)]
191+
for file_path in files_to_write:
192+
if file_path in changed_files:
193+
repo.index.add([file_path])
194+
changed = True
195+
196+
if changed:
197+
repo.index.commit("Add version for %s:%s" % (name, container_version))
202198
try:
203199
logging.info("[biotools] Push to branch %s" % branch)
204200

0 commit comments

Comments
 (0)