Skip to content

Commit 6fac35f

Browse files
authored
Merge pull request #18 from AlgoLab/use_snpEff
Add SnpEff to MALVIRUS pipeline
2 parents 625a7c5 + 2271e64 commit 6fac35f

31 files changed

+1054
-696
lines changed

README.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ MALVIRUS is a fast and accurate tool for genotyping haploid individuals that doe
66
It is tailored to work with virological data (including but not limited to SARS-CoV-2) and can genotype an individual directly from sequencing data in minutes.
77

88
MALVIRUS is divided into two logically distinct steps: the creation of a variant catalog from a set of assemblies and the genotype calling.
9-
The first step is based on mafft [[1]](#mafft7) and snp-sites [[2]](#snp-sites), whereas the second step is based on KMC [[3]](#kmc) and MALVA [[4]](#malva).
9+
The first step is based on mafft [[1]](#mafft7) and snp-sites [[2]](#snp-sites), whereas the second step is based on KMC [[3]](#kmc), MALVA [[4]](#malva), and SnpEff [[5]](#snpeff).
1010

1111
The variant catalog can be built once and reused for genotyping multiple individuals.
1212

@@ -30,3 +30,5 @@ bioRxiv 2020.05.05.076992; doi: [10.1101/2020.05.05.076992](https://doi.org/10.1
3030
<a id="kmc">[3]</a> Kokot, Marek, Maciej Dlugosz, and Sebastian Deorowicz. 2017. “KMC 3: counting and manipulating k-mer statistics.” Bioinformatics 33 (17): 2759–61. doi:[10.1093/bioinformatics/btx304](https://doi.org/10.1093/bioinformatics/btx304).
3131

3232
<a id="malva">[4]</a> Denti, Luca, Marco Previtali, Giulia Bernardini, Alexander Schönhuth, and Paola Bonizzoni. 2019. “MALVA: Genotyping by Mapping-Free Allele Detection of Known Variants.” iScience 18: 20–27. doi:[10.1016/j.isci.2019.07.011](https://doi.org/10.1016/j.isci.2019.07.011).
33+
34+
<a id="snpeff">[5]</a> Pablo Cingolani _et al_. 2012. “A program for annotating and predicting the effects of single nucleotide polymorphisms, SnpEff: SNPs in the genome of Drosophila melanogaster strain w1118; iso-2; iso-3” Fly 6(2): 80-92. doi:[10.4161/fly.19695](https://doi.org/10.4161/fly.19695).

environment.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,3 +12,4 @@ dependencies:
1212
- gffutils=0.10.1
1313
- snp-sites=2.5.1
1414
- malva=1.3.1
15+
- snpeff=4.5covid19

flask/app/views.py

Lines changed: 81 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
from app import app
55
from werkzeug.utils import secure_filename
66

7-
from os.path import join as pjoin
87
from pathlib import Path
98
from os import getcwd
109
import os
@@ -21,12 +20,32 @@
2120
def mkdirp(path):
2221
Path(path).mkdir(parents=True, exist_ok=True)
2322

23+
def pjoin(basepath, *paths):
24+
path = os.path.join(basepath, *paths)
25+
if not os.path.abspath(path).startswith(os.path.abspath(basepath)):
26+
raise Exception('Trying to access a non safe-path.')
27+
return path
2428

2529
@app.route('/<path:route>')
2630
def not_found(route):
2731
return abort(make_response(jsonify(message='Route not found'), 404))
2832

2933

34+
def base_get_refs():
35+
try:
36+
with open(pjoin(app.config['JOB_DIR'], 'refs', 'refs.json'), 'r') as f:
37+
refs = json.load(f)
38+
return refs
39+
except:
40+
return None
41+
42+
@app.route('/ref', methods=['GET'])
43+
def get_refs():
44+
refs = base_get_refs()
45+
if refs is None:
46+
return jsonify([])
47+
return jsonify(refs)
48+
3049
@app.route('/vcf', methods=['GET'])
3150
def get_vcf_list():
3251
vcfs = list()
@@ -121,6 +140,9 @@ def rm_vcf(vcf_id):
121140

122141
return jsonify(info)
123142

143+
def first_true(iterable, default=None, pred=None):
144+
return next(filter(pred, iterable), default)
145+
124146

125147
@app.route('/vcf', methods=['POST'])
126148
def post_vcf():
@@ -137,16 +159,25 @@ def post_vcf():
137159

138160
if 'file' not in request.files:
139161
abort(make_response(jsonify(message="Missing file"), 400))
140-
if 'reference' not in request.files:
162+
163+
custom_ref = ('refid' not in request.form or request.form['refid'] == '__custom__')
164+
if custom_ref and 'reference' not in request.files:
141165
abort(make_response(jsonify(message="Missing file"), 400))
142166

143167
rfile = request.files['file']
144-
reffile = request.files['reference']
145-
146168
if rfile.filename == '':
147169
abort(make_response(jsonify(message="Missing filename"), 400))
148-
if reffile.filename == '':
149-
abort(make_response(jsonify(message="Missing filename"), 400))
170+
171+
if custom_ref:
172+
reffile = request.files['reference']
173+
if reffile.filename == '':
174+
abort(make_response(jsonify(message="Missing filename"), 400))
175+
else:
176+
refs = base_get_refs()
177+
refid = request.form['refid']
178+
ref = first_true(refs, None, lambda x: x['id'] == refid)
179+
if ref is None:
180+
abort(make_response(jsonify(message="Unknown ref"), 400))
150181

151182
uuid = datetime.datetime.now().strftime('%Y%m%d-%H%M%S_') + str(uuid4())
152183

@@ -173,24 +204,37 @@ def post_vcf():
173204
os.remove(dfile)
174205
dfile = nfile
175206

176-
# Download reference
177-
refpath = pjoin(workdir, secure_filename(reffile.filename))
178-
reffile.save(refpath)
179-
if refpath.endswith('.gz'):
180-
nfile = refpath.replace('.gz', '')
181-
with gzip.open(refpath, 'rb') as f_in:
182-
with open(nfile, 'wb') as f_out:
183-
shutil.copyfileobj(f_in, f_out)
184-
os.remove(refpath)
185-
refpath = nfile
186-
187-
# Download GTF (optional)
188-
if 'gtf' not in request.files:
189-
gtfpath = "NULL"
207+
if custom_ref:
208+
# Download reference
209+
refpath = pjoin(workdir, secure_filename(reffile.filename))
210+
reffile.save(refpath)
211+
if refpath.endswith('.gz'):
212+
nfile = refpath.replace('.gz', '')
213+
with gzip.open(refpath, 'rb') as f_in:
214+
with open(nfile, 'wb') as f_out:
215+
shutil.copyfileobj(f_in, f_out)
216+
os.remove(refpath)
217+
refpath = nfile
218+
219+
# Download GTF (optional)
220+
if 'gtf' not in request.files:
221+
gtfpath = "NULL"
222+
else:
223+
gtffile = request.files['gtf']
224+
gtfpath = pjoin(workdir, secure_filename(gtffile.filename))
225+
gtffile.save(gtfpath)
190226
else:
191-
gtffile = request.files['gtf']
192-
gtfpath = pjoin(workdir, secure_filename(gtffile.filename))
193-
gtffile.save(gtfpath)
227+
# Copy reference
228+
sourcepath = pjoin(app.config['JOB_DIR'], 'refs', ref['reference']['file'])
229+
refpath = pjoin(workdir, secure_filename(ref['reference']['file']))
230+
shutil.copy2(sourcepath, refpath)
231+
232+
# Copy GTF
233+
sourcepath = pjoin(app.config['JOB_DIR'], 'refs', ref['annotation']['file'])
234+
gtfpath = pjoin(workdir, secure_filename(ref['annotation']['file']))
235+
shutil.copy2(sourcepath, gtfpath)
236+
237+
194238

195239
info = {
196240
"filename": dfile,
@@ -203,6 +247,8 @@ def post_vcf():
203247
}
204248
if filetype == 'fasta':
205249
info['params'] = {"cores": cores}
250+
if not custom_ref:
251+
info['internal_ref'] = ref
206252

207253
with open(pjoin(workdir, 'info.json'), 'w+') as f:
208254
json.dump(info, f)
@@ -385,7 +431,7 @@ def post_malva():
385431
with open(pjoin(app.config['JOB_DIR'], 'vcf', vcf, 'status.json'), 'r') as f:
386432
status = json.load(f)
387433
with open(pjoin(app.config['JOB_DIR'], 'vcf', vcf, 'info.json'), 'r') as f:
388-
info = json.load(f)
434+
binfo = json.load(f)
389435

390436
if status['status'] in ['Uploaded', 'Precomputed']:
391437
vcfpath = status['output']['vcf']
@@ -396,8 +442,8 @@ def post_malva():
396442
vcf,
397443
'vcf', 'run.cleaned.vcf'
398444
)
399-
reference = info['reference']
400-
gtf = info['gtf']
445+
reference = binfo['reference']
446+
gtf = binfo['gtf']
401447

402448
uuid = datetime.datetime.now().strftime('%Y%m%d-%H%M%S_') + str(uuid4())
403449
workdir = pjoin(
@@ -445,6 +491,11 @@ def post_malva():
445491
},
446492
"submission_time": int(round(time()))
447493
}
494+
495+
has_internal_ref = 'internal_ref' in binfo
496+
if has_internal_ref:
497+
info['internal_ref'] = binfo['internal_ref']
498+
448499
with open(pjoin(workdir, 'info.json'), 'w+') as f:
449500
json.dump(info, f)
450501

@@ -463,6 +514,10 @@ def post_malva():
463514
f'gtf: {gtf}\n' +
464515
f'cores: {cores}\n'
465516
)
517+
if has_internal_ref and ('snpEff' in info['internal_ref']) and ('id' in info['internal_ref']['snpEff']):
518+
conf.write(
519+
f"refname: {info['internal_ref']['snpEff']['id']}\n"
520+
)
466521

467522
status = {
468523
"status": "Pending",

frontend/craco.config.js

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,10 @@ module.exports = {
1111
plugin: CracoLessPlugin,
1212
options: {
1313
lessLoaderOptions: {
14-
modifyVars: theme,
15-
javascriptEnabled: true,
14+
lessOptions: {
15+
modifyVars: theme,
16+
javascriptEnabled: true,
17+
},
1618
},
1719
},
1820
},

frontend/package.json

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,19 +5,18 @@
55
"dependencies": {
66
"@2fd/ant-design-icons": "2.1.0",
77
"@ant-design/icons": "4.2.2",
8-
"antd": "4.5.3",
8+
"antd": "4.6.4",
99
"core-js": "^3.5.0",
10-
"dayjs": "1.8.33",
10+
"dayjs": "1.8.35",
1111
"history": "5.0.0",
1212
"markdown-to-jsx": "6.11.4",
13-
"rc-resize-observer": "0.2.3",
1413
"react": "^16.13.1",
1514
"react-app-polyfill": "^1.0.6",
1615
"react-dom": "^16.13.1",
1716
"react-refetch": "^3.0.1",
1817
"react-router": "6.0.0-alpha.3",
1918
"react-router-dom": "6.0.0-alpha.3",
20-
"react-scripts": "3.4.1",
19+
"react-scripts": "3.4.3",
2120
"react-window": "^1.8.5",
2221
"xlsx": "^0.15.6"
2322
},
@@ -42,10 +41,10 @@
4241
"@craco/craco": "5.6.4",
4342
"antd-dayjs-webpack-plugin": "1.0.1",
4443
"babel-plugin-import": "1.13.0",
45-
"craco-less": "1.16.0",
44+
"craco-less": "1.17.0",
4645
"eslint": "^6.1.0",
4746
"eslint-config-prettier": "6.11.0",
4847
"eslint-plugin-prettier": "3.1.4",
49-
"prettier": "2.0.5"
48+
"prettier": "2.1.1"
5049
}
5150
}

frontend/src/ajax/refs.js

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
import { api } from 'app-config';
2+
3+
import { connect } from './utils';
4+
5+
const refs = {
6+
url: api.ref,
7+
force: true,
8+
refreshing: true,
9+
};
10+
11+
const ajaxRefs = connect(() => ({
12+
refs,
13+
reloadRefs: () => ({
14+
refs,
15+
}),
16+
}));
17+
18+
export default ajaxRefs;

frontend/src/app-config.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ export const api = {
1010
return `${wsBaseHref}/malva${isNotNull(idm)}`;
1111
},
1212
update: `${wsBaseHref}/update`,
13+
ref: `${wsBaseHref}/ref`,
1314
};
1415

1516
export const basepath = process.env.PUBLIC_URL

frontend/src/pages/CallReport/CallReport.jsx

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,31 @@ import { Error, Loading } from 'components';
77

88
import GenotypeTable from './GenotypeTable';
99
import DownloadAsXlsx from './DownloadAsXlsx';
10+
import { ANN_FIELDS } from './utils';
11+
12+
const customTranslation = {
13+
ANN: function (effects) {
14+
return effects.split(',').map((effect, idx) => ({
15+
key: idx,
16+
...Object.fromEntries(
17+
effect.split('|').map((v, i) => [ANN_FIELDS[i] || i, v])
18+
),
19+
}));
20+
},
21+
};
22+
23+
function info2dict(info) {
24+
if (!info) return {};
25+
return Object.fromEntries(
26+
info
27+
.split(';')
28+
.map((field) => field.split('=', 2))
29+
.map(([key, value]) => [
30+
key,
31+
customTranslation[key] ? customTranslation[key](value) : value,
32+
])
33+
);
34+
}
1035

1136
function vcf2data(vcf) {
1237
const [pheader, ...data] = vcf
@@ -26,10 +51,11 @@ function vcf2data(vcf) {
2651
variant.DONOR && variant.DONOR.indexOf(':') !== -1
2752
? variant.DONOR.split(':').map((x) => +x)
2853
: undefined,
29-
_gene:
30-
variant.INFO && variant.INFO.startsWith('GENE=')
31-
? variant.INFO.slice(5)
32-
: undefined,
54+
_info: info2dict(variant.INFO),
55+
...variant,
56+
}))
57+
.map((variant) => ({
58+
_gene: variant._info && variant._info.GENE,
3359
...variant,
3460
}));
3561
}
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
import React, { useCallback } from 'react';
2+
import { Button, Modal, Table } from 'antd';
3+
4+
import { ANN_FIELDS } from './utils';
5+
6+
const tableScroll = { x: '100%' };
7+
8+
const columns = ANN_FIELDS.map((field) => ({ title: field, dataIndex: field }));
9+
10+
function EffectTable({ effects }) {
11+
return (
12+
<Table
13+
dataSource={effects}
14+
rowKey="key"
15+
columns={columns}
16+
pagination={false}
17+
size="small"
18+
scroll={tableScroll}
19+
bordered
20+
/>
21+
);
22+
}
23+
24+
const buttonStyle = { display: 'inline', height: 'unset', padding: 'unset' };
25+
26+
function EffectsText({ effects }) {
27+
const onClick = useCallback(
28+
() =>
29+
Modal.info({
30+
title: 'Effects predicted by SnpEff',
31+
content: <EffectTable effects={effects} />,
32+
width: '80%',
33+
icon: false,
34+
maskClosable: true,
35+
}),
36+
[effects]
37+
);
38+
if (!effects) return 'None';
39+
const effectText = [
40+
...new Set(
41+
effects
42+
.filter((effect) => effect['Annotation Impact'] !== 'MODIFIER')
43+
.map((effect) => effect['Annotation'])
44+
),
45+
].join(', ');
46+
return (
47+
<Button onClick={onClick} type="link" style={buttonStyle}>
48+
{effectText}
49+
</Button>
50+
);
51+
}
52+
53+
export default EffectsText;

0 commit comments

Comments
 (0)