Skip to content

Commit 3ac54c4

Browse files
authored
Merge pull request #92 from nschcolnicov/database_fix
Added handling for custom databases
2 parents 603f45c + 4405d3a commit 3ac54c4

File tree

12 files changed

+44
-19
lines changed

12 files changed

+44
-19
lines changed

HISTORY.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
0.4.29
2+
3+
* Update handling of --database argument [#90](https://github.com/miRTop/mirtop/issues/90)
4+
15
0.4.28
26

37
* fix random order in Variant field [#84](https://github.com/miRTop/mirtop/issues/83)

mirtop/command_line.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from mirtop.libs import spikeins
1515
from mirtop.gff import update
1616
from mirtop.sql import sql
17+
from mirtop.mirna import mapper
1718
import mirtop.libs.logger as mylog
1819

1920
import time
@@ -25,6 +26,9 @@ def main(**kwargs):
2526
kwargs['args'].print_debug)
2627
logger = mylog.getLogger(__name__)
2728
start = time.time()
29+
if not hasattr(kwargs["args"], "database"):
30+
if ("sql" not in kwargs and "stats" not in kwargs and "update" not in kwargs and "validate" not in kwargs):
31+
kwargs["args"].database = mapper.guess_database(kwargs["args"])
2832

2933
if "gff" in kwargs:
3034
logger.info("Run annotation")

mirtop/exporter/isomirs.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ def convert(args):
3838
def _convert_file(gff, args):
3939
sep = "\t"
4040
precursors = fasta.read_precursor(args.hairpin, args.sps)
41-
matures = mapper.read_gtf_to_precursor(args.gtf)
41+
matures = mapper.read_gtf_to_precursor(args.gtf, args.database)
4242
variant_header = sep.join(['mism', 'add', 't5', 't3'])
4343

4444
gff_file = open(gff, 'r')

mirtop/exporter/vcf.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ def convert(args):
2525
for fn in args.files:
2626
out_file = op.join(args.out, "%s.vcf" % op.splitext(op.basename(fn))[0])
2727
logger.info("Reading %s" % fn)
28-
create_vcf(fn, args.hairpin, args.gtf, out_file)
28+
create_vcf(fn, args.hairpin, args.gtf, out_file, args.database)
2929
logger.info("VCF generated %s" % out_file)
3030

3131

@@ -121,7 +121,7 @@ def cigar_2_key(cigar, readseq, refseq, pos, var5p, var3p, parent_ini_pos, paren
121121
return(key_pos, key_var, ref, alt)
122122

123123

124-
def create_vcf(mirgff3, precursor, gtf, vcffile):
124+
def create_vcf(mirgff3, precursor, gtf, vcffile, database):
125125
"""
126126
Args:
127127
'mirgff3(str)': File with mirGFF3 format that will be converted
@@ -178,7 +178,7 @@ def create_vcf(mirgff3, precursor, gtf, vcffile):
178178
n_noSNP = 0
179179
no_var = 0
180180
hairpins = read_precursor(precursor)
181-
gff3 = read_gtf_to_precursor(gtf)
181+
gff3 = read_gtf_to_precursor(gtf, database)
182182
gtf_dic = read_gtf_to_mirna(gtf)
183183
for line in range(0, len(gff3_data)):
184184
if not gff3_data[line]:

mirtop/gff/__init__.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,14 @@ def reader(args):
2121
read.reader(args)
2222
return None
2323
samples = []
24-
database = mapper.guess_database(args)
24+
if args.database is None:
25+
database = mapper.guess_database(args)
26+
else:
27+
database = args.database
2528
args.database = database
2629
precursors = fasta.read_precursor(args.hairpin, args.sps)
2730
args.precursors = precursors
28-
matures = mapper.read_gtf_to_precursor(args.gtf)
31+
matures = mapper.read_gtf_to_precursor(args.gtf,database)
2932
args.matures = matures
3033
# TODO check numbers of miRNA and precursors read
3134
# TODO print message if numbers mismatch

mirtop/gff/convert.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ def convert_gff_counts(args):
3030
'iso_add3p', 'iso_snp']
3131
if args.add_extra:
3232
precursors = fasta.read_precursor(args.hairpin, args.sps)
33-
matures = mapper.read_gtf_to_precursor(args.gtf)
33+
matures = mapper.read_gtf_to_precursor(args.gtf, args.database)
3434
variant_header = variant_header + ['iso_5p_nt', 'iso_3p_nt', 'iso_add3p_nt', 'iso_snp_nt']
3535

3636
logger.info("INFO Reading GFF file %s", args.gff)

mirtop/gff/read.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ def reader(args):
2020
args.database = database
2121
precursors = fasta.read_precursor(args.hairpin, args.sps)
2222
args.precursors = precursors
23-
matures = mapper.read_gtf_to_precursor(args.gtf)
23+
matures = mapper.read_gtf_to_precursor(args.gtf, args.database)
2424
args.matures = matures
2525
# TODO check numbers of miRNA and precursors read
2626
# TODO print message if numbers mismatch

mirtop/importer/prost.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ def read_file(fn, hairpins, database, mirna_gtf):
4141
reads = defaultdict(hits)
4242
sample = os.path.splitext(os.path.basename(fn))[0]
4343
genomics = mapper.read_gtf_to_mirna(mirna_gtf)
44-
matures = mapper.read_gtf_to_precursor(mirna_gtf)
44+
matures = mapper.read_gtf_to_precursor(mirna_gtf, database)
4545
non_mirna = 0
4646
non_chromosome_mirna = 0
4747
outside_mirna = 0

mirtop/mirna/mapper.py

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ def guess_database(args):
2020
2121
TODO: this needs to be generic to other databases.
2222
"""
23+
if not hasattr(args, "database"):
24+
args.database = None
2325
return _guess_database_file(args.gtf, args.database)
2426

2527

@@ -143,7 +145,7 @@ def read_gtf_chr2mirna2(gtf): # to remove
143145
return db_mir
144146

145147

146-
def read_gtf_to_precursor(gtf):
148+
def read_gtf_to_precursor(gtf,database):
147149
"""
148150
Load GTF file with precursor positions on genome
149151
Return dict with key being precursor name and
@@ -161,15 +163,26 @@ def read_gtf_to_precursor(gtf):
161163
"""
162164
if not gtf:
163165
return gtf
164-
if _guess_database_file(gtf).find("miRBase") > -1:
166+
if _guess_database_file(gtf,database).find("miRBase") > -1:
165167
mapped = read_gtf_to_precursor_mirbase(gtf)
166-
elif _guess_database_file(gtf).find("MirGeneDB") > -1:
168+
elif _guess_database_file(gtf,database).find("MirGeneDB") > -1:
167169
mapped = read_gtf_to_precursor_mirgenedb(gtf)
168170
else:
169171
logger.info("Database different than miRBase or MirGeneDB")
170172
logger.info("If you get an error when loading,")
171173
logger.info("report it to https://github.com/miRTop/mirtop/issues")
172-
mapped = read_gtf_to_precursor_mirbase(gtf)
174+
try:
175+
mapped = read_gtf_to_precursor_mirbase(gtf)
176+
return mapped
177+
except Exception as e:
178+
print(f"Failed to parse with Mirbase: {e}")
179+
try:
180+
mapped = read_gtf_to_precursor_mirgenedb(gtf)
181+
return mapped
182+
except Exception as e:
183+
print(f"Failed to parse with Mirgenedb: {e}")
184+
raise ValueError(f"There is no parser available for the database that you used: {database}")
185+
173186
return mapped
174187

175188

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,5 @@ pybedtools
33
pandas
44
biopython
55
pyyaml
6-
pybedtools
76
six
7+
pytest

0 commit comments

Comments
 (0)