Skip to content

Commit 59afe55

Browse files
authored
Merge pull request #37 from nick-youngblut/master
Better error reporting for sra-stat; added logging; reformatted argument parsing; added function docs; refactored towards PEP 8
2 parents 36a113d + f5c908d commit 59afe55

File tree

2 files changed

+98
-40
lines changed

2 files changed

+98
-40
lines changed

README.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ this will get you the sra-tools dependency as well.
2626

2727
Examples
2828
--------
29-
``$ parallel-fastq-dump --sra-id SRR1219899 --threads 4 --outdir out/ --split-files --gzip``
29+
``$ parallel-fastq-dump --sra-id SRR2244401 --threads 4 --outdir out/ --split-files --gzip``
3030

3131
Micro Benchmark
3232
---------------

parallel-fastq-dump

Lines changed: 97 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -5,43 +5,80 @@ import shutil
55
import tempfile
66
import subprocess
77
import argparse
8+
import logging
9+
10+
__version__ = '0.6.7'
11+
12+
logging.basicConfig(format='%(asctime)s - %(message)s', level=logging.DEBUG)
13+
class CustomFormatter(argparse.ArgumentDefaultsHelpFormatter,
14+
argparse.RawDescriptionHelpFormatter):
15+
pass
16+
17+
desc = 'parallel fastq-dump wrapper, extra args will be passed through'
18+
epi = """DESCRIPTION:
19+
Example: parallel-fastq-dump --sra-id SRR2244401 --threads 4 --outdir out/ --split-files --gzip
20+
"""
21+
22+
parser = argparse.ArgumentParser(description=desc, epilog=epi,
23+
formatter_class=CustomFormatter)
24+
argparse.ArgumentDefaultsHelpFormatter
25+
parser.add_argument('-s','--sra-id', help='SRA id', action='append')
26+
parser.add_argument('-t','--threads', help='number of threads', default=1, type=int)
27+
parser.add_argument('-O','--outdir', help='output directory', default='.')
28+
parser.add_argument('-T', '--tmpdir', help='temporary directory', default=None)
29+
parser.add_argument('-N','--minSpotId', help='Minimum spot id', default=1, type=int)
30+
parser.add_argument('-X','--maxSpotId', help='Maximum spot id', default=None, type=int)
31+
parser.add_argument('-V', '--version', help='shows version', action='store_true', default=False)
832

9-
__version__ = "0.6.6"
1033

1134
def pfd(args, srr_id, extra_args):
12-
tmp_dir = tempfile.TemporaryDirectory(prefix="pfd_",dir=args.tmpdir)
13-
sys.stderr.write("tempdir: {}\n".format(tmp_dir.name))
35+
"""
36+
Parallel fastq dump
37+
Parameters
38+
----------
39+
args : dict
40+
User-provided args
41+
srr_id : str
42+
SRR ID
43+
extra_args : dict
44+
Extra args
45+
"""
46+
tmp_dir = tempfile.TemporaryDirectory(prefix='pfd_',dir=args.tmpdir)
47+
logging.info('tempdir: {}'.format(tmp_dir.name))
1448

1549
n_spots = get_spot_count(srr_id)
16-
sys.stderr.write("{} spots: {}\n".format(srr_id,n_spots))
50+
logging.info('{} spots: {}'.format(srr_id,n_spots))
1751

1852
# minSpotId can't be lower than 1
1953
start = max(args.minSpotId, 1)
2054
# maxSpotId can't be greater than n_spots
2155
end = min(args.maxSpotId, n_spots) if args.maxSpotId is not None else n_spots
2256

2357
blocks = split_blocks(start, end, args.threads)
24-
sys.stderr.write("blocks: {}\n".format(blocks))
25-
58+
logging.info('blocks: {}'.format(blocks))
59+
2660
ps = []
2761
for i in range(0,args.threads):
2862
d = os.path.join(tmp_dir.name, str(i))
2963
os.mkdir(d)
30-
p = subprocess.Popen(["fastq-dump", "-N", str(blocks[i][0]), "-X", str(blocks[i][1]), "-O", d]+extra_args+[srr_id])
64+
cmd = ['fastq-dump', '-N', str(blocks[i][0]), '-X', str(blocks[i][1]),
65+
'-O', d] + extra_args + [srr_id]
66+
logging.info('CMD: {}'.format(' '.join(cmd)))
67+
p = subprocess.Popen(cmd)
3168
ps.append(p)
3269

3370
wfd = {}
3471
for i in range(0,args.threads):
3572
exit_code = ps[i].wait()
3673
if exit_code != 0:
37-
sys.stderr.write("fastq-dump error! exit code: {}\n".format(exit_code))
74+
logging.warning('fastq-dump error! exit code: {}'.format(exit_code))
3875
sys.exit(1)
3976

4077
tmp_path = os.path.join(tmp_dir.name, str(i))
4178
for fo in os.listdir(tmp_path):
4279
if fo not in wfd:
43-
wfd[fo] = open(os.path.join(args.outdir,fo), "wb")
44-
with open(os.path.join(tmp_path,fo), "rb") as fd:
80+
wfd[fo] = open(os.path.join(args.outdir,fo), 'wb')
81+
with open(os.path.join(tmp_path,fo), 'rb') as fd:
4582
shutil.copyfileobj(fd, wfd[fo])
4683
os.remove(os.path.join(tmp_path,fo))
4784

@@ -61,12 +98,28 @@ def split_blocks(start, end, n_pieces):
6198
return out
6299

63100
def get_spot_count(sra_id):
64-
p = subprocess.Popen(["sra-stat", "--meta", "--quick", sra_id], stdout=subprocess.PIPE)
65-
stdout, stderr = p.communicate()
66-
txt = stdout.decode().rstrip().split("\n")
101+
"""
102+
Get spot count via sra-stat
103+
Parameters
104+
----------
105+
sra_id : str
106+
SRA ID
107+
"""
108+
cmd = ['sra-stat', '--meta', '--quick', sra_id]
109+
logging.info('CMD: {}'.format(' '.join(cmd)))
110+
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
111+
stdout, stderr = p.communicate()
112+
txt = stdout.decode().rstrip().split('\n')
67113
total = 0
68-
for l in txt:
69-
total += int(l.split("|")[2].split(":")[0])
114+
try:
115+
for l in txt:
116+
total += int(l.split('|')[2].split(':')[0])
117+
except IndexError:
118+
msg = 'sra-stat output parsing error!'
119+
msg += '\n--sra-stat STDOUT--\n{}'
120+
msg += '\n--sra-stat STDERR--\n{}'
121+
etxt = stderr.decode().rstrip().split('\n')
122+
raise IndexError(msg.format('\n'.join(txt), '\n'.join(etxt)))
70123
return total
71124

72125
def partition(f, l):
@@ -79,46 +132,51 @@ def partition(f, l):
79132
return r
80133

81134
def is_sra_file(path):
135+
"""
136+
Determine whether path is SRA file
137+
Parameters
138+
----------
139+
path : str
140+
file path
141+
"""
82142
f = os.path.basename(path)
83143
if f.lower().endswith('.sra'): return True
84-
if "SRR" in f.upper(): return True
85-
if "ERR" in f.upper(): return True
86-
if "DRR" in f.upper(): return True
144+
if 'SRR' in f.upper(): return True
145+
if 'ERR' in f.upper(): return True
146+
if 'DRR' in f.upper(): return True
87147
return False
88148

89149
def main():
90-
parser = argparse.ArgumentParser(description="parallel fastq-dump wrapper, extra args will be passed through")
91-
parser.add_argument("-s","--sra-id", help="SRA id", action="append")
92-
parser.add_argument("-t","--threads", help="number of threads", default=1, type=int)
93-
parser.add_argument("-O","--outdir", help="output directory", default=".")
94-
parser.add_argument("--tmpdir", help="temporary directory", default=None)
95-
parser.add_argument("-N","--minSpotId", help="Minimum spot id", default=1, type=int)
96-
parser.add_argument("-X","--maxSpotId", help="Maximum spot id", default=None, type=int)
97-
parser.add_argument("-V", "--version", help="shows version", action="store_true")
150+
"""
151+
Main interface
152+
"""
98153
args, extra = parser.parse_known_args()
99-
100154
if args.version:
101-
print("parallel-fastq-dump : {}".format(__version__))
102-
subprocess.Popen(["fastq-dump", "-V"]).wait()
155+
print('parallel-fastq-dump : {}'.format(__version__))
156+
subprocess.Popen(['fastq-dump', '-V']).wait()
103157
sys.exit(0)
104158

105159
elif args.sra_id:
106-
extra_srrs, extra_args = partition(is_sra_file,extra)
160+
extra_srrs, extra_args = partition(is_sra_file,extra)
107161
args.sra_id.extend(extra_srrs)
108-
sys.stderr.write("SRR ids: {}\n".format(args.sra_id))
109-
sys.stderr.write("extra args: {}\n".format(extra_args))
110-
111-
if args.outdir:
112-
if not os.path.isdir(args.outdir):
113-
os.mkdir(args.outdir)
114-
162+
logging.info('SRR ids: {}'.format(args.sra_id))
163+
logging.info('extra args: {}'.format(extra_args))
164+
165+
# output directory
166+
if not os.path.isdir(args.outdir) and args.outdir != '.':
167+
os.makedirs(args.outdir)
168+
# temp directory
169+
if (args.tmpdir is not None and
170+
not os.path.isdir(args.tmpdir)
171+
and args.tmpdir != '.'):
172+
os.makedirs(args.tmpdir)
173+
# fastq dump
115174
for si in args.sra_id:
116175
pfd(args, si, extra_args)
117-
118176
else:
119177
parser.print_help()
120178
sys.exit(1)
121179

122-
if __name__ == "__main__":
180+
if __name__ == '__main__':
123181
main()
124182

0 commit comments

Comments
 (0)