Skip to content

Commit a3bca71

Browse files
authored
Merge pull request #10 from yhoogstrate/regions
v2.3.0
2 parents 32f5726 + 244a508 commit a3bca71

File tree

9 files changed

+74
-9
lines changed

9 files changed

+74
-9
lines changed

Changelog

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
13-sept-2018: Youri Hoogstrate
2+
* v2.3.0 - Adds arguments -r/--region and -b/--bed-regions to target
3+
specific genomic regions. This way alternative loci can be
4+
excluded for instance.
5+
16
13-sept-2018: Youri Hoogstrate
27
* v2.2.0 - Removes --roc argument and adds multiple statistics
38
including ROC to the -s/--stats argument.

README.md

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
bam-lorenz-coverage
22
===================
33

4-
Generate Lorenz plots and Coverage plots directly from BAM files
4+
This is a free software package that very easily allows you to generate
5+
Lorenz plots and Coverage plots, directly from a BAM file. It can also
6+
output the tables as text documents so you can generate custom plots.
7+
There is also support to only analyse specific regions.
58

69
Implemented in:
710
* Python3 + Matplotlib + Pysam
@@ -19,6 +22,12 @@ $ python setup.py install
1922
$ bam-lorenz-coverage --help
2023
```
2124

25+
Possible issues:
26+
- pysam is currently incompatible with Python 3.7; installing pysam manually from source is still possible (git clone + python setup.py install)
27+
- matplotlib depends on Tk, but if Tk is missing no error is raised during installation; the error only appears at runtime
28+
* debian/ubuntu: sudo apt-get install python3-tk
29+
* arch: pacman -Sy tk
30+
2231
Usage:
2332

2433
```
@@ -27,13 +36,15 @@ Usage: bam-lorenz-coverage [OPTIONS] INPUT_ALIGNMENT_FILE
2736
Options:
2837
--version Show the version and exit.
2938
-l, --lorenz-table TEXT Output table Lorenz-curve (for stdout use: -)
30-
-x, --roc Output Lorenz-curve ROC to $lorenz_table.roc.txt
31-
[ requires --lorenz-table to be set to file ]
3239
-c, --coverage-table TEXT Output table Coverage-graph (for stdout use: -)
3340
-L, --lorenz-svg TEXT Output figure Lorenz-curve (SVG).
3441
-C, --coverage-svg TEXT Output figure Coverage-graph (SVG).
42+
-s, --stats TEXT Output additional stats to text-file
43+
-r, --region TEXT Scan depth only in selected region <chr:from-to>
44+
(all positions: 1-based)
45+
-b, --bed-regions TEXT Scan depth only in selected positions or regions
46+
(BED file: start: 0-based & end: 1-based)
3547
--help Show this message and exit.
36-
3748
```
3849

3950
The lowercase arguments (-l, -c) allow extraction of the raw data tables for custom plotting. The uppercase arguments (-L, -C) directly generate a plot. The implemented plot only contains one sample per plot. For multi-sample plots, use the column tables and your imagination.

bin/bam-lorenz-coverage

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,11 @@ from blc.blc import bamlorenzcoverage
1616
@click.option('-L', '--lorenz-svg', nargs=1, help='Output figure Lorenz-curve (SVG).')
1717
@click.option('-C', '--coverage-svg', nargs=1, help='Output figure Coverage-graph (SVG).')
1818
@click.option('-s', '--stats', nargs=1, help='Output additional stats to text-file')
19-
def CLI(lorenz_table, coverage_table, lorenz_svg, coverage_svg, input_alignment_file, stats):
19+
@click.option('-r', '--region', nargs=1, help='Scan depth only in selected region <chr:from-to> (all positions: 1-based)')
20+
@click.option('-b', '--bed-regions', nargs=1, help='Scan depth only in selected positions or regions (BED file: start: 0-based & end: 1-based)')
21+
def CLI(lorenz_table, coverage_table, lorenz_svg, coverage_svg, input_alignment_file, stats, region, bed_regions):
2022
b = bamlorenzcoverage()
21-
idx_observed, n = b.bam_file_to_idx(input_alignment_file)
23+
idx_observed, n = b.bam_file_to_idx(input_alignment_file, region, bed_regions)
2224

2325
if coverage_table or coverage_svg:
2426
cumulative_coverage_curves = b.estimate_cumulative_coverage_curves(idx_observed)

blc/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
"""[License: GNU General Public License v3 (GPLv3)]
66
"""
77

8-
__version_info__ = ('2', '2', '0')
8+
__version_info__ = ('2', '3', '0')
99
__version__ = '.'.join(__version_info__) if (len(__version_info__) == 3) else '.'.join(__version_info__[0:3]) + "-" + __version_info__[3]
1010
__author__ = 'Youri Hoogstrate'
1111
__homepage__ = 'https://github.com/yhoogstrate/bam-lorenz-coverage'

blc/blc.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ class bamlorenzcoverage:
1313
def __init__(self):
1414
pass
1515

16-
def bam_file_to_idx(self, bam_file):
16+
def bam_file_to_idx(self, bam_file, region=None, bed_regions=None):
1717
"""
1818
Coverage plot needs the zero-statistic - i.e. the number of genomic bases not covered by reads
1919
"""
@@ -28,8 +28,15 @@ def bam_file_to_idx(self, bam_file):
2828
tmp_filename = os.path.join(tempfile.mkdtemp() + '.fifo')
2929
os.mkfifo(tmp_filename)
3030

31+
cmd = ['-a', bam_file]
32+
if region:
33+
cmd = ['-r', region] + cmd
34+
elif bed_regions:
35+
cmd = ['-b', bed_regions] + cmd
36+
# print(cmd)
37+
3138
# I tried this with the Threading class but this often didn't parallelize
32-
parallel_thread = Process(target=pysam.samtools.depth, args=['-a', bam_file], kwargs={'save_stdout': tmp_filename})
39+
parallel_thread = Process(target=pysam.samtools.depth, args=cmd, kwargs={'save_stdout': tmp_filename})
3340
parallel_thread.start()
3441

3542
fh = os.open(tmp_filename, os.O_RDONLY)

tests/blc/test_blc_012.sam

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
test_blc_011.sam

tests/blc/test_blc_013.bed

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
chr1 2 14

tests/blc/test_blc_013.sam

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
test_blc_011.sam

tests/test_class_blc.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,43 @@ def test_011_lorenz_03(self):
226226
self.assertEqual(lc['total_sequenced_bases'], 10)
227227
self.assertEqual(lc['total_covered_positions_of_genome'], 8)
228228

229+
def test_012_region(self):
230+
# everything covered, is at least covered even densely
231+
# x x x x x
232+
# x x x x x
233+
# - - - - - - - - - - - - - -
234+
235+
test_id = 'blc_012'
236+
237+
input_file_sam = TEST_DIR + "test_" + test_id + ".sam"
238+
input_file_bam = T_TEST_DIR + "test_" + test_id + ".bam"
239+
240+
sam_to_sorted_bam(input_file_sam, input_file_bam)
241+
242+
b = bamlorenzcoverage()
243+
idx, n = b.bam_file_to_idx(input_file_bam, 'chr1:2-14')
244+
245+
self.assertEqual(n, 13) # SAM header says the reference size is 14, but we start at the 2nd position
246+
247+
def test_013_bed(self):
248+
# everything covered, is at least covered even densely
249+
# x x x x x
250+
# x x x x x
251+
# - - - - - - - - - - - - - -
252+
253+
test_id = 'blc_013'
254+
255+
input_file_sam = TEST_DIR + "test_" + test_id + ".sam"
256+
input_file_bed = TEST_DIR + "test_" + test_id + ".bed"
257+
input_file_bam = T_TEST_DIR + "test_" + test_id + ".bam"
258+
259+
sam_to_sorted_bam(input_file_sam, input_file_bam)
260+
261+
b = bamlorenzcoverage()
262+
idx, n = b.bam_file_to_idx(input_file_bam, None, input_file_bed)
263+
264+
self.assertEqual(n, 12)
265+
229266

230267
if __name__ == '__main__':
231268
main()

0 commit comments

Comments
 (0)