Skip to content

Commit 3087524

Browse files
authored
Merge pull request #128 from jbelyeu/master
max coverage limit
2 parents 2fb0b75 + c8815b7 commit 3087524

File tree

6 files changed

+180
-54
lines changed

6 files changed

+180
-54
lines changed

README.md

Lines changed: 37 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -16,27 +16,8 @@ substantiate the SV.
1616
<summary>samplot plot</summary>
1717

1818
```
19-
usage: samplot plot [-h] [--marker_size MARKER_SIZE] [-n TITLES [TITLES ...]]
20-
[-r REFERENCE] [-z Z] -b BAMS [BAMS ...] -o OUTPUT_FILE -s
21-
START -e END -c CHROM [-w WINDOW] [-d MAX_DEPTH]
22-
[--minq MINQ] [-t SV_TYPE] [-T TRANSCRIPT_FILE]
23-
[-A ANNOTATION_FILES [ANNOTATION_FILES ...]]
24-
[--coverage_tracktype {stack,superimpose}] [-a]
25-
[-H PLOT_HEIGHT] [-W PLOT_WIDTH] [-q MIN_MQUAL] [-j]
26-
[--start_ci START_CI] [--end_ci END_CI]
27-
[--long_read LONG_READ] [--min_event_size MIN_EVENT_SIZE]
28-
[--xaxis_label_fontsize XAXIS_LABEL_FONTSIZE]
29-
[--yaxis_label_fontsize YAXIS_LABEL_FONTSIZE]
30-
[--legend_fontsize LEGEND_FONTSIZE]
31-
[--annotation_fontsize ANNOTATION_FONTSIZE]
32-
[--common_insert_size] [--hide_annotation_labels]
33-
[--coverage_only] [--same_yaxis_scales] [--zoom ZOOM]
34-
[--debug DEBUG]
35-
3619
optional arguments:
3720
-h, --help show this help message and exit
38-
--marker_size MARKER_SIZE
39-
Size of marks on pairs and splits (default 3)
4021
-n TITLES [TITLES ...], --titles TITLES [TITLES ...]
4122
Space-delimited list of plot titles. Use quote marks
4223
to include spaces (i.e. "plot 1" "plot 2")
@@ -46,36 +27,53 @@ optional arguments:
4627
-b BAMS [BAMS ...], --bams BAMS [BAMS ...]
4728
Space-delimited list of BAM/CRAM file names
4829
-o OUTPUT_FILE, --output_file OUTPUT_FILE
49-
Output file name
30+
Output file name/type. Defaults to
31+
{type}_{chrom}_{start}_{end}.png
32+
--output_dir OUTPUT_DIR
33+
Output directory name. Defaults to working dir.
34+
Ignored if --output_file is set
5035
-s START, --start START
51-
Start position of region/variant
52-
-e END, --end END End position of region/variant
36+
Start position of region/variant (add multiple for
37+
translocation/BND events)
38+
-e END, --end END End position of region/variant (add multiple for
39+
translocation/BND events)
5340
-c CHROM, --chrom CHROM
54-
Chromosome
41+
Chromosome (add multiple for translocation/BND events)
5542
-w WINDOW, --window WINDOW
5643
Window size (count of bases to include in view),
5744
default(0.5 * len)
5845
-d MAX_DEPTH, --max_depth MAX_DEPTH
5946
Max number of normal pairs to plot
60-
--minq MINQ coverage from reads with MAPQ <= minq plotted in
61-
lighter grey. To disable, pass in negative value
6247
-t SV_TYPE, --sv_type SV_TYPE
6348
SV type. If omitted, plot is created without variant
6449
bar
6550
-T TRANSCRIPT_FILE, --transcript_file TRANSCRIPT_FILE
66-
GFF of transcripts
51+
GFF3 of transcripts
52+
--transcript_filename TRANSCRIPT_FILENAME
53+
Name for transcript track
54+
--max_coverage_points MAX_COVERAGE_POINTS
55+
number of points to plot in coverage axis (downsampled
56+
from region size for speed)
6757
-A ANNOTATION_FILES [ANNOTATION_FILES ...], --annotation_files ANNOTATION_FILES [ANNOTATION_FILES ...]
6858
Space-delimited list of bed.gz tabixed files of
6959
annotations (such as repeats, mappability, etc.)
70-
--coverage_tracktype {stack,superimpose}
60+
--annotation_filenames ANNOTATION_FILENAMES [ANNOTATION_FILENAMES ...]
61+
Space-delimited list of names for the tracks in
62+
--annotation_files
63+
--coverage_tracktype {stack,superimpose,none}
7164
type of track to use for low MAPQ coverage plot.
7265
-a, --print_args Print commandline arguments
7366
-H PLOT_HEIGHT, --plot_height PLOT_HEIGHT
7467
Plot height
7568
-W PLOT_WIDTH, --plot_width PLOT_WIDTH
7669
Plot width
77-
-q MIN_MQUAL, --min_mqual MIN_MQUAL
70+
-q INCLUDE_MQUAL, --include_mqual INCLUDE_MQUAL
7871
Min mapping quality of reads to be included in plot
72+
(default 1)
73+
--separate_mqual SEPARATE_MQUAL
74+
coverage from reads with MAPQ <= separate_mqual
75+
plotted in lighter grey. To disable, pass in negative
76+
value
7977
-j, --json_only Create only the json file, not the image plot
8078
--start_ci START_CI confidence intervals of SV first breakpoint (distance
8179
from the breakpoint). Must be a comma-separated pair
@@ -86,9 +84,10 @@ optional arguments:
8684
--long_read LONG_READ
8785
Min length of a read to be treated as a long-read
8886
(default 1000)
87+
--ignore_hp Choose to ignore HP tag in alignment files
8988
--min_event_size MIN_EVENT_SIZE
9089
Min size of an event in long-read CIGAR to include
91-
(default 100)
90+
(default 20)
9291
--xaxis_label_fontsize XAXIS_LABEL_FONTSIZE
9392
Font size for X-axis labels (default 6)
9493
--yaxis_label_fontsize YAXIS_LABEL_FONTSIZE
@@ -100,10 +99,17 @@ optional arguments:
10099
--common_insert_size Set common insert size for all plots
101100
--hide_annotation_labels
102101
Hide the label (fourth column text) from annotation
103-
files, useful for region with many annotations
102+
files, useful for regions with many annotations
104103
--coverage_only Hide all reads and show only coverage
104+
--max_coverage MAX_COVERAGE
105+
apply a maximum coverage cutoff. Unlimited by default
105106
--same_yaxis_scales Set the scales of the Y axes to the max of all
106-
--zoom ZOOM Only show +- zoom amount around breakpoints
107+
--marker_size MARKER_SIZE
108+
Size of marks on pairs and splits (default 3)
109+
--dpi DPI Dots per inches (pixel count, default 300)
110+
--zoom ZOOM Only show +- zoom amount around breakpoints, much
111+
faster for large regions. Ignored if region smaller
112+
than --zoom (default 500000)
107113
--debug DEBUG Print debug statements
108114
```
109115
</details>

samplot/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
#!/usr/bin/env python
2-
__version__ = "1.1.1"
2+
__version__ = "1.1.4"

samplot/samplot.py

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ def points_in_window(points):
177177

178178
# {{{ def get_tabix_iter(chrm, start, end, datafile):
179179
def get_tabix_iter(chrm, start, end, datafile):
180-
"""Gets an iterator from a tabix BED/GFF/GFF3 file
180+
"""Gets an iterator from a tabix BED/GFF3 file
181181
182182
Used to avoid chrX vs. X notation issues when extracting data from
183183
annotation files
@@ -277,7 +277,6 @@ def plot_coverage(
277277
max_coverage,
278278
tracktype,
279279
yaxis_label_fontsize,
280-
same_yaxis_labels,
281280
max_coverage_points,
282281
):
283282
"""Plots high and low quality coverage for the region
@@ -314,8 +313,8 @@ def plot_coverage(
314313
cover_y_highqual = np.array(cover_y_highqual)
315314
cover_y_all = np.array(cover_y_all)
316315

317-
if max_coverage > 0 and same_yaxis_labels:
318-
max_plot_depth = max_coverage+5
316+
if max_coverage > 0:
317+
max_plot_depth = max_coverage
319318
elif cover_y_all.max() > 3 * cover_y_all.mean():
320319
max_plot_depth = max(
321320
np.percentile(cover_y_all, 99.5), np.percentile(cover_y_all, 99.5)
@@ -2209,7 +2208,7 @@ def gff_file(transcript_file):
22092208
ext = os.path.splitext(fields[0])[1][1:]
22102209
ext = ext.lower()
22112210
if ext not in options:
2212-
parser.error("transcript file {} is not in GFF/GFF3 format".format(transcript_file))
2211+
parser.error("transcript file {} is not in GFF3 format".format(transcript_file))
22132212

22142213
idx_file = transcript_file + ".tbi"
22152214
if not os.path.isfile(idx_file):
@@ -2427,6 +2426,13 @@ def bed_file(annotation_file):
24272426
required=False,
24282427
)
24292428

2429+
parser.add_argument(
2430+
"--max_coverage",
2431+
default=0,
2432+
type=int,
2433+
help="apply a maximum coverage cutoff. Unlimited by default",
2434+
)
2435+
24302436
parser.add_argument(
24312437
"--same_yaxis_scales",
24322438
action="store_true",
@@ -2751,7 +2757,8 @@ def get_read_data(
27512757
read_data["all_pairs"] = downsample_pairs(
27522758
max_depth, z_score, read_data["all_pairs"]
27532759
)
2754-
2760+
if not same_yaxis_scales:
2761+
max_coverage = 0
27552762
return read_data, max_coverage
27562763

27572764

@@ -2864,7 +2871,6 @@ def plot_samples(
28642871
max_coverage,
28652872
coverage_tracktype,
28662873
yaxis_label_fontsize,
2867-
same_yaxis_scales,
28682874
max_coverage_points,
28692875
)
28702876

@@ -3292,6 +3298,8 @@ def get_transcript_plan(ranges, transcript_file):
32923298

32933299
for r in ranges:
32943300
itr = get_tabix_iter(r.chrm, r.start, r.end, transcript_file)
3301+
if not itr:
3302+
continue
32953303
for row in itr:
32963304
gene_annotation = row.rstrip().split()
32973305

@@ -3584,6 +3592,8 @@ def plot(parser):
35843592
options.start_ci,
35853593
options.end_ci,
35863594
)
3595+
if options.max_coverage:
3596+
max_coverage = options.max_coverage
35873597

35883598
# Plot each sample
35893599
current_axis_idx = plot_samples(

samplot/samplot_vcf.py

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -104,12 +104,17 @@ def get_format_fields(ids, variant):
104104
returns:
105105
list
106106
"""
107-
fields = list()
108-
for i in ids:
109-
fields.append(
110-
["%s=%s" % (i, flatten(j.get(i, ""))) for j in variant.samples.values()]
111-
)
112-
return zip_lists(fields)
107+
sample_format = []
108+
for i,sample_fields in enumerate(variant.samples.values()):
109+
for field_id in ids:
110+
sample_field_val = flatten(sample_fields.get(field_id, ""))
111+
if sample_field_val:
112+
if len(sample_format) < i+1:
113+
sample_format.append("")
114+
else:
115+
sample_format[i] += " "
116+
sample_format[i] += "{}={}".format(field_id,sample_field_val)
117+
return sample_format
113118

114119

115120
def get_format_title(samples, ids, variant):
@@ -175,8 +180,8 @@ def get_overlap(
175180
[i.split("\t")[2].lower() for i in tabix.fetch(chrom, start, end)]
176181
)
177182
except IndexError:
178-
# probably not a gff or gtf
179-
print("Invalid annotation file specified for --gff")
183+
# probably not a gff3
184+
print("Invalid annotation file specified for --gff3")
180185
overlaps = None
181186
except ValueError:
182187
if fix_chr:
@@ -415,8 +420,8 @@ def vcf(parser):
415420
vcf_samples_list = list(vcf_samples)
416421

417422
annotations = None
418-
if args.gff:
419-
annotations = pysam.TabixFile(args.gff)
423+
if args.gff3:
424+
annotations = pysam.TabixFile(args.gff3)
420425

421426
filters = [to_exprs(f) for f in args.filter]
422427

@@ -721,7 +726,7 @@ def vcf(parser):
721726
html_template.render(
722727
data=tabledata,
723728
plot_type=args.output_type,
724-
gff="true" if annotations else "false",
729+
gff3="true" if annotations else "false",
725730
denovo="true" if dn_row else "false",
726731
),
727732
file=fh,
@@ -845,7 +850,7 @@ def add_vcf(parent_parser):
845850
help="comma separated list of FORMAT fields to include in sample plot title",
846851
)
847852
parser.add_argument(
848-
"--gff",
853+
"--gff3",
849854
help="genomic regions (.gff with .tbi in same directory) used when building HTML table and table filters",
850855
)
851856
parser.add_argument(

samplot/templates/samplot_vcf.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,7 @@ <h5>SV Overlaps</h5>
201201
<script>
202202
const data = {{ data|tojson }}
203203
const plot_type = "{{plot_type}}"
204-
const annotation = {{ gff }}
204+
const annotation = {{ gff3 }}
205205
const denovo = {{ denovo }}
206206

207207
dc.config.defaultColors(d3.schemeSet1)

0 commit comments

Comments
 (0)