Skip to content

Commit b602acc

Browse files
authored
Merge pull request #111 from jbelyeu/master
Samplot minor fixes, mostly in VCF
2 parents 22612c9 + 650736b commit b602acc

File tree

4 files changed

+111
-23
lines changed

4 files changed

+111
-23
lines changed

.circleci/setup.sh

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,13 @@ if [[ ! -d $WORKSPACE/anaconda ]]; then
3838

3939
mkdir -p $WORKSPACE/anaconda/conda-bld/$tag-64
4040

41-
# step 2: setup channels
41+
#step 2: downgrade python3.8 to 3.7
42+
if (( $pythonversion == 3 ))
43+
then
44+
conda install -y python=3.7
45+
fi
46+
47+
# step 3: setup channels
4248
conda config --system --add channels defaults
4349
conda config --system --add channels r
4450
conda config --system --add channels bioconda

samplot/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
#!/usr/bin/env python
2-
__version__ = "1.0.15"
2+
__version__ = "1.0.17"

samplot/samplot.py

Lines changed: 32 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
import matplotlib.ticker as ticker
1616
import numpy as np
1717
import pysam
18+
import warnings
19+
warnings.filterwarnings('ignore', 'FixedFormatter should only be used together with FixedLocator')
1820
from matplotlib.offsetbox import AnchoredText
1921

2022

@@ -275,7 +277,8 @@ def plot_coverage(
275277
max_coverage,
276278
tracktype,
277279
yaxis_label_fontsize,
278-
same_yaxis_labels=False,
280+
same_yaxis_labels,
281+
max_coverage_points,
279282
):
280283
"""Plots high and low quality coverage for the region
281284
@@ -290,7 +293,14 @@ def plot_coverage(
290293

291294
for i in range(len(ranges)):
292295
r = ranges[i]
293-
for pos in range(r.start, r.end + 1):
296+
region_len = r.end-r.start
297+
downsample = 1
298+
if region_len > max_coverage_points:
299+
downsample = int(region_len / max_coverage_points)
300+
301+
for i,pos in enumerate(range(r.start, r.end + 1)):
302+
if i%downsample != 0:
303+
continue
294304
cover_x.append(map_genome_point_to_range_points(ranges, r.chrm, pos))
295305
if r.chrm in coverage and pos in coverage[r.chrm]:
296306
cover_y_all.append(coverage[r.chrm][pos][0] + coverage[r.chrm][pos][1])
@@ -351,12 +361,14 @@ def plot_coverage(
351361
ax2.fill_between(
352362
cover_x, cover_y_lowqual, bottom_fill, color="grey", step="pre", alpha=0.15
353363
)
364+
## tracktype==None also allowed
354365

355366
# number of ticks should be 6 if there's one hp, 3 otherwise
356367
tick_count = 5 if hp_count == 1 else 2
357368
tick_count = max(int(max_plot_depth / tick_count), 1)
358369

359370
# set axis parameters
371+
#ax2.yaxis.set_major_locator(ticker.FixedLocator(tick_count))
360372
ax2.yaxis.set_major_locator(ticker.MultipleLocator(tick_count))
361373
ax2.tick_params(axis="y", colors="grey", labelsize=yaxis_label_fontsize)
362374
ax2.spines["top"].set_visible(False)
@@ -2133,7 +2145,7 @@ def bam_file(bam):
21332145
"-s",
21342146
"--start",
21352147
type=int,
2136-
help="Start position of region/variant",
2148+
help="Start position of region/variant (add multiple for translocation/BND events)",
21372149
action="append",
21382150
required=True,
21392151
)
@@ -2142,15 +2154,15 @@ def bam_file(bam):
21422154
"-e",
21432155
"--end",
21442156
type=int,
2145-
help="End position of region/variant",
2157+
help="End position of region/variant (add multiple for translocation/BND events)",
21462158
action="append",
21472159
required=True,
21482160
)
21492161

21502162
parser.add_argument(
21512163
"-c",
21522164
"--chrom", type=str,
2153-
help="Chromosome",
2165+
help="Chromosome (add multiple for translocation/BND events)",
21542166
action="append",
21552167
required=True
21562168
)
@@ -2210,6 +2222,14 @@ def gff_file(transcript_file):
22102222
required=False,
22112223
type=str,
22122224
)
2225+
2226+
parser.add_argument(
2227+
"--max_coverage_points",
2228+
help="number of points to plot in coverage axis (downsampled from region size for speed)",
2229+
required=False,
2230+
type=int,
2231+
default=1000,
2232+
)
22132233

22142234
def bed_file(annotation_file):
22152235
if not os.path.isfile(annotation_file):
@@ -2250,7 +2270,7 @@ def bed_file(annotation_file):
22502270
"--coverage_tracktype",
22512271
type=str,
22522272
help="type of track to use for low MAPQ " + "coverage plot.",
2253-
choices=["stack", "superimpose"],
2273+
choices=["stack", "superimpose", "none"],
22542274
default="stack",
22552275
required=False,
22562276
)
@@ -2779,6 +2799,7 @@ def plot_samples(
27792799
yaxis_label_fontsize,
27802800
annotation_files,
27812801
transcript_file,
2802+
max_coverage_points,
27822803
max_coverage,
27832804
marker_size,
27842805
):
@@ -2833,6 +2854,7 @@ def plot_samples(
28332854
coverage_tracktype,
28342855
yaxis_label_fontsize,
28352856
same_yaxis_scales,
2857+
max_coverage_points,
28362858
)
28372859

28382860
if len(curr_linked_reads) > 0:
@@ -2913,6 +2935,7 @@ def plot_samples(
29132935
curr_ax.tick_params(axis="y", labelsize=yaxis_label_fontsize)
29142936
# if there's one hp, 6 ticks fit. Otherwise, do 3
29152937
tick_count = 6 if len(hps) == 1 else 3
2938+
#curr_ax.yaxis.set_major_locator(ticker.FixedLocator(tick_count))
29162939
curr_ax.yaxis.set_major_locator(ticker.LinearLocator(tick_count))
29172940
curr_ax.tick_params(axis="both", length=0)
29182941
curr_ax.set_xticklabels([])
@@ -2950,7 +2973,7 @@ def plot_samples(
29502973
else:
29512974
sys.stderr.write("Ranges greater than 2 are not supported\n")
29522975
sys.exit(1)
2953-
2976+
29542977
curr_ax.set_xticklabels(labels, fontsize=xaxis_label_fontsize)
29552978
chrms = [x.chrm for x in ranges]
29562979
curr_ax.set_xlabel("Chromosomal position on " + "/".join(chrms), fontsize=8)
@@ -3565,6 +3588,7 @@ def plot(parser):
35653588
options.yaxis_label_fontsize,
35663589
options.annotation_files,
35673590
options.transcript_file,
3591+
options.max_coverage_points,
35683592
max_coverage,
35693593
marker_size,
35703594
)
@@ -3598,7 +3622,7 @@ def plot(parser):
35983622
matplotlib.rcParams["agg.path.chunksize"] = 100000
35993623
plt.tight_layout(pad=0.8, h_pad=0.1, w_pad=0.1)
36003624
try:
3601-
plt.savefig(output_file)
3625+
plt.savefig(output_file, dpi=100)
36023626
except Exception as e:
36033627
print(
36043628
"Failed to save figure " + output_file

samplot/samplot_vcf.py

Lines changed: 71 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -440,30 +440,56 @@ def vcf(parser):
440440

441441
out_file = open(args.command_file, "w")
442442

443-
for variant in vcf:
443+
for var_count,variant in enumerate(vcf):
444+
try:
445+
translocation_chrom = variant.info.get("CHR2")
446+
except:
447+
translocation_chrom = None
444448
svtype = variant.info.get("SVTYPE", "SV")
445449
if args.important_regions:
446450
if not var_in_important_regions(
447451
important_regions, variant.chrom, variant.start, variant.stop
448452
):
453+
if args.debug:
454+
print("Skipping {} at {}:{}-{}, outside important_regions coordinates".format(
455+
svtype, variant.chrom, variant.start, variant.stop),file=sys.stderr)
449456
continue
450457

451458
if svtype in ("INS"):
459+
if args.debug:
460+
print("Skipping {} at {}:{}-{}, INS type not supported".format(
461+
svtype, variant.chrom, variant.start, variant.stop),file=sys.stderr)
452462
continue
453-
if variant.stop - variant.start > args.max_mb * 1000000:
463+
if args.max_mb and (variant.stop - variant.start > args.max_mb * 1000000):
464+
if args.debug:
465+
print("Skipping {} at {}:{}-{}, variant length greater than max_mb".format(
466+
svtype, variant.chrom, variant.start, variant.stop),file=sys.stderr)
454467
continue
455-
if variant.stop - variant.start < args.min_bp:
468+
469+
if (variant.stop - variant.start < args.min_bp) and translocation_chrom is None:
470+
if args.debug:
471+
print("Skipping {} at {}:{}-{}, variant length shorter than min_bp".format(
472+
svtype, variant.chrom, variant.start, variant.stop),file=sys.stderr)
456473
continue
457474

458475
gts = [s.get("GT", (None, None)) for s in variant.samples.values()]
459476

460477
if sum(None in g for g in gts) >= args.min_call_rate * len(vcf_samples):
478+
if args.debug:
479+
print("Skipping {} at {}:{}-{}, call rate of variant below min_call_rate".format(
480+
svtype, variant.chrom, variant.start, variant.stop),file=sys.stderr)
461481
continue
462482
if args.max_hets:
463483
# requisite hets/hom-alts
464484
if sum(sum(x) >= 1 for x in gts if not None in x) > args.max_hets:
485+
if args.debug:
486+
print("Skipping {} at {}:{}-{}, more than max_hets heterozygotes".format(
487+
svtype, variant.chrom, variant.start, variant.stop),file=sys.stderr)
465488
continue
466489
if not any(sum(x) > 0 for x in gts if not None in x):
490+
if args.debug:
491+
print("Skipping {} at {}:{}-{}, no samples have non-ref genotypes".format(
492+
svtype, variant.chrom, variant.start, variant.stop),file=sys.stderr)
467493
continue
468494

469495
test_idxs = [i for i, gt in enumerate(gts) if not None in gt and sum(gt) > 0]
@@ -493,7 +519,10 @@ def vcf(parser):
493519
for i in idxs:
494520
if vcf_samples[i] in names_to_bams:
495521
variant_samples.append(vcf_samples[i])
496-
if len(variant_samples) <= 0:
522+
if len(variant_samples) == 0:
523+
if args.debug:
524+
print("Skipping {} at {}:{}-{}, no samples with matched alignment files have variant".format(
525+
svtype, variant.chrom, variant.start, variant.stop),file=sys.stderr)
497526
continue
498527

499528
bams = [names_to_bams[s] for s in variant_samples]
@@ -510,6 +539,9 @@ def vcf(parser):
510539
is_dn.append(sample.id)
511540

512541
if len(is_dn) <= 0 and args.dn_only:
542+
if args.debug:
543+
print("Skipping {} at {}:{}-{}, dn_only selected and no de novos found".format(
544+
svtype, variant.chrom, variant.start, variant.stop),file=sys.stderr)
513545
continue
514546

515547
# save these for the html.
@@ -581,6 +613,7 @@ def vcf(parser):
581613

582614
data_dict = {
583615
"chrom": variant.chrom,
616+
"chrom2": translocation_chrom,
584617
"start": variant.start,
585618
"end": variant.stop,
586619
"svtype": svtype,
@@ -590,15 +623,17 @@ def vcf(parser):
590623
}
591624
if annotations:
592625
data_dict["overlaps"] = get_overlap(
593-
annotations, variant.chrom, variant.start, variant.stop
626+
annotations, variant.chrom, variant.start, variant.stop, translocation_chrom
594627
)
595628
if dn_row != "":
596629
data_dict["dn"] = ",".join(is_dn)
630+
if translocation_chrom is None:
631+
template = "{svtype}_{chrom}_{start}_{end}.{itype}"
632+
else:
633+
template = "{svtype}_{chrom}_{start}_{chrom2}_{end}.{itype}"
597634
fig_path = os.path.join(
598635
args.out_dir,
599-
"{svtype}_{chrom}_{start}_{end}.{itype}".format(
600-
itype=args.output_type, **data_dict
601-
),
636+
template.format(itype=args.output_type, **data_dict),
602637
)
603638
tabledata.append(data_dict)
604639

@@ -633,8 +668,22 @@ def vcf(parser):
633668
else:
634669
title_list.append(variant_sample)
635670

671+
template = ("samplot plot {extra_args} -z {z} -n {titles}"
672+
+ "{cipos} {ciend} {svtype} -c {chrom} -s {start} "
673+
+ "-e {end} -o {fig_path} -d {downsample} -b {bams}")
674+
start = variant.start
675+
stop = variant.stop
676+
start2 = None
677+
stop2 = None
678+
if translocation_chrom is not None:
679+
template += " -c {chrom2} -s {start2} -e {end2}"
680+
start2 = stop
681+
stop2 = start2 + 1
682+
stop = start + 1
683+
template += "\n"
684+
636685
out_file.write(
637-
"samplot plot {extra_args} -z {z} -n {titles} {cipos} {ciend} {svtype} -c {chrom} -s {start} -e {end} -o {fig_path} -d {downsample} -b {bams}\n".format(
686+
template.format(
638687
extra_args=" ".join(pass_through_args),
639688
bams=" ".join(bams),
640689
titles=" ".join(title_list),
@@ -644,11 +693,16 @@ def vcf(parser):
644693
svtype="-t " + svtype if svtype != "SV" else "",
645694
fig_path=fig_path,
646695
chrom=variant.chrom,
647-
start=variant.start,
648-
end=variant.stop,
696+
start=start,
697+
end=stop,
649698
downsample=args.downsample,
699+
chrom2=translocation_chrom,
700+
start2=start2,
701+
end2=stop2,
650702
)
651703
)
704+
if args.debug:
705+
print("VCF entry count:",var_count+1 ,file=sys.stderr)
652706

653707
if args.command_file:
654708
out_file.close()
@@ -750,7 +804,6 @@ def add_vcf(parent_parser):
750804
"--max_mb",
751805
type=int,
752806
help="skip variants longer than this many megabases",
753-
default=1,
754807
)
755808
parser.add_argument(
756809
"--min_bp",
@@ -802,7 +855,12 @@ def add_vcf(parent_parser):
802855
default=False,
803856
action="store_true",
804857
)
805-
858+
parser.add_argument(
859+
"--debug",
860+
help="prints out the reason each skipped variant entry is skipped",
861+
default=False,
862+
action="store_true",
863+
)
806864
parser.set_defaults(func=vcf)
807865

808866

0 commit comments

Comments
 (0)