Skip to content

Commit 3b918b6

Browse files
authored
Merge pull request #263 from ENCODE-DCC/PIPE-36_add_more_info_to_report
more info in qc_report
2 parents fbf7b20 + 9594bee commit 3b918b6

File tree

2 files changed

+26 -8 lines changed

2 files changed

+26 -8 lines changed

chip.wdl

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2096,6 +2096,7 @@ workflow chip {
20962096
ctl_paired_ends = ctl_paired_end_,
20972097
pipeline_type = pipeline_type,
20982098
aligner = aligner_,
2099+
no_dup_removal = no_dup_removal,
20992100
peak_caller = peak_caller_,
21002101
cap_num_peak = cap_num_peak_,
21012102
idr_thresh = idr_thresh,
@@ -3046,6 +3047,7 @@ task qc_report {
30463047
Array[Boolean] ctl_paired_ends
30473048
String pipeline_type
30483049
String aligner
3050+
Boolean no_dup_removal
30493051
String peak_caller
30503052
Int cap_num_peak
30513053
Float idr_thresh
@@ -3105,6 +3107,7 @@ task qc_report {
31053107
command {
31063108
set -e
31073109
python3 $(which encode_task_qc_report.py) \
3110+
--pipeline-prefix chip \
31083111
${'--pipeline-ver ' + pipeline_ver} \
31093112
${"--title '" + sub(title,"'","_") + "'"} \
31103113
${"--desc '" + sub(description,"'","_") + "'"} \
@@ -3114,6 +3117,7 @@ task qc_report {
31143117
--ctl-paired-ends ${sep=' ' ctl_paired_ends} \
31153118
--pipeline-type ${pipeline_type} \
31163119
--aligner ${aligner} \
3120+
${if (no_dup_removal) then '--no-dup-removal ' else ''} \
31173121
--peak-caller ${peak_caller} \
31183122
${'--cap-num-peak ' + cap_num_peak} \
31193123
--idr-thresh ${idr_thresh} \

src/encode_task_qc_report.py

Lines changed: 22 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,8 @@ def parse_arguments():
3434
help='Description for sample.')
3535
parser.add_argument('--genome', type=str,
3636
help='Reference genome.')
37+
parser.add_argument('--pipeline-prefix', type=str, required=True,
38+
help='Pipeline. e.g. atac, chip.')
3739
parser.add_argument('--pipeline-ver', type=str,
3840
help='Pipeline version.')
3941
parser.add_argument('--multimapping', default=0, type=int,
@@ -50,6 +52,8 @@ def parse_arguments():
5052
help='Pipeline type.')
5153
parser.add_argument('--aligner', type=str, required=True,
5254
help='Aligner.')
55+
parser.add_argument('--no-dup-removal', action='store_true',
56+
help='No duplicate removal.')
5357
parser.add_argument('--peak-caller', type=str, required=True,
5458
help='Peak caller.')
5559
parser.add_argument('--cap-num-peak', default=0, type=int,
@@ -302,7 +306,7 @@ def make_cat_align(args, cat_root):
302306
html_head='<h2>Marking duplicates (filtered BAM)</h2>',
303307
html_foot="""
304308
<div id='help-filter'>
305-
Filtered out (samtools view -F 1804):
309+
Filtered with samtools flag 1804 (samtools view -F 1804):
306310
<ul>
307311
<li>read unmapped (0x4)</li>
308312
<li>mate unmapped (0x8, for paired-end)</li>
@@ -360,8 +364,16 @@ def make_cat_align(args, cat_root):
360364
'nodup_samstat',
361365
html_head='<h2>SAMstat (filtered/deduped BAM)</h2>',
362366
html_foot="""
363-
<p>Filtered and duplicates removed</p><br>
364-
""",
367+
<p>Filtered {dup_removal_detail}.
368+
Subsampling with {pipeline_prefix}.{subsample_param_name} is not done in alignment steps.
369+
Nodup BAM is converted into a BED type (TAGALIGN) later and then TAGALIGN is subsampled
370+
with such parameter in the peak-calling step.<br>
371+
</p>
372+
""".format(
373+
dup_removal_detail='but duplicates are kept' if args.no_dup_removal else 'and duplicates are removed',
374+
pipeline_prefix=args.pipeline_prefix,
375+
subsample_param_name='subsample_reads',
376+
),
365377
parser=parse_flagstat_qc,
366378
map_key_desc=MAP_KEY_DESC_FLAGSTAT_QC,
367379
parent=cat_align
@@ -465,9 +477,10 @@ def make_cat_lib_complexity(args, cat_root):
465477
locations with EXACTLY two read pairs. The PBC2 should be significantly
466478
greater than 1. {pipeline_specific_info}
467479
</p><br>
468-
<p>NRF (non redundant fraction) <br>
469-
PBC1 (PCR Bottleneck coefficient 1) <br>
470-
PBC2 (PCR Bottleneck coefficient 2) <br>
480+
<p>Fragment: read for a single-ended dataset, pair of reads for a paired-ended dataset <br>
481+
NRF: non redundant fraction <br>
482+
PBC1: PCR Bottleneck coefficient 1 <br>
483+
PBC2: PCR Bottleneck coefficient 2 <br>
471484
PBC1 is the primary measure. Provisionally <br>
472485
<ul>
473486
<li>0-0.5 is severe bottlenecking</li>
@@ -580,7 +593,7 @@ def make_cat_replication(args, cat_root):
580593
'num_peaks',
581594
html_head='<h2>Number of raw peaks</h2>',
582595
html_foot="""
583-
Top {num_peak} raw peaks from {peak_caller} {extra_info}
596+
The number of peaks is capped at {num_peak}<br>Peaks are called from {peak_caller} {extra_info}
584597
""".format(
585598
num_peak=args.cap_num_peak,
586599
peak_caller=args.peak_caller,
@@ -651,7 +664,7 @@ def make_cat_align_enrich(args, cat_root):
651664
html_head_xcor = '<h2>Strand cross-correlation measures (trimmed/filtered SE BAM)</h2>'
652665
html_foot_xcor = """
653666
<br><p>Performed on subsampled ({xcor_subsample_reads}) reads mapped from FASTQs that are trimmed to {xcor_trim_bp}.
654-
Such FASTQ trimming and subsampling reads are for cross-corrleation analysis only.
667+
Such FASTQ trimming and subsampling are for the cross-corrleation analysis only and only R1 reads are taken.
655668
Untrimmed FASTQs are used for all the other analyses.</p>
656669
<div id='help-xcor'><p>
657670
NOTE1: For SE datasets, reads from replicates are randomly subsampled to {xcor_subsample_reads}.<br>
@@ -670,6 +683,7 @@ def make_cat_align_enrich(args, cat_root):
670683
xcor_subsample_reads=args.xcor_subsample_reads
671684
)
672685
html_foot_xcor += """<ul>
686+
<li>Fragment = read (for single-ended dataset) or pair of reads (for paired-ended dataset) </li>
673687
<li>Normalized strand cross-correlation coefficient (NSC) = col9 in outFile </li>
674688
<li>Relative strand cross-correlation coefficient (RSC) = col10 in outFile </li>
675689
<li>Estimated fragment length = col3 in outFile, take the top value </li>

0 commit comments

Comments
 (0)