@@ -34,6 +34,8 @@ def parse_arguments():
3434 help = 'Description for sample.' )
3535 parser .add_argument ('--genome' , type = str ,
3636 help = 'Reference genome.' )
37+ parser .add_argument ('--pipeline-prefix' , type = str , required = True ,
38+ help = 'Pipeline. e.g. atac, chip.' )
3739 parser .add_argument ('--pipeline-ver' , type = str ,
3840 help = 'Pipeline version.' )
3941 parser .add_argument ('--multimapping' , default = 0 , type = int ,
@@ -50,6 +52,8 @@ def parse_arguments():
5052 help = 'Pipeline type.' )
5153 parser .add_argument ('--aligner' , type = str , required = True ,
5254 help = 'Aligner.' )
55+ parser .add_argument ('--no-dup-removal' , action = 'store_true' ,
56+ help = 'No duplicate removal.' )
5357 parser .add_argument ('--peak-caller' , type = str , required = True ,
5458 help = 'Peak caller.' )
5559 parser .add_argument ('--cap-num-peak' , default = 0 , type = int ,
@@ -302,7 +306,7 @@ def make_cat_align(args, cat_root):
302306 html_head = '<h2>Marking duplicates (filtered BAM)</h2>' ,
303307 html_foot = """
304308 <div id='help-filter'>
305- Filtered out (samtools view -F 1804):
309+ Filtered with samtools flag 1804 (samtools view -F 1804):
306310 <ul>
307311 <li>read unmapped (0x4)</li>
308312 <li>mate unmapped (0x8, for paired-end)</li>
@@ -360,8 +364,16 @@ def make_cat_align(args, cat_root):
360364 'nodup_samstat' ,
361365 html_head = '<h2>SAMstat (filtered/deduped BAM)</h2>' ,
362366 html_foot = """
363- <p>Filtered and duplicates removed</p><br>
364- """ ,
367+ <p>Filtered {dup_removal_detail}.
368+ Subsampling with {pipeline_prefix}.{subsample_param_name} is not done in alignment steps.
369+ Nodup BAM is converted into a BED type (TAGALIGN) later and then TAGALIGN is subsampled
370+ with such parameter in the peak-calling step.<br>
371+ </p>
372+ """ .format (
373+ dup_removal_detail = 'but duplicates are kept' if args .no_dup_removal else 'and duplicates are removed' ,
374+ pipeline_prefix = args .pipeline_prefix ,
375+ subsample_param_name = 'subsample_reads' ,
376+ ),
365377 parser = parse_flagstat_qc ,
366378 map_key_desc = MAP_KEY_DESC_FLAGSTAT_QC ,
367379 parent = cat_align
@@ -465,9 +477,10 @@ def make_cat_lib_complexity(args, cat_root):
465477 locations with EXACTLY two read pairs. The PBC2 should be significantly
466478 greater than 1. {pipeline_specific_info}
467479 </p><br>
468- <p>NRF (non redundant fraction) <br>
469- PBC1 (PCR Bottleneck coefficient 1) <br>
470- PBC2 (PCR Bottleneck coefficient 2) <br>
480+ <p>Fragment: read for a single-ended dataset, pair of reads for a paired-ended dataset <br>
481+ NRF: non redundant fraction <br>
482+ PBC1: PCR Bottleneck coefficient 1 <br>
483+ PBC2: PCR Bottleneck coefficient 2 <br>
471484 PBC1 is the primary measure. Provisionally <br>
472485 <ul>
473486 <li>0-0.5 is severe bottlenecking</li>
@@ -580,7 +593,7 @@ def make_cat_replication(args, cat_root):
580593 'num_peaks' ,
581594 html_head = '<h2>Number of raw peaks</h2>' ,
582595 html_foot = """
583- Top {num_peak} raw peaks from {peak_caller} {extra_info}
596+ The number of peaks is capped at {num_peak}<br>Peaks are called from {peak_caller} {extra_info}
584597 """ .format (
585598 num_peak = args .cap_num_peak ,
586599 peak_caller = args .peak_caller ,
@@ -651,7 +664,7 @@ def make_cat_align_enrich(args, cat_root):
651664 html_head_xcor = '<h2>Strand cross-correlation measures (trimmed/filtered SE BAM)</h2>'
652665 html_foot_xcor = """
653666 <br><p>Performed on subsampled ({xcor_subsample_reads}) reads mapped from FASTQs that are trimmed to {xcor_trim_bp}.
654- Such FASTQ trimming and subsampling reads are for cross-corrleation analysis only.
667+ Such FASTQ trimming and subsampling are for the cross-correlation analysis only and only R1 reads are taken.
655668 Untrimmed FASTQs are used for all the other analyses.</p>
656669 <div id='help-xcor'><p>
657670 NOTE1: For SE datasets, reads from replicates are randomly subsampled to {xcor_subsample_reads}.<br>
@@ -670,6 +683,7 @@ def make_cat_align_enrich(args, cat_root):
670683 xcor_subsample_reads = args .xcor_subsample_reads
671684 )
672685 html_foot_xcor += """<ul>
686+ <li>Fragment = read (for single-ended dataset) or pair of reads (for paired-ended dataset) </li>
673687 <li>Normalized strand cross-correlation coefficient (NSC) = col9 in outFile </li>
674688 <li>Relative strand cross-correlation coefficient (RSC) = col10 in outFile </li>
675689 <li>Estimated fragment length = col3 in outFile, take the top value </li>
0 commit comments