Skip to content

Commit 2155f85

Browse files
committed
code refactoring, bug fix for unary operator error
1 parent 61963e2 commit 2155f85

26 files changed

+1147
-1587
lines changed

chipseq.bds

Lines changed: 9 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ use_macs2_gapped_peak := false help Use gapped peak instead of narrow peak (for
2424

2525
help() // print help and exit if no parameters are given
2626

27+
include "modules/pipeline_template.bds"
28+
2729
include "modules/input.bds"
2830

2931
include "modules/align_bwa.bds"
@@ -37,8 +39,6 @@ include "modules/callpeak_idr.bds"
3739

3840
include "modules/signal.bds"
3941

40-
include "modules/report.bds"
41-
4242

4343

4444
// Important file names are stored in global variables (usually a string map string{} with a key with replicate id and peakcaller name)
@@ -96,8 +96,6 @@ void init_chipseq() {
9696

9797
read_conf_chipseq()
9898

99-
print_chipseq()
100-
10199
init_filetable()
102100
}
103101

@@ -121,9 +119,7 @@ void read_conf_chipseq() {
121119
use_macs2_gapped_peak = get_conf_val_bool( use_macs2_gapped_peak, ["use_macs2_gapped_peak"] )
122120

123121
if ( histone ) type = "histone"
124-
}
125-
126-
void print_chipseq() {
122+
if ( idr_rank == "" ) idr_rank = is_TF_chipseq() ? "signal.value" : "p.value"
127123

128124
print( "\n\n== chipseq pipeline settings\n")
129125
print( "Type of ChIP-Seq pipeline\t\t: $type\n")
@@ -282,7 +278,7 @@ void align() {
282278
for ( int rep=1; rep <= get_num_rep( ctl ); rep++) {
283279
if ( !input_file_exists( ctl, rep ) ) continue
284280

285-
// check file size to distribute_nth nth to each nth_app
281+
// check file size to distribute_nonzero nth to each nth_app
286282
// determine # threads for each app related to alignment
287283

288284
group := get_group_name( ctl, rep )
@@ -300,7 +296,7 @@ void align() {
300296
}
301297
}
302298

303-
nth_rep := distribute_nth( nth, filesize )
299+
nth_rep := distribute_nonzero( nth, filesize )
304300

305301
// align
306302

@@ -717,13 +713,13 @@ void call_peaks() {
717713

718714
for ( int rep=1; rep<=get_num_rep(); rep++ ) {
719715

720-
nlines{"rep$rep"} = get_no_lines( tag{"rep$rep"} )
716+
nlines{"rep$rep"} = get_num_lines( tag{"rep$rep"} )
721717

722718
//print("DEBUG: # lines rep $rep tag.: " + nlines{"rep$rep"} + ", " + tag{"rep$rep"} + "\n")
723719

724720
if ( input_file_exists( 1, rep ) ) { // if control exists
725721

726-
nlines{"ctl$rep"} = get_no_lines( tag{"ctl$rep"} )
722+
nlines{"ctl$rep"} = get_num_lines( tag{"ctl$rep"} )
727723
nlines_ctl.add( nlines{"ctl$rep"} )
728724

729725
//print("DEBUG: # lines ctl $rep tag.: " + nlines{"ctl$rep"} + ", " + tag{"ctl$rep"} + "\n")
@@ -799,7 +795,7 @@ void call_peaks() {
799795

800796
// distribute # of threads for each peak calling
801797
int nth_pooled, nth_other
802-
( nth_pooled, nth_other ) = distribute_nth( nth, [ get_num_rep(), 1, 1, 1 ] )
798+
( nth_pooled, nth_other ) = distribute_nonzero( nth, [ get_num_rep(), 1, 1, 1 ] )
803799

804800
// create directories for peaks (spp, macs2), signal tracks (macs2)
805801
spp_o_dir := mkdir( "$out_dir/peak/spp")
@@ -1096,11 +1092,8 @@ void do_idr() {
10961092

10971093
if ( !is_final_stage_idr() ) return
10981094
if ( is_histone_chipseq() ) return
1099-
11001095
if ( !peak_exists() ) return // if no peaks from peak caller exit
11011096

1102-
if ( idr_rank == "" ) idr_rank = is_TF_chipseq() ? "signal.value" : "p.value"
1103-
11041097
// peak caller
11051098
pc := get_peak_caller()
11061099

@@ -1308,7 +1301,7 @@ string html_chipseq_tracks() {
13081301
if ( idr_pr.hasKey(rep) ) {trk_types += "hammock"; trk_names += "$title peak idr (rep$rep-pr)"; trk_files += peak_to_hammock( _get_idr_peak_trk( idr_pr{rep} ) ) }
13091302
}
13101303

1311-
html := html_epg_browser_viz( trk_files, trk_types, trk_names )
1304+
html := html_epg_browser_viz( trk_files, trk_types, trk_names, species )
13121305

13131306
return html
13141307
}

modules/align_bwa.bds

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
#vim: syntax=java
33

44
include "species.bds"
5-
include "report.bds"
5+
include "module_template.bds"
66

77

88
help == align bwa settings (requirements: -bwa_idx)
@@ -72,7 +72,7 @@ string[] bwa_PE( string fastq1, string fastq2, string o_dir, string log_o_dir, s
7272

7373
if ( out <- in ) { // compare file timestamps of in and out (to check if job is already done or not)
7474

75-
nth_bwa_aln := distribute_nth( nth_bwa, [1,1] )
75+
nth_bwa_aln := distribute_nonzero( nth_bwa, [1,1] )
7676

7777
// parallel jobs
7878
sai1 := bwa_aln( fastq1, o_dir, group+"_1", nth_bwa_aln[0] )

modules/align_multimapping.bds

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,10 @@ help == align multimapping settings
88
multimapping := 0 help # alignments reported for multimapping (default: 0).
99

1010

11-
1211
init_align_multimapping()
1312

1413

1514
void init_align_multimapping() {
16-
1715
multimapping = get_conf_val_int( multimapping, ["multimapping"] )
1816

1917
print("\n\n== align multimapping settings\n")

modules/callpeak_idr.bds

Lines changed: 30 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
#vim: syntax=java
33

44
include "species.bds"
5-
include "report.bds"
5+
include "module_template.bds"
66

77

88
help == IDR settings
@@ -19,26 +19,20 @@ void init_idr() {
1919

2020
idr_suffix = get_conf_val_bool( idr_suffix, ["idr_suffix"] )
2121

22-
print_idr()
23-
}
24-
25-
void print_idr() {
26-
2722
print( "\n\n== IDR settings\n")
2823
print( "Append IDR threshold to IDR out_dir\t: $idr_suffix\n" )
2924
}
3025

3126
void chk_idr() {
32-
if ( !path_exists( blacklist ) ) print("\nWarning: Blacklist bed (-blacklist) for final IDR QC is missing! (file: $blacklist)\n\n")
27+
if ( !path_exists( blacklist ) ) print("\nWarning: Blacklist bed (-blacklist) for final IDR QC"+\
28+
" is missing! (file: $blacklist)\n\n")
3329
}
3430

35-
3631
// 12-col.bed.gz (10 col for narrowPeak + local IDR, global IDR) will be generated for browser tracks
3732
// A temporary 13th column bed should be generated due to bedtools intersect compatibility issue
3833
// use function _get_idr_peak_trk( idr_peak ) to find 12-col.bed.gz for browser tracks
3934
// rank: 'signal.value' for SPP peaks, 'p.value' for MACS2 peaks
4035
string[] idr2( string peak1, string peak2, string pooled, string idr_thresh, string rank, string o_dir, string group ) {
41-
4236
if ( idr_suffix ) o_dir = mkdir( o_dir + "_$idr_thresh" )
4337

4438
prefix := "$o_dir/" + (title=="" ? "" : (title+"_") ) + group
@@ -111,12 +105,11 @@ string[] idr2( string peak1, string peak2, string pooled, string idr_thresh, str
111105
// idr_pr: IDR peaks for pseudo replicates (key:"rep") e.g. "1" for pseudo replicates of replicate 1
112106
string[] idr_final_qc( string{} idr_tr, string{} idr_pr, string idr_ppr, \
113107
string idr_o_dir, string qc_o_dir, string group ) {
114-
115108
//// get # of lines in each IDR peak files
116109

117110
// pseudo replicates
118111
int[] N
119-
for ( string peak : idr_pr ) N.add( get_no_lines( peak ) )
112+
for ( string peak : idr_pr ) N.add( get_num_lines( peak ) )
120113

121114
// true replicates
122115
Nt := 0
@@ -125,7 +118,7 @@ string[] idr_final_qc( string{} idr_tr, string{} idr_pr, string idr_ppr, \
125118
for ( string key : idr_tr.keys() ) { // key = rep id 1, rep id 2
126119

127120
peak := idr_tr{ key }
128-
nlines := ( peak != "" ) ? get_no_lines( peak ) : 0
121+
nlines := ( peak != "" ) ? get_num_lines( peak ) : 0
129122

130123
if ( nlines >= Nt ) {
131124
Nt = nlines
@@ -135,7 +128,7 @@ string[] idr_final_qc( string{} idr_tr, string{} idr_pr, string idr_ppr, \
135128
}
136129

137130
// pooled pseudo replicates
138-
Np := ( idr_ppr != "" ) ? get_no_lines( idr_ppr ) : 0
131+
Np := ( idr_ppr != "" ) ? get_num_lines( idr_ppr ) : 0
139132

140133
//// find optimal set and conservative set
141134
string optimal_set, conservative_set
@@ -180,11 +173,11 @@ string[] idr_final_qc( string{} idr_tr, string{} idr_pr, string idr_ppr, \
180173

181174
//// compute IDR scores
182175

183-
real max_Np_Nt = max( Np, Nt )
184-
real min_Np_Nt = min( Np, Nt )
176+
real max_Np_Nt = _max( Np, Nt )
177+
real min_Np_Nt = _min( Np, Nt )
185178

186-
real max_N = N.size() > 0 ? max( N ) : 0
187-
real min_N = N.size() > 0 ? min( N ) : 0
179+
real max_N = N.size() > 0 ? _max( N ) : 0
180+
real min_N = N.size() > 0 ? _min( N ) : 0
188181

189182
rescue_ratio := max_Np_Nt / min_Np_Nt
190183
self_consistency_ratio := max_N / min_N
@@ -227,14 +220,32 @@ string[] idr_final_qc( string{} idr_tr, string{} idr_pr, string idr_ppr, \
227220
}
228221

229222
string _get_group_from_key( string key ) { //parse "i,j" to "repi-repj"
230-
231223
tmp := key.split(",")
232224
i := tmp[0]
233225
j := tmp[1]
234226
return "rep$i-rep$j"
235227
}
236228

237229
string _get_idr_peak_trk( string idr_peak ) {
238-
239230
return rm_ext( idr_peak, ["narrowPeak","regionPeak"] ) + ".12-col.bed.gz"
240231
}
232+
233+
int _max( int a, int b ) {
234+
return max( a, b )
235+
}
236+
237+
int _min( int a, int b ) {
238+
return min( a, b )
239+
}
240+
241+
int _max( int[] arr ) {
242+
tmp := arr[0]
243+
for( int i=1; i<arr.size();i++ ) tmp = _max( tmp, arr[i] )
244+
return tmp
245+
}
246+
247+
int _min( int[] arr ) {
248+
tmp := arr[0]
249+
for( int i=1; i<arr.size();i++ ) tmp = _min( tmp, arr[i] )
250+
return tmp
251+
}

modules/callpeak_macs2.bds

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
#vim: syntax=java
33

44
include "species.bds"
5-
include "report.bds"
5+
include "module_template.bds"
66

77

88
help == callpeak macs2 settings (requirements: -chrsz -gensz)

modules/callpeak_naive_overlap.bds

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
#vim: syntax=java
33

44
include "species.bds"
5-
include "report.bds"
5+
include "module_template.bds"
66

77

88
grp_color_naive_overlap := "skyblue"

modules/callpeak_spp.bds

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
#vim: syntax=java
33

44
include "species.bds"
5-
include "report.bds"
5+
include "module_template.bds"
66

77

88
help == callpeak spp settings

modules/cluster.bds

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
#!/usr/bin/env bds
2+
#vim: syntax=java
3+
4+
include "conf.bds"
5+
6+
7+
help == cluster/system/resource settings
8+
wt := "5h50m" help Walltime for all single-threaded tasks (example: 8:10:00, 3h, 3600, default: 5h50m, 5:50:00).
9+
memory := "7G" help Maximum memory for all single-threaded tasks (equivalent to '-mem', example: 4.5G, 1024M, default: 7G).
10+
use_system := "" help Force to use a system (equivalent to 'bds -s [SYSTEM_NAME] ...', any system defined in bds.config can be used).
11+
nice := 0 help Set process priority for all tasks (default: 0; -20 (highest) ~ 19 (lowest) ).
12+
retrial := 0 help # of Retrial for failed tasks (default: 0).
13+
q := "" help Submit tasks to a specified cluster queue.
14+
unlimited_mem_wt:= false help Use unlimited max. memory and walltime.
15+
16+
17+
init_cluster()
18+
19+
20+
void init_cluster() {
21+
wt = get_conf_val( wt, ["wt"] )
22+
memory = get_conf_val( memory, ["memory","mem"] )
23+
use_system = get_conf_val( use_system, ["use_system","system"] )
24+
nice = get_conf_val_int( nice, ["nice"] )
25+
retrial = get_conf_val_int( retrial, ["retrial","retry"] )
26+
q = get_conf_val( q, ["q"] )
27+
unlimited_mem_wt= get_conf_val_bool( unlimited_mem_wt, ["unlimited_mem_wt"] )
28+
29+
if ( cmd_line_arg_has_key("mem") ) memory = get_cmd_line_arg_val( "mem" )
30+
if ( cmd_line_arg_has_key("system") ) use_system = get_cmd_line_arg_val( "system" )
31+
if ( nice <= -20 ) nice = -20
32+
if ( nice > 19 ) nice = 19
33+
if ( use_system != "" ) system = use_system.toLower()
34+
if ( system == "slurm" ) system = "generic"
35+
if ( q != "" ) queue = q
36+
37+
// cpus, mem and timeout are pre-declared BDS variables for default resource settings
38+
mem = get_res_mem(memory,1)
39+
timeout = get_res_wt(wt)
40+
retry = retrial
41+
42+
// do not modify this (BDS timeout; how long BDS will wait for tasks to be queued on the cluster)
43+
walltimeout = 3600*24*100 // timeout var. in BigDataScript (100 days, jobs will never be stopped by BDS due to BDS timeout)
44+
45+
print("\n\n== cluster/system info\n")
46+
print( "Walltime (general)\t\t: $wt\n" )
47+
print( "Max. memory (general)\t\t: $memory\n" )
48+
print( "Force to use a system\t\t: $use_system\n" )
49+
print( "Process priority (niceness)\t: $nice\n" )
50+
print( "Retiral for failed tasks\t: $retrial\n" )
51+
print( "Submit tasks to a cluster queue\t: $q\n" )
52+
print( "Unlimited cluster mem./walltime\t: $unlimited_mem_wt\n")
53+
}
54+
55+
int get_res_wt( string str ) {
56+
return (unlimited_mem_wt || is_system_local() ) ? -1 : parse_time( str )
57+
}
58+
59+
int get_res_mem( string str, int n ) {
60+
if ( n < 1 ) n = 1
61+
return (unlimited_mem_wt || is_system_local() ) ? -1 : parse_mem( str )/n
62+
}
63+
64+
int get_res_mem( string str ) {
65+
return get_res_mem( str , 1 )
66+
}
67+
68+
bool is_system_sge() {
69+
return system == "sge"
70+
}
71+
72+
bool is_system_local() {
73+
return system == "local"
74+
}
75+
76+
bool is_system_generic() {
77+
return system == "generic"
78+
}
79+
80+
bool is_system_slurm() {
81+
// slurm uses generic cluster, it's configured in bds.config and ./utils/clusterGeneral
82+
return system == "generic"
83+
}

0 commit comments

Comments
 (0)