File tree Expand file tree Collapse file tree 3 files changed +25
-11
lines changed
subworkflows/local/preprocess_bedfile_to_fasta Expand file tree Collapse file tree 3 files changed +25
-11
lines changed Original file line number Diff line number Diff line change 1- BEGIN {
2- # Read chromosome sizes into an array
3- while (( getline < chrom_size_file) > 0 ) {
4- chrom_sizes[$1 ] = $2 ;
5- }
6- }
1+ # First file (e.g. chrom_sizes.txt): Load chromosome sizes into an array
2+ NR==FNR { chrom_sizes[$1 ] = $2 ; next; }
3+
4+ # Second file (e.g. input.bed): Process BED data
75# Print header lines as they are
86/^# / { print; next; }
97{
@@ -13,18 +11,18 @@ BEGIN {
1311
1412 # the new start and end values
1513 start = mid - left;
16- end = mid + right;
14+ ends = mid + right;
1715
1816 # Ensure start is not negative
1917 if (start < 0) start = 0;
2018
2119 # Ensure end does not exceed chromosome size
22- if ($1 in chrom_sizes && end > chrom_sizes[$1 ]) {
23- end = chrom_sizes[$1 ];
20+ if ($1 in chrom_sizes && ends > chrom_sizes[$1 ]) {
21+ ends = chrom_sizes[$1 ];
2422 }
2523
2624 # Print updated start, end, and ALL remaining columns
27- printf "%s\t%d\t%d", $1 , start, end ;
25+ printf " %s\t%d\t%d" , $1 , start, ends ;
2826 for (i=4; i< =NF; i++) {
2927 printf " \t%s" , $i ;
3028 }
Original file line number Diff line number Diff line change @@ -36,8 +36,9 @@ process {
3636 publishDir = [
3737 enabled: false
3838 ]
39- ext.args = { "-v N=${meta.size} -v chrom_size_file=${meta.genome_file }" }
39+ ext.args = { "-v N=${meta.size}" }
4040 ext.prefix = { "${meta.id}_centered" }
41+ ext.suffix = { "bed" }
4142 }
4243
4344 withName: "SPLIT_DATA_CONFIG" {
Original file line number Diff line number Diff line change @@ -18,6 +18,8 @@ the dataset for stimulus with sequences as input and foreground/background
1818*/
1919include { EXTRACT_DATA_CONTENT_BY_COLUMN_VALUES as EXTRACT_FOREGROUND } from ' ../../../modules/local/extract_data_content_by_column_values'
2020include { EXTRACT_DATA_CONTENT_BY_COLUMN_VALUES as EXTRACT_BACKGROUND_ALIENS } from ' ../../../modules/local/extract_data_content_by_column_values'
21+ include { GAWK as CENTER_AROUND_PEAK } from ' ../../../modules/nf-core/gawk'
22+
2123
2224workflow PREPROCESS_BEDFILE_TO_FASTA {
2325 take :
@@ -26,6 +28,19 @@ workflow PREPROCESS_BEDFILE_TO_FASTA {
2628
2729 main :
2830
31+ // TODO the following is just a proof of concept and a how-to example
32+ // on the usage of the GAWK nf-core module for modifying
33+ // bed start and end values based on distance from peak (centering).
34+ /*
35+ ch_genome_size = channel.fromPath("/users/cn/avignoli/test/human.hg38.genome") // abs path so you can go and check if needed on cluster.
36+ ch_input_bed = channel.fromPath("/users/cn/avignoli/test/input.bed")
37+ ch_center_input = ch_genome_size.combine(ch_input_bed).map{
38+ it -> [["id" : it[1].getBaseName(), "size" : 10], it]
39+ } // TODO replace size with the appropriate params/variable containing the size to be used for centering
40+ ch_awk_program = channel.fromPath('./bin/center_around_peak.sh')
41+ CENTER_AROUND_PEAK(ch_center_input, ch_awk_program)
42+ */
43+
2944 // extract foreground
3045
3146 ch_foreground_ids = ch_config
You can’t perform that action at this time.
0 commit comments