Skip to content

Commit 5c321e7

Browse files
Made an example of how to call the center_around_peak module and tested it
1 parent f77dc12 commit 5c321e7

File tree

3 files changed

+25
-11
lines changed

3 files changed

+25
-11
lines changed

bin/center_around_peak.sh

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,7 @@
1-
BEGIN {
2-
# Read chromosome sizes into an array
3-
while ((getline < chrom_size_file) > 0) {
4-
chrom_sizes[$1] = $2;
5-
}
6-
}
1+
# First file (e.g. chrom_sizes.txt): Load chromosome sizes into an array
2+
NR==FNR { chrom_sizes[$1] = $2; next; }
3+
4+
# Second file (e.g. input.bed): Process BED data
75
# Print header lines as they are
86
/^#/ { print; next; }
97
{
@@ -13,18 +11,18 @@ BEGIN {
1311

1412
# the new start and end values
1513
start = mid - left;
16-
end = mid + right;
14+
ends = mid + right;
1715

1816
# Ensure start is not negative
1917
if (start < 0) start = 0;
2018

2119
# Ensure end does not exceed chromosome size
22-
if ($1 in chrom_sizes && end > chrom_sizes[$1]) {
23-
end = chrom_sizes[$1];
20+
if ($1 in chrom_sizes && ends > chrom_sizes[$1]) {
21+
ends = chrom_sizes[$1];
2422
}
2523

2624
# Print updated start, end, and ALL remaining columns
27-
printf "%s\t%d\t%d", $1, start, end;
25+
printf "%s\t%d\t%d", $1, start, ends;
2826
for (i=4; i<=NF; i++) {
2927
printf "\t%s", $i;
3028
}

conf/modules.config

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,9 @@ process {
3636
publishDir = [
3737
enabled: false
3838
]
39-
ext.args = { "-v N=${meta.size} -v chrom_size_file=${meta.genome_file}" }
39+
ext.args = { "-v N=${meta.size}" }
4040
ext.prefix = { "${meta.id}_centered" }
41+
ext.suffix = { "bed" }
4142
}
4243

4344
withName: "SPLIT_DATA_CONFIG" {

subworkflows/local/preprocess_bedfile_to_fasta/main.nf

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ the dataset for stimulus with sequences as input and foreground/background
1818
*/
1919
include { EXTRACT_DATA_CONTENT_BY_COLUMN_VALUES as EXTRACT_FOREGROUND } from '../../../modules/local/extract_data_content_by_column_values'
2020
include { EXTRACT_DATA_CONTENT_BY_COLUMN_VALUES as EXTRACT_BACKGROUND_ALIENS } from '../../../modules/local/extract_data_content_by_column_values'
21+
include { GAWK as CENTER_AROUND_PEAK } from '../../../modules/nf-core/gawk'
22+
2123

2224
workflow PREPROCESS_BEDFILE_TO_FASTA {
2325
take:
@@ -26,6 +28,19 @@ workflow PREPROCESS_BEDFILE_TO_FASTA {
2628

2729
main:
2830

31+
// TODO the following is just a proof of concept and a how-to example
32+
// on the usage of the GAWK nf-core module for modifying
33+
// bed start and end values based on distance from peak (centering).
34+
/*
35+
ch_genome_size = channel.fromPath("/users/cn/avignoli/test/human.hg38.genome") // abs path so you can go and check if needed on cluster.
36+
ch_input_bed = channel.fromPath("/users/cn/avignoli/test/input.bed")
37+
ch_center_input = ch_genome_size.combine(ch_input_bed).map{
38+
it -> [["id" : it[1].getBaseName(), "size" : 10], it]
39+
} // TODO replace size with the appropriate params/variable containing the size to be used for centering
40+
ch_awk_program = channel.fromPath('./bin/center_around_peak.sh')
41+
CENTER_AROUND_PEAK(ch_center_input, ch_awk_program)
42+
*/
43+
2944
// extract foreground
3045

3146
ch_foreground_ids = ch_config

0 commit comments

Comments
 (0)