Skip to content

Commit bb54895

Browse files
authored
Merge pull request #211 from nf-core/136-implement-psirc-for-isoform-detection
Implement isoform detection methods
2 parents e8bfa9c + b559371 commit bb54895

File tree

113 files changed

+6537
-384
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

113 files changed

+6537
-384
lines changed

conf/full.config

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,6 @@
1212
*/
1313

1414
params {
15-
tools = 'circexplorer2,ciriquant,find_circ,circrna_finder,mapsplice,dcc,segemehl'
16-
min_tools = 2
15+
tools = 'psirc,circexplorer2,ciri,find_circ,circrna_finder,mapsplice,circtools,segemehl'
16+
min_tools = 2
1717
}

conf/modules.config

Lines changed: 294 additions & 116 deletions
Large diffs are not rendered by default.

modules.json

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,12 @@
6666
"git_sha": "15a93581f7f81d05c04b828954004a089360ea9b",
6767
"installed_by": ["modules"]
6868
},
69+
"bwa/mem": {
70+
"branch": "master",
71+
"git_sha": "a29f18660f5e3748d44d6f716241e70c942c065d",
72+
"installed_by": ["modules"],
73+
"patch": "modules/nf-core/bwa/mem/bwa-mem.diff"
74+
},
6975
"cat/cat": {
7076
"branch": "master",
7177
"git_sha": "81880787133db07d9b4c1febd152c090eb8325dc",
@@ -111,11 +117,21 @@
111117
"git_sha": "49f4e50534fe4b64101e62ea41d5dc43b1324358",
112118
"installed_by": ["modules"]
113119
},
120+
"fastp": {
121+
"branch": "master",
122+
"git_sha": "d082103d7976a2804f21225446cc110cbd822f4c",
123+
"installed_by": ["modules"]
124+
},
114125
"fastqc": {
115126
"branch": "master",
116127
"git_sha": "81880787133db07d9b4c1febd152c090eb8325dc",
117128
"installed_by": ["modules"]
118129
},
130+
"filtlong": {
131+
"branch": "master",
132+
"git_sha": "81880787133db07d9b4c1febd152c090eb8325dc",
133+
"installed_by": ["modules"]
134+
},
119135
"gawk": {
120136
"branch": "master",
121137
"git_sha": "27be1be5a87724096b51e1fe579ecbd773f53f1b",
@@ -201,6 +217,16 @@
201217
"git_sha": "81880787133db07d9b4c1febd152c090eb8325dc",
202218
"installed_by": ["modules"]
203219
},
220+
"seqkit/fx2tab": {
221+
"branch": "master",
222+
"git_sha": "05954dab2ff481bcb999f24455da29a5828af08d",
223+
"installed_by": ["modules"]
224+
},
225+
"seqkit/stats": {
226+
"branch": "master",
227+
"git_sha": "9fe7867a4f012db581d7bd560228ded1074eb0e5",
228+
"installed_by": ["modules"]
229+
},
204230
"star/align": {
205231
"branch": "master",
206232
"git_sha": "81880787133db07d9b4c1febd152c090eb8325dc",
@@ -221,6 +247,11 @@
221247
"git_sha": "81880787133db07d9b4c1febd152c090eb8325dc",
222248
"installed_by": ["modules"]
223249
},
250+
"trinity": {
251+
"branch": "master",
252+
"git_sha": "05954dab2ff481bcb999f24455da29a5828af08d",
253+
"installed_by": ["modules"]
254+
},
224255
"tximeta/tximport": {
225256
"branch": "master",
226257
"git_sha": "81880787133db07d9b4c1febd152c090eb8325dc",

modules/local/annotation/bed2gtf/templates/bed2gtf.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import yaml
55

66
import polars as pl
7+
import yaml
78

89
# Versions
910

@@ -17,8 +18,6 @@
1718
with open("versions.yml", "w") as f:
1819
f.write(yaml.dump(versions))
1920

20-
# Main
21-
2221
exons_only = bool("${exons_only}")
2322

2423
columns = ['chr', 'start', 'end', 'name', 'score', 'strand',
@@ -41,8 +40,8 @@
4140

4241
if exons_only:
4342
df_exons = df.with_columns(
44-
exonSizes = pl.col('exonSizes').str.split(','),
45-
exonStarts = pl.col('exonStarts').str.split(',')
43+
exonSizes = pl.col('exonSizes').cast(pl.Utf8).str.split(','),
44+
exonStarts = pl.col('exonStarts').cast(pl.Utf8).str.split(',')
4645
).explode('exonSizes', 'exonStarts')
4746
df_exons = df_exons.with_columns(
4847
type = pl.lit('exon'),
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
channels:
2+
- conda-forge
3+
- bioconda
4+
dependencies:
5+
- bioconda::gtfparse=2.5.0
6+
- conda-forge::pyyaml=6.0.2
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
// Runs the `annotation.py` template on a GTF file and emits a BED file
// plus a versions.yml describing the software used.
//
// Inputs:
//   meta - sample/annotation metadata map; meta.id is used for the task tag
//          and as the default output prefix
//   gtf  - GTF annotation file handed to the template
// Outputs:
//   bed      - tuple of meta and "<prefix>.bed"
//   versions - versions.yml written by the template
process CIRCTOOLS_ANNOTATION {
    tag "${meta.id}"
    label 'process_single'

    // Anchor the environment file to this module's directory so conda finds it
    // regardless of where the pipeline is launched from.
    conda "${moduleDir}/environment.yml"
    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
        ? 'oras://community.wave.seqera.io/library/gtfparse_pyyaml:e3b66ef529a096e7'
        : 'community.wave.seqera.io/library/gtfparse_pyyaml:508c7607beb17f69'}"

    input:
    tuple val(meta), path(gtf)

    output:
    tuple val(meta), path("${prefix}.bed"), emit: bed
    path "versions.yml"                   , emit: versions

    script:
    // `prefix` is deliberately not declared with `def`: the output block
    // above and the template both reference it.
    prefix = task.ext.prefix ?: "${meta.id}"
    template 'annotation.py'
}
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
#! /usr/bin/env python3
"""Convert the exon features of a GTF annotation into a six-column BED file.

This file is a Nextflow template: the gtf, prefix and task.process
placeholders used below are substituted by Nextflow before Python runs.

Outputs (written to the task working directory):
  * <prefix>.bed  - tab-separated, no header: chrom, start (0-based), end,
                    name, score (always 0), strand
  * versions.yml  - versions of Python, gtfparse and polars
"""

import importlib.metadata
import platform

import gtfparse
import polars as pl
import yaml

# Read the GTF and keep exon features only. filter() is given a polars
# expression, so read_gtf must already return a polars DataFrame; no extra
# conversion step is needed.
exons_pl = gtfparse.read_gtf("${gtf}").filter(pl.col("feature") == "exon")

# GTF coordinates are 1-based; BED starts are 0-based.
exons_pl = exons_pl.with_columns(
    start_0based=pl.col("start") - 1
)

# Number exons within each gene, ordered by start coordinate.
# cum_count is the current name of the deprecated cumcount alias.
exons_pl = (
    exons_pl
    .sort(["gene_id", "start"])
    .with_columns(
        pl.col("gene_id").cum_count().over("gene_id").alias("exon_number")
    )
)

# Build the BED name column:
#   <gene_id>_exon_<exon_number>_0_chr<seqname>_<start>_<f|r>
# NOTE(review): the name embeds the original 1-based GTF start and always
# prepends "chr" to seqname - confirm this matches what the consumer expects.
exons_pl = exons_pl.with_columns(
    (
        pl.col("gene_id") + "_exon_" +
        pl.col("exon_number").cast(pl.Utf8) + "_0_chr" +
        pl.col("seqname") + "_" +
        pl.col("start").cast(pl.Utf8) + "_" +
        pl.when(pl.col("strand") == "+").then(pl.lit("f")).otherwise(pl.lit("r"))
    ).alias("name")
)

# Select and rename columns into BED6 order.
bed_df = exons_pl.select(
    pl.col("seqname").alias("chrom"),
    pl.col("start_0based").alias("start"),
    pl.col("end"),
    pl.col("name"),
    pl.lit(0).alias("score"),
    pl.col("strand"),
)

# The doubled backslash is intentional; presumably the Nextflow template
# rendering unescapes it so Python receives a tab escape - keep as is.
bed_df.write_csv("${prefix}.bed", separator="\\t", include_header=False)

# Record the software versions, including the GTF parser itself.
versions = {
    "${task.process}": {
        "python": platform.python_version(),
        "gtfparse": importlib.metadata.version("gtfparse"),
        "polars": pl.__version__,
    }
}

with open("versions.yml", "w") as f:
    yaml.dump(versions, f)
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
channels:
2+
- conda-forge
3+
- bioconda
4+
dependencies:
5+
- bioconda::circtools=2.0
Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
process DCC {
1+
process CIRCTOOLS_DETECT {
22
tag "${meta.id}"
33
label 'process_high'
44

5-
conda "bioconda::circtools=2.0"
5+
conda "environment.yml"
66
container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
77
? 'https://depot.galaxyproject.org/singularity/circtools:2.0--pyhdfd78af_0'
88
: 'biocontainers/circtools:2.0--pyhdfd78af_0'}"
@@ -19,9 +19,6 @@ process DCC {
1919

2020
path "versions.yml", emit: versions
2121

22-
when:
23-
task.ext.when == null || task.ext.when
24-
2522
script:
2623
def args = task.ext.args ?: ''
2724
prefix = task.ext.prefix ?: "${meta.id}"
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
channels:
2+
- conda-forge
3+
- bioconda
4+
dependencies:
5+
- bioconda::bedtools=2.31.1
6+
- bioconda::circtools=2.0.1
7+
- bioconda::nanofilt=2.8.0
8+
- bioconda::pblat=2.5.1
9+
- bioconda::samtools=1.22

0 commit comments

Comments
 (0)