Skip to content

Commit 98d1d0a

Browse files
committed
Merge branch 'dev'
2 parents 14c8a2e + 2013e2d commit 98d1d0a

23 files changed

+2672
-2184
lines changed

.envrc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
use flake . --quiet

.gitignore

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,16 @@
55
!README.md
66
!justfile
77
!Dockerfile
8+
!Containerfile
89
!pyproject.toml
10+
!flake.nix
911
!pixi.lock
1012
!uv.lock
1113
!_quarto.yml
1214
!.pre-commit-config.yaml
1315
!refman.toml
1416
!data_manifest.yml
17+
!.envrc
1518
!.gitignore
1619
!.gitattributes
1720
!main.nf
@@ -35,8 +38,14 @@
3538
!/conf/*.yml
3639

3740
# bin of executable scripts
38-
!/bin
41+
!/bin/
3942
!/bin/*.py
43+
!/bin/*.R
44+
!/bin/*.r
45+
!/bin/*.pl
46+
!/bin/*.lua
47+
!/bin/*.sh
48+
!/bin/*.awk
4049

4150
# groovy libraries
4251
!/lib
File renamed without changes.

bin/make_primer_patterns.lua

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
#!/usr/bin/env lua
2+
3+
-- Usage:
4+
-- lua make_primer_patterns.lua -i input.fasta [-o output_prefix] [-f forward_pattern] [-r reverse_pattern]
5+
6+
--- Parse command-line options from the global `arg` table.
-- Every recognised flag consumes exactly one following value:
--   -i / --input_fasta      path to the input FASTA (required)
--   -o / --output_prefix    prefix of the output file (default "primer_patterns")
--   -f / --forward_pattern  text placed before the first sequence (default "^(.*?)")
--   -r / --reverse_pattern  text placed after the second sequence (default "^(.*?)")
-- @treturn table options with fields input_fasta, output_prefix,
--   forward_pattern and reverse_pattern
-- Raises an error on an unknown flag, on a flag that has no value after it,
-- or when no input FASTA was given.
local function parse_args()
    local opts = {
        input_fasta = nil,
        output_prefix = "primer_patterns",
        forward_pattern = "^(.*?)",
        reverse_pattern = "^(.*?)",
    }

    -- Maps every accepted spelling of a flag to the option field it fills.
    local flag_to_field = {
        ["-i"] = "input_fasta",
        ["--input_fasta"] = "input_fasta",
        ["-o"] = "output_prefix",
        ["--output_prefix"] = "output_prefix",
        ["-f"] = "forward_pattern",
        ["--forward_pattern"] = "forward_pattern",
        ["-r"] = "reverse_pattern",
        ["--reverse_pattern"] = "reverse_pattern",
    }

    local i = 1
    while i <= #arg do
        local flag = arg[i]
        local field = flag_to_field[flag]
        if not field then
            error("Unknown argument: " .. flag)
        end

        -- A trailing flag without a value would otherwise overwrite the
        -- default with nil and only fail much later with an unrelated message.
        local value = arg[i + 1]
        if value == nil then
            error("Missing value for argument: " .. flag)
        end

        opts[field] = value
        i = i + 2
    end

    assert(opts.input_fasta, "You must provide --input_fasta (-i)")
    return opts
end
38+
39+
--- Load a text file into an array of whitespace-free lines.
-- Every run of whitespace (interior spaces and tabs included) is deleted from
-- each line, so a blank line is kept as an empty string.
-- @tparam string path file to read
-- @treturn table sequence of cleaned line strings, in file order
-- Raises an error if the file cannot be opened.
local function read_fasta_lines(path)
    local handle, open_err = io.open(path, "r")
    if not handle then
        -- Level 0 keeps the message free of position info, like assert does.
        error("Could not open FASTA file: " .. (open_err or "unknown error"), 0)
    end

    local cleaned = {}
    for raw in handle:lines() do
        -- gsub also returns a match count; the single local keeps only the string.
        local stripped = raw:gsub("%s+", "")
        table.insert(cleaned, stripped)
    end

    handle:close()
    return cleaned
end
49+
50+
--- Build forward/reverse primer patterns from a two-record FASTA and write
-- them to `<label>.txt`, one pattern per line (forward first).
-- The forward pattern is `fwd_prefix` followed by the first sequence; the
-- reverse pattern is the second sequence followed by `rev_suffix`.
-- @tparam string fasta_path path to the FASTA file (must hold exactly 2 records)
-- @tparam string label output file name without the ".txt" extension
-- @tparam string fwd_prefix text placed before the first sequence
-- @tparam string rev_suffix text placed after the second sequence
-- Raises an error if the FASTA does not contain exactly two records, if
-- sequence text precedes the first header, if a header lacks a
-- `:<start>-<stop>` coordinate pair, or if the output file cannot be opened.
local function generate_patterns(fasta_path, label, fwd_prefix, rev_suffix)
    local lines = read_fasta_lines(fasta_path)

    -- Group the lines into records: a ">" line opens a record and every
    -- following non-header line is appended to that record's sequence.
    local entries = {}
    local current = nil
    for _, line in ipairs(lines) do
        if line:sub(1, 1) == ">" then
            current = { header = line, seq = "" }
            entries[#entries + 1] = current
        elseif line ~= "" then
            -- Blank lines carry no sequence data and are ignored.
            assert(current, "Sequence data found before the first FASTA header: " .. line)
            current.seq = current.seq .. line
        end
    end

    -- Crash if the number of parsed records isn't exactly 2
    assert(#entries == 2, "Expected exactly 2 FASTA records, found " .. #entries)

    -- Pull the sequence and the start coordinate out of each record.
    local seqs, starts = {}, {}
    for idx, entry in ipairs(entries) do
        local start = entry.header:match(":(%d+)%-%d+")
        assert(start, "Invalid header format (expected bedtools-style): " .. entry.header)
        seqs[idx] = entry.seq
        starts[idx] = tonumber(start)
    end

    -- Heuristic check for orientation assumption: the first record is
    -- expected to start no later than the second one.
    if starts[1] > starts[2] then
        io.stderr:write(
            "⚠️ Warning: Please double check that the provided FASTA is formatted like an output from `bedtools getfasta`, e.g.\n\n'>PP599462.1:0-16'\n"
        )
    end

    -- Build patterns
    local fwd_pattern = fwd_prefix .. seqs[1]
    local rev_pattern = seqs[2] .. rev_suffix

    -- Write output and close the handle so the data is flushed to disk.
    local out, err = io.open(label .. ".txt", "w")
    assert(out, "Failed to write output: " .. (err or "unknown error"))
    out:write(fwd_pattern, "\n", rev_pattern, "\n")
    out:close()
end
99+
100+
--- Script entry point: parse the command line, confirm the input FASTA can
-- be opened for reading, then generate the pattern file.
-- Raises an error if the input file cannot be opened.
local function main()
    local options = parse_args()

    -- Probe the input path up front so a bad path fails with a clear message.
    local probe = io.open(options.input_fasta, "r")
    if not probe then
        error("Input FASTA file does not exist: " .. options.input_fasta, 0)
    end
    probe:close()

    -- Build the patterns and write them to a text file.
    generate_patterns(
        options.input_fasta,
        options.output_prefix,
        options.forward_pattern,
        options.reverse_pattern
    )
end
111+
112+
-- Run main only when this file is executed directly as a script.
-- The comparison uses the chunk's `source` field ("@" followed by the file
-- name for chunks loaded from a file) rather than `short_src`: `short_src`
-- is a shortened, printable form of the name, so for a long script path it
-- would never equal arg[0] and main() would silently not run.
local script_source = debug.getinfo(1, "S").source
if arg and arg[0] and script_source == "@" .. arg[0] then
    main()
end

bin/split_primer_combos.lua

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
#!/usr/bin/env lua
2+
3+
-- Usage: lua split_primer_combos.lua <input.bed> [forward_suffix] [reverse_suffix]
--
-- Groups the records of a 6-column, tab-separated BED file by primer name
-- with the forward/reverse suffix removed, and writes each group to
-- "<base_name>.bed" in the current directory. Every group must contain
-- exactly two records.
--
-- Default suffixes if not provided
local input_bed = arg[1] -- remember lua is 1-based!
local forward_suffix = arg[2] or "_LEFT"
local reverse_suffix = arg[3] or "_RIGHT"

-- Make sure the input bed file is provided
assert(input_bed, "Usage: lua split_primer_combos.lua <input.bed> [_LEFT] [_RIGHT]")

--- Escape every Lua pattern magic character in `text` so that it matches
-- itself literally when used as a pattern.
-- @tparam string text
-- @treturn string pattern-safe copy of `text`
local function escape_pattern(text)
    return (text:gsub("[%^%$%(%)%%%.%[%]%*%+%-%?]", "%%%0"))
end

-- The suffixes come from the command line, so treat them as literal text
-- rather than as Lua patterns (e.g. "-F" or ".fwd" contain magic characters).
local forward_literal = escape_pattern(forward_suffix)
local reverse_literal = escape_pattern(reverse_suffix)

-- Table mapping each base primer name to the list of its BED lines
local primers = {}

-- Read BED line by line
local file, open_err = io.open(input_bed, "r")
assert(file, "Failed to open input BED file: " .. tostring(open_err))

for line in file:lines() do
    -- Skip blank / whitespace-only lines instead of treating them as malformed.
    if line:match("%S") then
        -- Only the 4th column (name) is needed; requiring all six columns
        -- still validates the shape of the line.
        local _, _, _, name =
            line:match("([^\t]+)\t([^\t]+)\t([^\t]+)\t([^\t]+)\t([^\t]+)\t([^\t]+)")
        assert(name, "Malformed BED line: " .. line)

        -- Remove the forward and reverse suffix text wherever it occurs.
        local base_name = name:gsub(forward_literal, ""):gsub(reverse_literal, "")

        primers[base_name] = primers[base_name] or {}
        table.insert(primers[base_name], line)
    end
end

file:close()

-- Write each group to its own BED file
for base_name, records in pairs(primers) do
    assert(#records == 2, "Expected 2 records for " .. base_name .. ", found " .. #records)
    local out = assert(io.open(base_name .. ".bed", "w"))
    for _, rec in ipairs(records) do
        out:write(rec .. "\n")
    end
    out:close()
end

flake.nix

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
# Flake providing the `oneroof` development shell for every default system.
{
  description = "Reproducible dev shell for the `oneroof` bioinformatic processing pipeline";

  inputs = {
    nixpkgs.url = "github:NixOS/nixpkgs/nixpkgs-unstable";
    flake-utils.url = "github:numtide/flake-utils";
  };

  outputs =
    {
      self,
      nixpkgs,
      flake-utils,
      ...
    }:
    flake-utils.lib.eachDefaultSystem (
      system:
      let
        pkgs = import nixpkgs {
          inherit system;
        };

        # Prebuilt dorado tarball; only defined for x86_64-linux, null elsewhere.
        dorado =
          if system == "x86_64-linux" then
            pkgs.stdenv.mkDerivation {
              name = "dorado";
              src = pkgs.fetchurl {
                url = "https://cdn.oxfordnanoportal.com/software/analysis/dorado-0.7.1-linux-x64.tar.gz";
                # FIXME(review): this value is the SHA-256 digest of empty input,
                # i.e. a placeholder. The fetch will fail with a hash mismatch
                # until it is replaced with the real hash of the tarball.
                sha256 = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855";
              };

              unpackPhase = "tar -xvf $src";
              installPhase = ''
                set -euo pipefail
                mkdir -p $out
                cp -r dorado-0.7.1-linux-x64/* $out/
              '';
            }
          else
            null;

      in
      {
        devShells.default = pkgs.mkShell {
          name = "oneroof";

          # dorado is appended only through lib.optional below. Listing it
          # unconditionally as well would duplicate it on x86_64-linux and put
          # a null entry in the list on every other system.
          buildInputs = [
            pkgs.stdenv
            pkgs.gcc
            pkgs.curl
            pkgs.wget
            pkgs.openjdk
            pkgs.git
            pkgs.cmake
            pkgs.libxml2
            pkgs.libxslt
            pkgs.libffi
            pkgs.pixi
          ] ++ pkgs.lib.optional (dorado != null) dorado;

          # Puts dorado (when available) on PATH, installs the pixi env on
          # first entry, and prepends the pixi env's bin directory to PATH.
          shellHook = ''
            ${pkgs.lib.optionalString (dorado != null) ''
              export PATH=$PATH:${dorado}/bin:${dorado}/lib
            ''}
            echo "🔧 Entering oneroof dev shell"
            export PS1="(oneroof) $PS1"
            if [ ! -d .pixi/envs/default ]; then
              echo "Pixi env not found. Running install..."
              pixi install --frozen
            fi

            export PATH="$PWD/.pixi/envs/default/bin:$PATH"
          '';
        };
      }
    );
}

main.nf

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,16 @@ workflow {
6363
Channel.fromPath( params.ref_gbk ) :
6464
Channel.empty()
6565

66+
ch_contam_fasta = params.contam_fasta && file(params.contam_fasta).isFile()
67+
? Channel.fromPath( params.contam_fasta )
68+
: Channel.empty()
69+
70+
ch_metagenomics_ref = params.meta_ref
71+
? file(params.meta_ref).isFile()
72+
? Channel.fromPath( params.meta_ref )
73+
: Channel.from( params.meta_ref )
74+
: Channel.empty()
75+
6676
ch_snpeff_config = params.snpEff_config ?
6777
Channel.fromPath( params.snpEff_config ) :
6878
Channel.empty()
@@ -74,7 +84,9 @@ workflow {
7484
ch_primer_bed,
7585
ch_refseq,
7686
ch_ref_gbk,
87+
ch_contam_fasta,
7788
ch_snpeff_config,
89+
ch_metagenomics_ref,
7890
)
7991

8092
} else if ( params.platform == "illumina" ) {

modules/bedtools.nf

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ process GET_PRIMER_SEQS {
22

33
/* */
44

5+
array 1000
6+
57
errorStrategy { task.attempt < 3 ? 'retry' : 'ignore' }
68
maxRetries 2
79

@@ -18,4 +20,4 @@ process GET_PRIMER_SEQS {
1820
bedtools getfasta -fi ${refseq} -bed ${bed} > ${primer_combo}.fasta
1921
"""
2022

21-
}
23+
}

modules/chopper.nf

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ process FILTER_WITH_CHOPPER {
66
errorStrategy { task.attempt < 3 ? 'retry' : 'ignore' }
77
maxRetries 2
88

9+
array 1000
910
cpus 4
1011

1112
input:

modules/cutadapt.nf

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ process TRIM_ENDS_TO_PRIMERS {
55
errorStrategy { task.attempt < 3 ? 'retry' : 'ignore' }
66
maxRetries 2
77

8+
array 1000
89
cpus 3
910

1011
input:

0 commit comments

Comments
 (0)