Skip to content

Commit 42b3e95

Browse files
authored
Merge pull request #34 from seandavi/copilot/refactor-nf-core-pipeline
Refactor pipeline to nf-core DSL2 modular architecture
2 parents 4cde6fb + 3664460 commit 42b3e95

File tree

33 files changed

+2231
-861
lines changed

33 files changed

+2231
-861
lines changed

.github/workflows/ci.yml

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
name: nf-core CI
2+
# This workflow stub-runs the pipeline (-stub-run) with the test profile to check that it completes without any errors
3+
on:
4+
push:
5+
branches:
6+
- main
7+
- master
8+
pull_request:
9+
release:
10+
types: [published]
11+
12+
# Cancel previous runs if a new one is triggered
13+
concurrency:
14+
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
15+
cancel-in-progress: true
16+
17+
jobs:
18+
test:
19+
name: Run pipeline with test data
20+
# On push events, only run in the upstream repository (skip pushes to forks); all other event types always run
21+
if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'seandavi/curatedMetagenomicsNextflow') }}"
22+
runs-on: ubuntu-latest
23+
strategy:
24+
matrix:
25+
NXF_VER:
26+
- "23.04.0"
27+
- "latest-everything"
28+
steps:
29+
- name: Check out pipeline code
30+
uses: actions/checkout@v4
31+
32+
- name: Install Nextflow
33+
uses: nf-core/setup-nextflow@v2
34+
with:
35+
version: "${{ matrix.NXF_VER }}"
36+
37+
- name: Disk space cleanup
38+
uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
39+
40+
- name: Run pipeline with test data (stub)
41+
run: |
42+
nextflow run ${GITHUB_WORKSPACE} -profile test,docker -stub-run --outdir ./results
43+
44+
profile:
45+
name: Run profile tests
46+
if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'seandavi/curatedMetagenomicsNextflow') }}"
47+
runs-on: ubuntu-latest
48+
strategy:
49+
matrix:
50+
profile:
51+
- "test"
52+
steps:
53+
- name: Check out pipeline code
54+
uses: actions/checkout@v4
55+
56+
- name: Install Nextflow
57+
uses: nf-core/setup-nextflow@v2
58+
59+
- name: Disk space cleanup
60+
uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
61+
62+
- name: Run pipeline with test data
63+
run: |
64+
nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.profile }},docker -stub-run --outdir ./results

.github/workflows/linting.yml

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
name: nf-core linting
2+
# This workflow is triggered on pushes to main/master, on pull requests, and on published releases.
3+
on:
4+
push:
5+
branches:
6+
- main
7+
- master
8+
pull_request:
9+
release:
10+
types: [published]
11+
12+
# Cancel previous runs if a new one is triggered
13+
concurrency:
14+
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
15+
cancel-in-progress: true
16+
17+
jobs:
18+
pre-commit:
19+
runs-on: ubuntu-latest
20+
steps:
21+
- uses: actions/checkout@v4
22+
23+
- name: Set up Python
24+
uses: actions/setup-python@v5
25+
with:
26+
python-version: "3.11"
27+
28+
- uses: pre-commit/action@v3.0.1
29+
# FIXME: remove continue-on-error once the pre-commit checks pass reliably
30+
continue-on-error: true
31+
32+
prettier:
33+
runs-on: ubuntu-latest
34+
steps:
35+
- name: Check out repository
36+
uses: actions/checkout@v4
37+
38+
- name: Install NodeJS
39+
uses: actions/setup-node@v4
40+
41+
- name: Install Prettier
42+
run: npm install -g prettier
43+
44+
- name: Run Prettier --check
45+
run: prettier --check .
46+
47+
editorconfig:
48+
runs-on: ubuntu-latest
49+
steps:
50+
- uses: actions/checkout@v4
51+
52+
- uses: editorconfig-checker/action-editorconfig-checker@main
53+
54+
- run: editorconfig-checker -exclude README.md $(git ls-files | grep -v test)
55+
56+
nf-core-lint:
57+
runs-on: ubuntu-latest
58+
steps:
59+
- name: Check out pipeline code
60+
uses: actions/checkout@v4
61+
62+
- name: Install Nextflow
63+
uses: nf-core/setup-nextflow@v2
64+
65+
- uses: actions/setup-python@v5
66+
with:
67+
python-version: "3.11"
68+
architecture: "x64"
69+
70+
- name: Install dependencies
71+
run: |
72+
python -m pip install --upgrade pip
73+
pip install nf-core
74+
75+
- name: Run nf-core lint
76+
run: nf-core lint --dir ${GITHUB_WORKSPACE} --markdown lint_results.md
77+
continue-on-error: true
78+
79+
- name: Upload linting log file artifact
80+
if: ${{ always() }}
81+
uses: actions/upload-artifact@v4
82+
with:
83+
name: linting-logs
84+
path: |
85+
lint_results.md

.nf-core.yml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
repository_type: pipeline
2+
nf_core_version: "2.14.1"
3+
org_path: seandavi
4+
lint:
5+
files_exist:
6+
- .github/workflows/ci.yml
7+
- .github/workflows/linting.yml
8+
- CHANGELOG.md
9+
- CODE_OF_CONDUCT.md
10+
- CITATIONS.md
11+
files_unchanged: []
12+
nextflow_config:
13+
- manifest.name
14+
- manifest.version
15+
- manifest.description
16+
- manifest.author

.prettierignore

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Ignore Nextflow and Groovy files (no parser available)
2+
*.nf
3+
*.config
4+
5+
# Ignore existing files from before refactoring
6+
docker/cloudbuild.yaml
7+
nextflow_schema.json
8+
unitn_setup.md
9+
10+
# Ignore build artifacts and dependencies
11+
work/
12+
.nextflow*
13+
results/

CHANGELOG.md

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
# Changelog
2+
3+
All notable changes to this project will be documented in this file.
4+
5+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7+
8+
## [1.3.0] - 2025-10-30
9+
10+
### Added
11+
12+
- Complete nf-core refactoring of pipeline structure
13+
- Modularized all processes following nf-core DSL2 conventions
14+
- Added `workflows/` directory with main workflow
15+
- Added `modules/local/` directory with individual process modules
16+
- Added `conf/` directory with configuration files:
17+
- `base.config` for base process configuration
18+
- `modules.config` for module-specific settings
19+
- `test.config` for test profile
20+
- Added `.nf-core.yml` configuration file
21+
- Added GitHub Actions CI/CD workflows for linting and testing
22+
- Added module metadata files (`meta.yml`) for documentation
23+
- Updated README with comprehensive nf-core-style documentation
24+
- Standardized parameter naming (`--input`, `--outdir`)
25+
- Added `check_max()` function for resource management
26+
- Added support for multiple container engines (Docker, Singularity, Podman)
27+
28+
### Changed
29+
30+
- Refactored monolithic `main.nf` into modular structure
31+
- Updated `nextflow.config` to follow nf-core conventions
32+
- Improved parameter handling with backwards compatibility
33+
- Enhanced error handling and validation
34+
- Updated manifest information
35+
- Improved output organization with `pipeline_info` directory
36+
37+
### Improved
38+
39+
- Better separation of concerns with modular architecture
40+
- Easier maintenance and updates
41+
- More portable across different compute environments
42+
- Better documentation and help messages
43+
- Improved resource allocation with labels
44+
- Enhanced container management
45+
46+
## [1.2.0] and earlier
47+
48+
These versions were released before the nf-core refactoring. See the git history for details.

CITATIONS.md

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# Citations
2+
3+
## Pipeline Tools
4+
5+
### MetaPhlAn 4
6+
7+
> Blanco-Míguez A, Beghini F, Cumbo F, McIver LJ, Thompson KN, Zolfo M, Manghi P, Dubois L, Huang KD, Thomas AM, Nickols WA, Piccinno G, Piperni E, Punčochář M, Valles-Colomer M, Tett A, Giordano F, Davies R, Wolf J, Berry SE, Spector TD, Franzosa EA, Pasolli E, Asnicar F, Huttenhower C, Segata N. Extending and improving metagenomic taxonomic profiling with uncharacterized species using MetaPhlAn 4. Nat Biotechnol. 2023 Nov;41(11):1633-1644. doi: 10.1038/s41587-023-01688-w. Epub 2023 Sep 25. PMID: 37709786; PMCID: PMC10579592.
8+
9+
### HUMAnN 3 / bioBakery 3
10+
11+
> Beghini F, McIver LJ, Blanco-Míguez A, Dubois L, Asnicar F, Maharjan S, Mailyan A, Manghi P, Scholz M, Thomas AM, Valles-Colomer M, Weingart G, Zhang Y, Zolfo M, Huttenhower C, Franzosa EA, Segata N. Integrating taxonomic, functional, and strain-level profiling of diverse microbial communities with bioBakery 3. eLife. 2021 May 4;10:e65088. doi: 10.7554/eLife.65088. PMID: 33944776; PMCID: PMC8096432.
12+
13+
### KneadData
14+
15+
> McIver LJ, Abu-Ali G, Franzosa EA, Schwager R, Morgan XC, Waldron L, Segata N, Huttenhower C. bioBakery: a meta'omic analysis environment. Bioinformatics. 2018 Apr 1;34(7):1235-1237. doi: 10.1093/bioinformatics/btx754. PMID: 29194469; PMCID: PMC6030888.
16+
17+
### SRA Toolkit (fasterq-dump)
18+
19+
> Leinonen R, Sugawara H, Shumway M; International Nucleotide Sequence Database Collaboration. The sequence read archive. Nucleic Acids Res. 2011 Jan;39(Database issue):D19-21. doi: 10.1093/nar/gkq1019. Epub 2010 Nov 9. PMID: 21062823; PMCID: PMC3013647.
20+
21+
### FastQC
22+
23+
> Andrews S. FastQC: A Quality Control Tool for High Throughput Sequence Data [Online]. Available online at: http://www.bioinformatics.babraham.ac.uk/projects/fastqc/
24+
25+
### Nextflow
26+
27+
> Di Tommaso P, Chatzou M, Floden EW, Barja PP, Palumbo E, Notredame C. Nextflow enables reproducible computational workflows. Nat Biotechnol. 2017 Apr 11;35(4):316-319. doi: 10.1038/nbt.3820. PMID: 28398311.
28+
29+
### Docker
30+
31+
> Merkel, D. (2014). Docker: lightweight linux containers for consistent development and deployment. Linux Journal, 2014(239), 2.
32+
33+
### Singularity
34+
35+
> Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. PMID: 28494014; PMCID: PMC5426675.
36+
37+
## Software packaging/containerisation tools
38+
39+
### Conda
40+
41+
> Anaconda Software Distribution. Computer software. Vers. 2-2.4.0. Anaconda, Nov. 2016. Web.
42+
43+
### Docker
44+
45+
> Merkel, D. (2014). Docker: lightweight linux containers for consistent development and deployment. Linux Journal, 2014(239), 2.
46+
47+
## Data
48+
49+
If you use data from curatedMetagenomicData, please cite:
50+
51+
> Pasolli E, Schiffer L, Manghi P, Renson A, Obenchain V, Truong DT, Beghini F, Malik F, Ramos M, Dowd JB, Huttenhower C, Morgan M, Segata N, Waldron L. Accessible, curated metagenomic data through ExperimentHub. Nat Methods. 2017 Nov;14(11):1023-1024. doi: 10.1038/nmeth.4468. Epub 2017 Oct 31. PMID: 29088129; PMCID: PMC5685312.

0 commit comments

Comments
 (0)