Skip to content
Merged
68 changes: 57 additions & 11 deletions .github/workflows/nextflow-test.yaml
Original file line number Diff line number Diff line change
@@ -1,36 +1,82 @@
#inspired by https://docs.cirro.bio/pipelines/development/#automated-testing
name: minimal-example
name: Nextflow Tests

on:
pull_request:
branches:
- 'main'
workflow_dispatch:

jobs:
test:
unit-tests:
name: Unit Tests
runs-on: ubuntu-latest
steps:
- name: setup BATS
uses: mig4/setup-bats@v1
- name: Checkout repository
uses: actions/checkout@v3
with:
bats-version: 1.2.1
submodules: recursive

- uses: actions/checkout@v3
- name: Setup Java
uses: actions/setup-java@v2
with:
distribution: 'temurin'
java-version: '17'

- name: Install Nextflow
run: |
wget -qO- get.nextflow.io | bash
sudo mv nextflow /usr/local/bin/

- name: Install nf-test
run: |
wget -qO- https://get.nf-test.com | bash
sudo mv nf-test /usr/local/bin

- name: Run unit tests
run: nf-test test tests/modules/ --verbose --junitxml=unit-test-results.xml

- name: Publish unit test results
if: always()
uses: EnricoMi/publish-unit-test-result-action@v2
with:
files: unit-test-results.xml
check_name: Unit Test Results

- name: Upload unit test results
if: always()
uses: actions/upload-artifact@v4
with:
name: unit-test-results
path: |
.nf-test/tests/
unit-test-results.xml

pipeline-test:
name: Pipeline Integration Test
runs-on: ubuntu-latest
needs: unit-tests
steps:
- name: Checkout repository
uses: actions/checkout@v3
with:
submodules: recursive
- uses: actions/setup-java@v2

- name: Setup Java
uses: actions/setup-java@v2
with:
distribution: 'temurin'
java-version: '17'

- name: install nextflow
- name: Install Nextflow
run: |
wget -qO- get.nextflow.io | bash
sudo mv nextflow /usr/local/bin/
- name: install nf-test

- name: Install nf-test
run: |
wget -qO- https://get.nf-test.com | bash
sudo mv nf-test /usr/local/bin

- name: run pipeline
run: nf-test test
- name: Run pipeline test
run: nf-test test tests/main.nf.test --verbose --junitxml=pipeline-test-results.xml
Copy link

Copilot AI Nov 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The pipeline-test job generates pipeline-test-results.xml but doesn't have steps to publish or upload these results, unlike the unit-tests job (lines 39-53). Consider adding similar steps to publish and upload the pipeline test results for consistency and better CI/CD visibility.

Suggested change
run: nf-test test tests/main.nf.test --verbose --junitxml=pipeline-test-results.xml
run: nf-test test tests/main.nf.test --verbose --junitxml=pipeline-test-results.xml
- name: Publish pipeline test results
if: always()
uses: EnricoMi/publish-unit-test-result-action@v2
with:
files: pipeline-test-results.xml
check_name: Pipeline Test Results
- name: Upload pipeline test results
if: always()
uses: actions/upload-artifact@v4
with:
name: pipeline-test-results
path: |
.nf-test/tests/
pipeline-test-results.xml

Copilot uses AI. Check for mistakes.
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We would normally keep the artifact produced by --junitxml for integration-style tests, so in the ideal scenario we should keep this option. However, the integration tests here use a relatively large dataset, so the artifact generated by the tests is quite large: gigabytes in size, IIRC.

The correct solution is to use a dataset that is more truncated than the one in the minimal-example.

5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,11 @@ runs/*

results*
work
out-*

## nf-test
.nf-test/
.nf-test.log

notebooks/*.png
notebooks/*.ipynb
Expand Down
4 changes: 4 additions & 0 deletions tests/fixtures/data/PD1_Patient01_Base.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
nucleotide aminoAcid count (templates/reads) frequencyCount (%) cdr3Length vMaxResolved vFamilyName vGeneName vGeneAllele vFamilyTies vGeneNameTies vGeneAlleleTies dMaxResolved dFamilyName dGeneName dGeneAllele dFamilyTies dGeneNameTies dGeneAlleleTies jMaxResolved jFamilyName jGeneName jGeneAllele jFamilyTies jGeneNameTies jGeneAlleleTies vDeletion n1Insertion d5Deletion d3Deletion n2Insertion jDeletion vIndex n1Index dIndex n2Index jIndex estimatedNumberGenomes sequenceStatus cloneResolved vOrphon dOrphon jOrphon vFunction dFunction jFunction fractionNucleated vAlignLength vAlignSubstitutionCount vAlignSubstitutionIndexes vAlignSubstitutionGeneThreePrimeIndexes vSeqWithMutations
ATCCTGAGTTCTAAGAAGCTCCTTCTCAGTGACTCTGGCTTCTATCTCTGTGCCTGGAGGTACACCGGGGAGCTGTTTTTTGGAGAA CAWRYTGELFF 69 3.397307756693681E-4 33 TCRBV30-01*01 TCRBV30 TCRBV30-01 01 unresolved unresolved TCRBJ02-02*01 TCRBJ02 TCRBJ02-02 01 3 0 0 0 2 3 48 -1 -1 59 61 null In VJ null null null null null null null null null null null null
CCTCTCACTGTGACATCGGCCCAAAAGAACCCGACAGCTTTCTATCTCTGTGCCAGTAGTATGAATCAGCCCCAGCATTTTGGTGAT CASSMNQPQHF 134 6.597670136187728E-4 33 TCRBV19-01 TCRBV19 TCRBV19-01 01,02 unresolved unresolved TCRBJ01-05*01 TCRBJ01 TCRBJ01-05 01 3 0 0 0 1 4 48 -1 -1 62 63 null In VJ null null null null null null null null null null null null
ACTCTGACAGTGACCAGTGCCCATCCTGAAGACAGCAGCTTCTACATCTGCAGTGCTAGTGTGAACACTGAAGCTTTCTTTGGACAA CSASVNTEAFF 39 1.9202174276964284E-4 33 TCRBV20-01*01 TCRBV20 TCRBV20-01 01 unresolved unresolved TCRBJ01-01*01 TCRBJ01 TCRBJ01-01 01 3 0 0 0 2 0 48 -1 -1 59 61 null In VJ null null null null null null null null null null null null
3 changes: 3 additions & 0 deletions tests/fixtures/data/PD1_Patient01_Post.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
nucleotide aminoAcid count (templates/reads) frequencyCount (%) cdr3Length vMaxResolved vFamilyName vGeneName vGeneAllele vFamilyTies vGeneNameTies vGeneAlleleTies dMaxResolved dFamilyName dGeneName dGeneAllele dFamilyTies dGeneNameTies dGeneAlleleTies jMaxResolved jFamilyName jGeneName jGeneAllele jFamilyTies jGeneNameTies jGeneAlleleTies vDeletion n1Insertion d5Deletion d3Deletion n2Insertion jDeletion vIndex n1Index dIndex n2Index jIndex estimatedNumberGenomes sequenceStatus cloneResolved vOrphon dOrphon jOrphon vFunction dFunction jFunction fractionNucleated vAlignLength vAlignSubstitutionCount vAlignSubstitutionIndexes vAlignSubstitutionGeneThreePrimeIndexes vSeqWithMutations
ACACACCCTGCAGCCAGAAGACTCGGCCCTGTATCTCTGTGCCAGCAGCCAAGCTGAGGGGGGGCCCGGGAGCTGTTTTTTGGAGAA 2 1.3191741969527076E-4 44 TCRBV04-02*01 TCRBV04 TCRBV04-02 01 TCRBD02-01*01 TCRBD02 TCRBD02-01 01 TCRBJ02-02*01 TCRBJ02 TCRBJ02-02 01 1 4 9 0 3 9 37 53 57 64 67 null Out VDJ null null null null null null null null null null null null
ACTCTGAAGATCCAGCCCTCAGAACCCAGGGACTCAGCTGTGTACTTCTGTGCCAGCAGTTTAAGCTACGAGCAGTACTTCGGGCCG CASSLSYEQYF 2 1.3191741969527076E-4 33 unresolved TCRBV12 unresolved TCRBV12-03,TCRBV12-04 unresolved unresolved TCRBD01,TCRBD02 TCRBD01-01,TCRBD02-01 TCRBJ02-07*01 TCRBJ02 TCRBJ02-07 01 2 0 5 5 0 3 48 -1 63 -1 65 null In VDJ null null null null null null null null null null null null
5 changes: 5 additions & 0 deletions tests/fixtures/data/PD1_Patient02_Base.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
nucleotide aminoAcid count (templates/reads) frequencyCount (%) cdr3Length vMaxResolved vFamilyName vGeneName vGeneAllele vFamilyTies vGeneNameTies vGeneAlleleTies dMaxResolved dFamilyName dGeneName dGeneAllele dFamilyTies dGeneNameTies dGeneAlleleTies jMaxResolved jFamilyName jGeneName jGeneAllele jFamilyTies jGeneNameTies jGeneAlleleTies vDeletion n1Insertion d5Deletion d3Deletion n2Insertion jDeletion vIndex n1Index dIndex n2Index jIndex estimatedNumberGenomes sequenceStatus cloneResolved vOrphon dOrphon jOrphon vFunction dFunction jFunction fractionNucleated vAlignLength vAlignSubstitutionCount vAlignSubstitutionIndexes vAlignSubstitutionGeneThreePrimeIndexes vSeqWithMutations
TACCTTGGAGATCCAGTCCACGGAGTCAGGGGACACAGCACTGTATTTCTGTGCCAGCAGCAATCCTACGAGCAGTACTTCGGGCCG 13 4.7905956921489506E-5 32 TCRBV21-01*01 TCRBV21 TCRBV21-01 01 unresolved unresolved TCRBJ02-07*01 TCRBJ02 TCRBJ02-07 01 3 0 0 0 0 1 49 -1 -1 -1 63 null Out VJ null null null null null null null null null null null null
CTTCACCTACACGCCCTGCAGCCAGAAGACTCAGCCCTGTATCTCTGCGCCAGCAGCCCCCCGGCCAACGTCCTGACTTTCGGGGCC CASSPPANVLTF 15 5.52761041401802E-5 36 TCRBV04-01*01 TCRBV04 TCRBV04-01 01 unresolved unresolved TCRBJ02-06*01 TCRBJ02 TCRBJ02-06 01 4 0 0 0 4 6 45 -1 -1 58 62 null In VJ null null null null null null null null null null null null
TACCTTGGAGATCCAGTCCACGGAGTCAGGGGACACAGCACTGGATTTCTGTGCCAGCAGCAATCCTACGAGCAGTACTTCGGGCCG 3 1.105522082803604E-5 32 TCRBV21-01*01 TCRBV21 TCRBV21-01 01 unresolved unresolved TCRBJ02-07*01 TCRBJ02 TCRBJ02-07 01 3 0 0 0 0 1 49 -1 -1 -1 63 null Out VJ null null null null null null null null null null null null
GCCAGAAGACTCGGCCCTGTATCTCTGCGCCAGCAGCCTCCTGATCCGGGGTAGCGGGAGTACACCGGGGAGCTGTTTTTTGGAGAA 22339 0.0823208593591657 56 TCRBV04-03*01 TCRBV04 TCRBV04-03 01 TCRBD02-01*02 TCRBD02 TCRBD02-01 02 TCRBJ02-02*01 TCRBJ02 TCRBJ02-02 01 4 13 5 2 1 3 25 38 51 60 61 null Out VDJ null null null null null null null null null null null null
6 changes: 6 additions & 0 deletions tests/fixtures/data/PD1_Patient03_Base.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
nucleotide aminoAcid count (templates/reads) frequencyCount (%) cdr3Length vMaxResolved vFamilyName vGeneName vGeneAllele vFamilyTies vGeneNameTies vGeneAlleleTies dMaxResolved dFamilyName dGeneName dGeneAllele dFamilyTies dGeneNameTies dGeneAlleleTies jMaxResolved jFamilyName jGeneName jGeneAllele jFamilyTies jGeneNameTies jGeneAlleleTies vDeletion n1Insertion d5Deletion d3Deletion n2Insertion jDeletion vIndex n1Index dIndex n2Index jIndex estimatedNumberGenomes sequenceStatus cloneResolved vOrphon dOrphon jOrphon vFunction dFunction jFunction fractionNucleated vAlignLength vAlignSubstitutionCount vAlignSubstitutionIndexes vAlignSubstitutionGeneThreePrimeIndexes vSeqWithMutations
GAGGATTTCCCGCTCAGGCTGCTGTCGGCTGCTCCCTCCCAGACATCTGTGTACTTCTGTGCCAGGTACGAGCAGTACTTCGGGCCG CARYEQYF 58 5.50446003619657E-5 24 TCRBV06-05*01 TCRBV06 TCRBV06-05 01 unresolved unresolved TCRBJ02-07*01 TCRBJ02 TCRBJ02-07 01 9 0 0 0 1 4 57 -1 -1 65 66 null In VJ null null null null null null null null null null null null
AGCGCTTCTCCCTGATTCTGGAGTCCGCCAGCACCAACCAGACATCTATGTACCTCTGTGCCAGCAGGCGAGCAGTACTTCGGGCCG 27 2.562421051332886E-5 25 TCRBV28-01*01 TCRBV28 TCRBV28-01 01 unresolved unresolved TCRBJ02-07*01 TCRBJ02 TCRBJ02-07 01 6 0 0 0 1 6 56 -1 -1 67 68 null Out VJ null null null null null null null null null null null null
TCAGGCTGGAGTCGGCTGCTCCCTCCCAGACATCTGTGTACTTCTGTGCCAGCAGCGGGACAGGGTAATGGCTACACCTTCGGTTCG 73164 0.06943591622211825 37 TCRBV06-01*01 TCRBV06 TCRBV06-01 01 TCRBD01-01*01 TCRBD01 TCRBD01-01 01 TCRBJ01-02*01 TCRBJ01 TCRBJ01-02 01 6 1 0 3 2 6 44 55 56 65 67 null Out VDJ null null null null null null null null null null null null
AACGCCTTGGAGCTGGACGACTCGGCCCTGTATCTCTGTGCCAGCAGCACCAGTGGGACGCAGACAGATACGCAGTATTTTGGCCCA CASSTSGTQTDTQYF 11786 0.011185442411484961 45 TCRBV05-04*01 TCRBV05 TCRBV05-04 01 unresolved unresolved TCRBD01,TCRBD02 TCRBD01-01,TCRBD02-01 TCRBJ02-03*01 TCRBJ02 TCRBJ02-03 01 4 6 0 7 4 3 36 48 54 59 63 null In VDJ null null null null null null null null null null null null
ACCTTGGAGATCCAGTCCACGGAGTCAGGGGACACAGCACTGTATTTCTGTGCCAGCAGCCTTTTAGGGGCGGCTTTCTTTGGACAA CASSLLGAAFF 9662 0.009169671184436423 33 TCRBV21-01*01 TCRBV21 TCRBV21-01 01 TCRBD01-01*01 TCRBD01 TCRBD01-01 01 TCRBJ01-01*01 TCRBJ01 TCRBJ01-01 01 5 5 5 2 2 11 48 60 65 70 72 null In VDJ null null null null null null null null null null null null
3 changes: 3 additions & 0 deletions tests/fixtures/data/malformed_adaptive.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
nucleotide aminoAcid count (templates/reads)
ATCCTGAGTTCTAAGAAGCTCCTTCTCAGTGACTCTGGCTTCTATCTCTGTGCCTGGAGGTACACCGGGGAGCTGTTTTTTGGAGAA CAWRYTGELFF 69
This line is missing required columns
1 change: 1 addition & 0 deletions tests/fixtures/empty_samplesheet.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
sample,subject_id,timepoint,origin,file
4 changes: 4 additions & 0 deletions tests/fixtures/malformed_samplesheet.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
sample,subject_id,timepoint,origin,file
Patient01_Base,Patient01,Base,tumor,tests/fixtures/data/PD1_Patient01_Base.tsv
This is not a valid CSV line
Patient02_Base,Patient02,Base
3 changes: 3 additions & 0 deletions tests/fixtures/missing_columns_samplesheet.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
sample,file
Patient01_Base,tests/fixtures/data/PD1_Patient01_Base.tsv
Patient02_Base,tests/fixtures/data/PD1_Patient02_Base.tsv
5 changes: 5 additions & 0 deletions tests/fixtures/valid_samplesheet.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
sample,subject_id,timepoint,origin,file
Patient01_Base,Patient01,Base,tumor,tests/fixtures/data/PD1_Patient01_Base.tsv
Patient02_Base,Patient02,Base,tumor,tests/fixtures/data/PD1_Patient02_Base.tsv
Patient03_Base,Patient03,Base,tumor,tests/fixtures/data/PD1_Patient03_Base.tsv
Patient01_Post,Patient01,Post,tumor,tests/fixtures/data/PD1_Patient01_Post.tsv
207 changes: 207 additions & 0 deletions tests/modules/local/airr_convert/convert_adaptive.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
nextflow_process {

name "Test CONVERT_ADAPTIVE"
script "modules/local/airr_convert/convert_adaptive.nf"
process "CONVERT_ADAPTIVE"

// Note: Test fixtures in tests/fixtures/data/ contain small subsets of
// Adaptive Biotechnologies format TCR data for testing conversion to AIRR format

test("Should convert valid Adaptive format to AIRR") {

    tag "basic"

    when {
        process {
            """
            input[0] = [
                [sample: 'Patient01_Base'],
                file("${projectDir}/tests/fixtures/data/PD1_Patient01_Base.tsv")
            ]
            input[1] = file("${projectDir}/assets/airr/airr_rearrangement_schema.json")
            input[2] = file("${projectDir}/assets/airr/imgt_adaptive_lookup.tsv")
            """
        }
    }

    then {
        // The process must exit cleanly and emit on the adaptive_convert channel.
        assert process.success
        assert process.out.adaptive_convert

        with(process.out.adaptive_convert) {
            // One input sample -> exactly one emitted tuple.
            assert size() == 1

            // Each emission is a tuple of [sample_meta, file].
            def (meta, airr_file) = get(0)
            assert meta.sample == 'Patient01_Base'

            // The output file must exist and be named after the sample.
            def file_path = path(airr_file)
            assert file_path.exists()
            assert file_path.getFileName().toString() == 'Patient01_Base_airr.tsv'

            // Header line plus at least one data row.
            def lines = file_path.readLines()
            assert lines.size() > 1

            // Compare whole column names rather than substrings of the header
            // line: a substring test for "sequence" is already satisfied by
            // "sequence_id" (and "junction" by "junction_aa"), so it could
            // never detect that those columns are missing.
            // The limit of -1 keeps trailing empty fields instead of dropping them.
            def columns = lines[0].split('\t', -1).toList()
            def required = [
                'sequence_id',
                'sequence',
                'junction',
                'junction_aa',
                'v_call',
                'd_call',
                'j_call',
                'productive',
                'duplicate_count'
            ]
            def missing = required.findAll { !(it in columns) }
            assert missing.isEmpty() : "AIRR header is missing expected columns: ${missing}"
        }
    }
}

test("Should convert multiple samples from minimal-example") {

    tag "integration"

    // NOTE(review): despite the test name, a single sample (Patient02_Base)
    // is fed to the process here.
    // NOTE(review): the input lives under tests/test_data/minimal-example/,
    // not tests/fixtures/data/ -- confirm that directory is present in CI.
    when {
        process {
            """
            input[0] = [
                [sample: 'Patient02_Base'],
                file("${projectDir}/tests/test_data/minimal-example/PD1_Patient02_Base.tsv")
            ]
            input[1] = file("${projectDir}/assets/airr/airr_rearrangement_schema.json")
            input[2] = file("${projectDir}/assets/airr/imgt_adaptive_lookup.tsv")
            """
        }
    }

    then {
        // The process must exit cleanly and emit on the adaptive_convert channel.
        assert process.success
        assert process.out.adaptive_convert

        // First emitted tuple: [sample metadata, converted file].
        def (sampleMeta, convertedTsv) = process.out.adaptive_convert.get(0)
        assert sampleMeta.sample == 'Patient02_Base'

        def converted = path(convertedTsv)
        assert converted.exists()

        // The converted file is expected to hold more than ten lines,
        // i.e. noticeably more than a header plus a handful of rows.
        def lineCount = converted.readLines().size()
        assert lineCount > 10
    }
}

test("Should preserve sample metadata through conversion") {

    tag "metadata"

    when {
        process {
            """
            input[0] = [
                [
                    sample: 'Patient03_Base',
                    subject_id: 'Patient03',
                    timepoint: 'Base',
                    origin: 'tumor'
                ],
                file("${projectDir}/tests/fixtures/data/PD1_Patient03_Base.tsv")
            ]
            input[1] = file("${projectDir}/assets/airr/airr_rearrangement_schema.json")
            input[2] = file("${projectDir}/assets/airr/imgt_adaptive_lookup.tsv")
            """
        }
    }

    then {
        // The process must exit cleanly and emit on the adaptive_convert channel.
        assert process.success
        assert process.out.adaptive_convert

        // First emitted tuple: [sample metadata, converted file].
        def (sampleMeta, convertedTsv) = process.out.adaptive_convert.get(0)

        // Every metadata field supplied on input must come out unchanged.
        def expectedMeta = [
            sample    : 'Patient03_Base',
            subject_id: 'Patient03',
            timepoint : 'Base',
            origin    : 'tumor'
        ]
        expectedMeta.each { field, expected ->
            assert sampleMeta[field] == expected
        }

        // The converted file must have been written.
        assert path(convertedTsv).exists()
    }
}

test("Should handle malformed Adaptive data gracefully") {

    tag "error-handling"

    // Feed the process the deliberately broken fixture malformed_adaptive.tsv
    // together with the regular schema and lookup assets.
    when {
        process {
            """
            input[0] = [
                [sample: 'MalformedSample'],
                file("${projectDir}/tests/fixtures/data/malformed_adaptive.tsv")
            ]
            input[1] = file("${projectDir}/assets/airr/airr_rearrangement_schema.json")
            input[2] = file("${projectDir}/assets/airr/imgt_adaptive_lookup.tsv")
            """
        }
    }

    then {
        // The expected outcome is a failed process run: the fixture lacks
        // the columns the conversion requires.
        assert process.failed
    }
}

test("Should convert V, D, J gene calls correctly") {

    tag "gene-conversion"

    when {
        process {
            """
            input[0] = [
                [sample: 'Patient01_Base'],
                file("${projectDir}/tests/fixtures/data/PD1_Patient01_Base.tsv")
            ]
            input[1] = file("${projectDir}/assets/airr/airr_rearrangement_schema.json")
            input[2] = file("${projectDir}/assets/airr/imgt_adaptive_lookup.tsv")
            """
        }
    }

    then {
        // The process must exit cleanly and emit on the adaptive_convert channel.
        assert process.success
        assert process.out.adaptive_convert

        with(process.out.adaptive_convert) {
            // Each emission is a tuple of [sample_meta, file].
            def (meta, airr_file) = get(0)
            def file_path = path(airr_file)
            def lines = file_path.readLines()

            // Require a header plus at least one data row. Without this the
            // gene-call check below would be skipped and the test would pass
            // on an output that contains no rearrangements at all.
            assert lines.size() > 1

            // Locate the gene-call columns by exact name.
            // The limit of -1 keeps trailing empty fields so that column
            // positions line up between the header and the data rows.
            def header = lines[0].split('\t', -1)
            def v_idx = header.findIndexOf { it == 'v_call' }
            def d_idx = header.findIndexOf { it == 'd_call' }
            def j_idx = header.findIndexOf { it == 'j_call' }

            assert v_idx >= 0
            assert d_idx >= 0
            assert j_idx >= 0

            // The first data row must be wide enough to carry a v_call field.
            def first_row = lines[1].split('\t', -1)
            assert first_row.size() > v_idx

            // An empty v_call is tolerated; a non-empty one must be an
            // IMGT-style TRB gene name (e.g., TRBV30-01*01).
            // NOTE(review): the d_call and j_call values are only checked for
            // column presence, not content.
            def v_call = first_row[v_idx]
            if (v_call) {
                assert v_call.contains('TRB')
            }
        }
    }
}
}
Loading