KarchinLab · dltamayo · Nov 10, 2025 · Oct 21, 2025 · Oct 21, 2025 · Oct 21, 2025
diff --git a/.github/workflows/nextflow-test.yaml b/.github/workflows/nextflow-test.yaml
@@ -1,36 +1,82 @@
 #inspired by https://docs.cirro.bio/pipelines/development/#automated-testing
-name: minimal-example
+name: Nextflow Tests
 
 on:
   pull_request:
       branches:
       - 'main'
   workflow_dispatch:
+
 jobs:
-  test:
+  # Module-level nf-test suite: runs everything under tests/modules/ and
+  # reports the JUnit results as a check run and as a build artifact.
+  unit-tests:
+    name: Unit Tests
+    # The publish step creates a check run (and, by default, a PR comment);
+    # grant that explicitly so it also works with a read-only default token.
+    permissions:
+      contents: read
+      issues: read
+      checks: write
+      pull-requests: write
     runs-on: ubuntu-latest
     steps:
-      - name: setup BATS
-        uses: mig4/setup-bats@v1
+      - name: Checkout repository
+        uses: actions/checkout@v4
         with:
-          bats-version: 1.2.1
+          submodules: recursive
 
-      - uses: actions/checkout@v3
+      - name: Setup Java
+        uses: actions/setup-java@v4
+        with:
+          distribution: 'temurin'
+          java-version: '17'
+
+      - name: Install Nextflow
+        run: |
+          wget -qO- get.nextflow.io | bash
+          sudo mv nextflow /usr/local/bin/
+
+      - name: Install nf-test
+        run: |
+          wget -qO- https://get.nf-test.com | bash
+          sudo mv nf-test /usr/local/bin
+
+      - name: Run unit tests
+        run: nf-test test tests/modules/ --verbose --junitxml=unit-test-results.xml
+
+      # always(): report results even when the test step above failed.
+      - name: Publish unit test results
+        if: always()
+        uses: EnricoMi/publish-unit-test-result-action@v2
+        with:
+          files: unit-test-results.xml
+          check_name: Unit Test Results
+
+      # Keep the raw nf-test working directory next to the JUnit report so a
+      # failing run can be inspected after the job has finished.
+      - name: Upload unit test results
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: unit-test-results
+          path: |
+            .nf-test/tests/
+            unit-test-results.xml
+
+  # End-to-end run of tests/main.nf.test; only started once unit-tests passed.
+  pipeline-test:
+    name: Pipeline Integration Test
+    # The publish step creates a check run (and, by default, a PR comment);
+    # grant that explicitly so it also works with a read-only default token.
+    permissions:
+      contents: read
+      issues: read
+      checks: write
+      pull-requests: write
+    runs-on: ubuntu-latest
+    needs: unit-tests
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
         with:
           submodules: recursive
-      - uses: actions/setup-java@v2
+
+      - name: Setup Java
+        uses: actions/setup-java@v4
         with:
           distribution: 'temurin'
           java-version: '17'
 
-      - name: install nextflow
+      - name: Install Nextflow
         run: |
           wget -qO- get.nextflow.io | bash
           sudo mv nextflow /usr/local/bin/
-      - name: install nf-test
+
+      - name: Install nf-test
         run: |
           wget -qO- https://get.nf-test.com | bash
           sudo mv nf-test /usr/local/bin
 
-      - name: run pipeline
-        run: nf-test test
+      - name: Run pipeline test
+        run: nf-test test tests/main.nf.test --verbose --junitxml=pipeline-test-results.xml
+
+      # always(): report results even when the test step above failed.
+      - name: Publish pipeline test results
+        if: always()
+        uses: EnricoMi/publish-unit-test-result-action@v2
+        with:
+          files: pipeline-test-results.xml
+          check_name: Pipeline Test Results
+
+      # Uploaded exactly once: upload-artifact@v4 rejects a second artifact
+      # with the same name within one workflow run.
+      - name: Upload pipeline test results
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: pipeline-test-results
+          path: |
+            .nf-test/tests/
+            pipeline-test-results.xml
diff --git a/.gitignore b/.gitignore
@@ -6,6 +6,11 @@ runs/*
 
 results*
 work
+out-*
+
+## nf-test
+.nf-test/
+.nf-test.log
 
 notebooks/*.png
 notebooks/*.ipynb

diff --git a/tests/fixtures/data/PD1_Patient01_Base.tsv b/tests/fixtures/data/PD1_Patient01_Base.tsv
@@ -0,0 +1,4 @@
+nucleotide	aminoAcid	count (templates/reads)	frequencyCount (%)	cdr3Length	vMaxResolved	vFamilyName	vGeneName	vGeneAllele	vFamilyTies	vGeneNameTies	vGeneAlleleTies	dMaxResolved	dFamilyName	dGeneName	dGeneAllele	dFamilyTies	dGeneNameTies	dGeneAlleleTies	jMaxResolved	jFamilyName	jGeneName	jGeneAllele	jFamilyTies	jGeneNameTies	jGeneAlleleTies	vDeletion	n1Insertion	d5Deletion	d3Deletion	n2Insertion	jDeletion	vIndex	n1Index	dIndex	n2Index	jIndex	estimatedNumberGenomes	sequenceStatus	cloneResolved	vOrphon	dOrphon	jOrphon	vFunction	dFunction	jFunction	fractionNucleated	vAlignLength	vAlignSubstitutionCount	vAlignSubstitutionIndexes	vAlignSubstitutionGeneThreePrimeIndexes	vSeqWithMutations
+ATCCTGAGTTCTAAGAAGCTCCTTCTCAGTGACTCTGGCTTCTATCTCTGTGCCTGGAGGTACACCGGGGAGCTGTTTTTTGGAGAA	CAWRYTGELFF	69	3.397307756693681E-4	33	TCRBV30-01*01	TCRBV30	TCRBV30-01	01				unresolved		unresolved					TCRBJ02-02*01	TCRBJ02	TCRBJ02-02	01				3	0	0	0	2	3	48	-1	-1	59	61	null	In	VJ	null	null	null	null	null	null	null	null	null	null	null	null
+CCTCTCACTGTGACATCGGCCCAAAAGAACCCGACAGCTTTCTATCTCTGTGCCAGTAGTATGAATCAGCCCCAGCATTTTGGTGAT	CASSMNQPQHF	134	6.597670136187728E-4	33	TCRBV19-01	TCRBV19	TCRBV19-01				01,02	unresolved		unresolved					TCRBJ01-05*01	TCRBJ01	TCRBJ01-05	01				3	0	0	0	1	4	48	-1	-1	62	63	null	In	VJ	null	null	null	null	null	null	null	null	null	null	null	null
+ACTCTGACAGTGACCAGTGCCCATCCTGAAGACAGCAGCTTCTACATCTGCAGTGCTAGTGTGAACACTGAAGCTTTCTTTGGACAA	CSASVNTEAFF	39	1.9202174276964284E-4	33	TCRBV20-01*01	TCRBV20	TCRBV20-01	01				unresolved		unresolved					TCRBJ01-01*01	TCRBJ01	TCRBJ01-01	01				3	0	0	0	2	0	48	-1	-1	59	61	null	In	VJ	null	null	null	null	null	null	null	null	null	null	null	null
diff --git a/tests/fixtures/data/PD1_Patient01_Post.tsv b/tests/fixtures/data/PD1_Patient01_Post.tsv
@@ -0,0 +1,3 @@
+nucleotide	aminoAcid	count (templates/reads)	frequencyCount (%)	cdr3Length	vMaxResolved	vFamilyName	vGeneName	vGeneAllele	vFamilyTies	vGeneNameTies	vGeneAlleleTies	dMaxResolved	dFamilyName	dGeneName	dGeneAllele	dFamilyTies	dGeneNameTies	dGeneAlleleTies	jMaxResolved	jFamilyName	jGeneName	jGeneAllele	jFamilyTies	jGeneNameTies	jGeneAlleleTies	vDeletion	n1Insertion	d5Deletion	d3Deletion	n2Insertion	jDeletion	vIndex	n1Index	dIndex	n2Index	jIndex	estimatedNumberGenomes	sequenceStatus	cloneResolved	vOrphon	dOrphon	jOrphon	vFunction	dFunction	jFunction	fractionNucleated	vAlignLength	vAlignSubstitutionCount	vAlignSubstitutionIndexes	vAlignSubstitutionGeneThreePrimeIndexes	vSeqWithMutations
+ACACACCCTGCAGCCAGAAGACTCGGCCCTGTATCTCTGTGCCAGCAGCCAAGCTGAGGGGGGGCCCGGGAGCTGTTTTTTGGAGAA		2	1.3191741969527076E-4	44	TCRBV04-02*01	TCRBV04	TCRBV04-02	01				TCRBD02-01*01	TCRBD02	TCRBD02-01	01				TCRBJ02-02*01	TCRBJ02	TCRBJ02-02	01				1	4	9	0	3	9	37	53	57	64	67	null	Out	VDJ	null	null	null	null	null	null	null	null	null	null	null	null
+ACTCTGAAGATCCAGCCCTCAGAACCCAGGGACTCAGCTGTGTACTTCTGTGCCAGCAGTTTAAGCTACGAGCAGTACTTCGGGCCG	CASSLSYEQYF	2	1.3191741969527076E-4	33	unresolved	TCRBV12	unresolved			TCRBV12-03,TCRBV12-04		unresolved		unresolved		TCRBD01,TCRBD02	TCRBD01-01,TCRBD02-01		TCRBJ02-07*01	TCRBJ02	TCRBJ02-07	01				2	0	5	5	0	3	48	-1	63	-1	65	null	In	VDJ	null	null	null	null	null	null	null	null	null	null	null	null
diff --git a/tests/fixtures/data/PD1_Patient02_Base.tsv b/tests/fixtures/data/PD1_Patient02_Base.tsv
@@ -0,0 +1,5 @@
+nucleotide	aminoAcid	count (templates/reads)	frequencyCount (%)	cdr3Length	vMaxResolved	vFamilyName	vGeneName	vGeneAllele	vFamilyTies	vGeneNameTies	vGeneAlleleTies	dMaxResolved	dFamilyName	dGeneName	dGeneAllele	dFamilyTies	dGeneNameTies	dGeneAlleleTies	jMaxResolved	jFamilyName	jGeneName	jGeneAllele	jFamilyTies	jGeneNameTies	jGeneAlleleTies	vDeletion	n1Insertion	d5Deletion	d3Deletion	n2Insertion	jDeletion	vIndex	n1Index	dIndex	n2Index	jIndex	estimatedNumberGenomes	sequenceStatus	cloneResolved	vOrphon	dOrphon	jOrphon	vFunction	dFunction	jFunction	fractionNucleated	vAlignLength	vAlignSubstitutionCount	vAlignSubstitutionIndexes	vAlignSubstitutionGeneThreePrimeIndexes	vSeqWithMutations
+TACCTTGGAGATCCAGTCCACGGAGTCAGGGGACACAGCACTGTATTTCTGTGCCAGCAGCAATCCTACGAGCAGTACTTCGGGCCG		13	4.7905956921489506E-5	32	TCRBV21-01*01	TCRBV21	TCRBV21-01	01				unresolved		unresolved					TCRBJ02-07*01	TCRBJ02	TCRBJ02-07	01				3	0	0	0	0	1	49	-1	-1	-1	63	null	Out	VJ	null	null	null	null	null	null	null	null	null	null	null	null
+CTTCACCTACACGCCCTGCAGCCAGAAGACTCAGCCCTGTATCTCTGCGCCAGCAGCCCCCCGGCCAACGTCCTGACTTTCGGGGCC	CASSPPANVLTF	15	5.52761041401802E-5	36	TCRBV04-01*01	TCRBV04	TCRBV04-01	01				unresolved		unresolved					TCRBJ02-06*01	TCRBJ02	TCRBJ02-06	01				4	0	0	0	4	6	45	-1	-1	58	62	null	In	VJ	null	null	null	null	null	null	null	null	null	null	null	null
+TACCTTGGAGATCCAGTCCACGGAGTCAGGGGACACAGCACTGGATTTCTGTGCCAGCAGCAATCCTACGAGCAGTACTTCGGGCCG		3	1.105522082803604E-5	32	TCRBV21-01*01	TCRBV21	TCRBV21-01	01				unresolved		unresolved					TCRBJ02-07*01	TCRBJ02	TCRBJ02-07	01				3	0	0	0	0	1	49	-1	-1	-1	63	null	Out	VJ	null	null	null	null	null	null	null	null	null	null	null	null
+GCCAGAAGACTCGGCCCTGTATCTCTGCGCCAGCAGCCTCCTGATCCGGGGTAGCGGGAGTACACCGGGGAGCTGTTTTTTGGAGAA		22339	0.0823208593591657	56	TCRBV04-03*01	TCRBV04	TCRBV04-03	01				TCRBD02-01*02	TCRBD02	TCRBD02-01	02				TCRBJ02-02*01	TCRBJ02	TCRBJ02-02	01				4	13	5	2	1	3	25	38	51	60	61	null	Out	VDJ	null	null	null	null	null	null	null	null	null	null	null	null
diff --git a/tests/fixtures/data/PD1_Patient03_Base.tsv b/tests/fixtures/data/PD1_Patient03_Base.tsv
@@ -0,0 +1,6 @@
+nucleotide	aminoAcid	count (templates/reads)	frequencyCount (%)	cdr3Length	vMaxResolved	vFamilyName	vGeneName	vGeneAllele	vFamilyTies	vGeneNameTies	vGeneAlleleTies	dMaxResolved	dFamilyName	dGeneName	dGeneAllele	dFamilyTies	dGeneNameTies	dGeneAlleleTies	jMaxResolved	jFamilyName	jGeneName	jGeneAllele	jFamilyTies	jGeneNameTies	jGeneAlleleTies	vDeletion	n1Insertion	d5Deletion	d3Deletion	n2Insertion	jDeletion	vIndex	n1Index	dIndex	n2Index	jIndex	estimatedNumberGenomes	sequenceStatus	cloneResolved	vOrphon	dOrphon	jOrphon	vFunction	dFunction	jFunction	fractionNucleated	vAlignLength	vAlignSubstitutionCount	vAlignSubstitutionIndexes	vAlignSubstitutionGeneThreePrimeIndexes	vSeqWithMutations
+GAGGATTTCCCGCTCAGGCTGCTGTCGGCTGCTCCCTCCCAGACATCTGTGTACTTCTGTGCCAGGTACGAGCAGTACTTCGGGCCG	CARYEQYF	58	5.50446003619657E-5	24	TCRBV06-05*01	TCRBV06	TCRBV06-05	01				unresolved		unresolved					TCRBJ02-07*01	TCRBJ02	TCRBJ02-07	01				9	0	0	0	1	4	57	-1	-1	65	66	null	In	VJ	null	null	null	null	null	null	null	null	null	null	null	null
+AGCGCTTCTCCCTGATTCTGGAGTCCGCCAGCACCAACCAGACATCTATGTACCTCTGTGCCAGCAGGCGAGCAGTACTTCGGGCCG		27	2.562421051332886E-5	25	TCRBV28-01*01	TCRBV28	TCRBV28-01	01				unresolved		unresolved					TCRBJ02-07*01	TCRBJ02	TCRBJ02-07	01				6	0	0	0	1	6	56	-1	-1	67	68	null	Out	VJ	null	null	null	null	null	null	null	null	null	null	null	null
+TCAGGCTGGAGTCGGCTGCTCCCTCCCAGACATCTGTGTACTTCTGTGCCAGCAGCGGGACAGGGTAATGGCTACACCTTCGGTTCG		73164	0.06943591622211825	37	TCRBV06-01*01	TCRBV06	TCRBV06-01	01				TCRBD01-01*01	TCRBD01	TCRBD01-01	01				TCRBJ01-02*01	TCRBJ01	TCRBJ01-02	01				6	1	0	3	2	6	44	55	56	65	67	null	Out	VDJ	null	null	null	null	null	null	null	null	null	null	null	null
+AACGCCTTGGAGCTGGACGACTCGGCCCTGTATCTCTGTGCCAGCAGCACCAGTGGGACGCAGACAGATACGCAGTATTTTGGCCCA	CASSTSGTQTDTQYF	11786	0.011185442411484961	45	TCRBV05-04*01	TCRBV05	TCRBV05-04	01				unresolved		unresolved		TCRBD01,TCRBD02	TCRBD01-01,TCRBD02-01		TCRBJ02-03*01	TCRBJ02	TCRBJ02-03	01				4	6	0	7	4	3	36	48	54	59	63	null	In	VDJ	null	null	null	null	null	null	null	null	null	null	null	null
+ACCTTGGAGATCCAGTCCACGGAGTCAGGGGACACAGCACTGTATTTCTGTGCCAGCAGCCTTTTAGGGGCGGCTTTCTTTGGACAA	CASSLLGAAFF	9662	0.009169671184436423	33	TCRBV21-01*01	TCRBV21	TCRBV21-01	01				TCRBD01-01*01	TCRBD01	TCRBD01-01	01				TCRBJ01-01*01	TCRBJ01	TCRBJ01-01	01				5	5	5	2	2	11	48	60	65	70	72	null	In	VDJ	null	null	null	null	null	null	null	null	null	null	null	null
diff --git a/tests/fixtures/data/malformed_adaptive.tsv b/tests/fixtures/data/malformed_adaptive.tsv
@@ -0,0 +1,3 @@
+nucleotide	aminoAcid	count (templates/reads)
+ATCCTGAGTTCTAAGAAGCTCCTTCTCAGTGACTCTGGCTTCTATCTCTGTGCCTGGAGGTACACCGGGGAGCTGTTTTTTGGAGAA	CAWRYTGELFF	69
+This line is missing required columns
diff --git a/tests/fixtures/empty_samplesheet.csv b/tests/fixtures/empty_samplesheet.csv
@@ -0,0 +1 @@
+sample,subject_id,timepoint,origin,file
diff --git a/tests/fixtures/malformed_samplesheet.csv b/tests/fixtures/malformed_samplesheet.csv
@@ -0,0 +1,4 @@
+sample,subject_id,timepoint,origin,file
+Patient01_Base,Patient01,Base,tumor,tests/fixtures/data/PD1_Patient01_Base.tsv
+This is not a valid CSV line
+Patient02_Base,Patient02,Base
diff --git a/tests/fixtures/missing_columns_samplesheet.csv b/tests/fixtures/missing_columns_samplesheet.csv
@@ -0,0 +1,3 @@
+sample,file
+Patient01_Base,tests/fixtures/data/PD1_Patient01_Base.tsv
+Patient02_Base,tests/fixtures/data/PD1_Patient02_Base.tsv
diff --git a/tests/fixtures/valid_samplesheet.csv b/tests/fixtures/valid_samplesheet.csv
@@ -0,0 +1,5 @@
+sample,subject_id,timepoint,origin,file
+Patient01_Base,Patient01,Base,tumor,tests/fixtures/data/PD1_Patient01_Base.tsv
+Patient02_Base,Patient02,Base,tumor,tests/fixtures/data/PD1_Patient02_Base.tsv
+Patient03_Base,Patient03,Base,tumor,tests/fixtures/data/PD1_Patient03_Base.tsv
+Patient01_Post,Patient01,Post,tumor,tests/fixtures/data/PD1_Patient01_Post.tsv
diff --git a/tests/modules/local/airr_convert/convert_adaptive.nf.test b/tests/modules/local/airr_convert/convert_adaptive.nf.test
@@ -0,0 +1,207 @@
+nextflow_process {
+
+    name "Test CONVERT_ADAPTIVE"
+    script "modules/local/airr_convert/convert_adaptive.nf"
+    process "CONVERT_ADAPTIVE"
+
+    // Note: Test fixtures in tests/fixtures/data/ contain small subsets of
+    // Adaptive Biotechnologies format TCR data for testing conversion to AIRR format
+
+    // Converts the Patient01 baseline fixture and checks the shape of the
+    // result: a single [meta, file] tuple, the expected output file name, and
+    // the core AIRR rearrangement columns in the header row.
+    test("Should convert valid Adaptive format to AIRR") {
+
+        tag "basic"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [sample: 'Patient01_Base'],
+                    file("${projectDir}/tests/fixtures/data/PD1_Patient01_Base.tsv")
+                ]
+                input[1] = file("${projectDir}/assets/airr/airr_rearrangement_schema.json")
+                input[2] = file("${projectDir}/assets/airr/imgt_adaptive_lookup.tsv")
+                """
+            }
+        }
+
+        then {
+            assert process.success
+            assert process.out.adaptive_convert
+
+            with(process.out.adaptive_convert) {
+                assert size() == 1
+
+                // Check the tuple structure [sample_meta, file]
+                def (meta, airr_file) = get(0)
+                assert meta.sample == 'Patient01_Base'
+
+                // Check output file exists and has correct name
+                def file_path = path(airr_file)
+                assert file_path.exists()
+                assert file_path.getFileName().toString() == 'Patient01_Base_airr.tsv'
+
+                // Verify AIRR format structure
+                def lines = file_path.readLines()
+                assert lines.size() > 1  // Header + data rows
+
+                // Match whole column names rather than substrings of the header
+                // line: a substring check for "sequence" or "junction" is already
+                // satisfied by "sequence_id" / "junction_aa" and proves nothing.
+                def columns = lines[0].split('\t') as List
+                def required_columns = [
+                    'sequence_id',
+                    'sequence',
+                    'junction',
+                    'junction_aa',
+                    'v_call',
+                    'd_call',
+                    'j_call',
+                    'productive',
+                    'duplicate_count'
+                ]
+                def missing_columns = required_columns - columns
+                assert missing_columns.isEmpty()
+            }
+        }
+    }
+
+    // Runs the converter on the Patient02 baseline file from the
+    // minimal-example data set and expects more than ten lines of output.
+    // NOTE(review): despite the test name, only one sample is converted here.
+    test("Should convert multiple samples from minimal-example") {
+
+        tag "integration"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [sample: 'Patient02_Base'],
+                    file("${projectDir}/tests/test_data/minimal-example/PD1_Patient02_Base.tsv")
+                ]
+                input[1] = file("${projectDir}/assets/airr/airr_rearrangement_schema.json")
+                input[2] = file("${projectDir}/assets/airr/imgt_adaptive_lookup.tsv")
+                """
+            }
+        }
+
+        then {
+            assert process.success
+
+            def emitted = process.out.adaptive_convert
+            assert emitted
+
+            // First emission is the [sample_meta, converted_file] tuple.
+            def first_emission = emitted.get(0)
+            def sample_meta = first_emission[0]
+            def converted = path(first_emission[1])
+
+            assert sample_meta.sample == 'Patient02_Base'
+            assert converted.exists()
+
+            // Header plus the converted rows of the full input file.
+            assert converted.readLines().size() > 10
+        }
+    }
+
+    // Feeds a meta map with four keys into the process and expects every one
+    // of them to come back unchanged next to the converted file.
+    test("Should preserve sample metadata through conversion") {
+
+        tag "metadata"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [
+                        sample: 'Patient03_Base',
+                        subject_id: 'Patient03',
+                        timepoint: 'Base',
+                        origin: 'tumor'
+                    ],
+                    file("${projectDir}/tests/fixtures/data/PD1_Patient03_Base.tsv")
+                ]
+                input[1] = file("${projectDir}/assets/airr/airr_rearrangement_schema.json")
+                input[2] = file("${projectDir}/assets/airr/imgt_adaptive_lookup.tsv")
+                """
+            }
+        }
+
+        then {
+            assert process.success
+
+            def emitted = process.out.adaptive_convert
+            assert emitted
+
+            def first_emission = emitted.get(0)
+            def sample_meta = first_emission[0]
+
+            // Every key that went in must come out with the same value.
+            def expected_meta = [
+                sample: 'Patient03_Base',
+                subject_id: 'Patient03',
+                timepoint: 'Base',
+                origin: 'tumor'
+            ]
+            expected_meta.each { key, value ->
+                assert sample_meta[key] == value
+            }
+
+            assert path(first_emission[1]).exists()
+        }
+    }
+
+    // Negative case: runs the process on tests/fixtures/data/malformed_adaptive.tsv
+    // and expects the process to fail instead of producing output.
+    test("Should handle malformed Adaptive data gracefully") {
+
+        tag "error-handling"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [sample: 'MalformedSample'],
+                    file("${projectDir}/tests/fixtures/data/malformed_adaptive.tsv")
+                ]
+                input[1] = file("${projectDir}/assets/airr/airr_rearrangement_schema.json")
+                input[2] = file("${projectDir}/assets/airr/imgt_adaptive_lookup.tsv")
+                """
+            }
+        }
+
+        then {
+            // The fixture has only three header columns and a free-text row, so
+            // the conversion is expected to fail on the missing required columns.
+            assert process.failed
+        }
+    }
+
+    // Converts the Patient01 baseline fixture and inspects the gene-call
+    // columns: v_call, d_call and j_call must exist in the header, and a
+    // non-empty v_call in the first data row must be in IMGT (TRB...) form.
+    test("Should convert V, D, J gene calls correctly") {
+
+        tag "gene-conversion"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [sample: 'Patient01_Base'],
+                    file("${projectDir}/tests/fixtures/data/PD1_Patient01_Base.tsv")
+                ]
+                input[1] = file("${projectDir}/assets/airr/airr_rearrangement_schema.json")
+                input[2] = file("${projectDir}/assets/airr/imgt_adaptive_lookup.tsv")
+                """
+            }
+        }
+
+        then {
+            assert process.success
+            assert process.out.adaptive_convert
+
+            with(process.out.adaptive_convert) {
+                def airr_file = get(0)[1]
+                def file_path = path(airr_file)
+                def lines = file_path.readLines()
+
+                // Check that gene calls are present in output.
+                // limit -1 keeps trailing empty columns so indexes stay aligned.
+                def header = lines[0].split('\t', -1)
+                def v_idx = header.findIndexOf { it == 'v_call' }
+                def d_idx = header.findIndexOf { it == 'd_call' }
+                def j_idx = header.findIndexOf { it == 'j_call' }
+
+                assert v_idx >= 0
+                assert d_idx >= 0
+                assert j_idx >= 0
+
+                // Require a data row instead of silently skipping the check
+                // when the output holds nothing but the header.
+                assert lines.size() > 1
+
+                def first_row = lines[1].split('\t', -1)
+                def v_call = first_row.size() > v_idx ? first_row[v_idx] : ''
+
+                // An empty v_call is tolerated; a populated one should be in
+                // IMGT format (e.g., TRBV30-01*01)
+                if (v_call) {
+                    assert v_call.contains('TRB')
+                }
+            }
+        }
+    }
+}