nrminor
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
Lines changed: 2 additions & 10 deletions
diff --git a/bin/generate_variant_pivot.py b/bin/generate_variant_pivot.py
Lines changed: 2 additions & 2 deletions
diff --git a/bin/ivar_variants_to_vcf.py b/bin/ivar_variants_to_vcf.py
Lines changed: 83 additions & 35 deletions
diff --git a/bin/prepare_primers.py b/bin/prepare_primers.py
Lines changed: 4 additions & 1 deletion
diff --git a/bin/test_concat_consensus.py b/bin/test_concat_consensus.py
Lines changed: 20 additions & 13 deletions
@@ -31,19 +31,11 @@ jobs:
 
       - name: Install dependencies with UV
         run: |
-          uv sync --dev --frozen
+          uv sync --frozen
 
       - name: Run Python tests with pytest
         run: |
-          uv run pytest bin/ -v --cov=bin --cov-report=xml --cov-report=term-missing
-
-      - name: Upload coverage reports
-        uses: codecov/codecov-action@v4
-        with:
-          files: ./coverage.xml
-          flags: python-${{ matrix.python-version }}
-          name: Python ${{ matrix.python-version }}
-        if: matrix.python-version == '3.12'
+          uv run pytest
 
   python-tests-tox:
     runs-on: ubuntu-latest
 
@@ -44,7 +44,7 @@ def main() -> None:
         separator="\t",
         has_header=False,
         skip_rows=1,
-        columns=[
+        new_columns=[
             "contig",
             "ref",
             "pos",
@@ -60,7 +60,7 @@ def main() -> None:
             "aa_pos",
         ],
     ).with_columns(pl.col("aa_effect").str.replace(".p", "").alias("aa_effect"))
-    logger.info("Hi mom!")
+    logger.info("Pivot implementation coming soon!")
 
 
 if __name__ == "__main__":
 
@@ -20,6 +20,7 @@
 for validation. Features a beautiful command-line interface built with Typer and Rich.
 """
 
+import gzip
 from enum import Enum
 from pathlib import Path
 from typing import Annotated, cast
@@ -97,9 +98,17 @@ class IvarVariant(BaseModel):
     @field_validator("ref_rv", "alt_rv")
     @classmethod
     def validate_reverse_depth(cls, v: int, info: ValidationInfo) -> int:
-        """Ensure reverse depth doesn't exceed total depth."""
-        if "ref_dp" in info.data and v > info.data["ref_dp"]:
-            msg = "Reverse depth cannot exceed total depth"
+        """Ensure reverse depth doesn't exceed total depth for that allele."""
+        if info.field_name == "ref_rv":
+            depth_field = "ref_dp"
+        elif info.field_name == "alt_rv":
+            depth_field = "alt_dp"
+        else:
+            msg = f"Unexpected field in reverse depth validator: {info.field_name}"
+            raise ValueError(msg)
+
+        if depth_field in info.data and v > info.data[depth_field]:
+            msg = f"Reverse depth ({info.field_name}) cannot exceed total depth ({depth_field})"
             raise ValueError(msg)
         return v
 
@@ -178,7 +187,10 @@ def validate_output_dir(cls, v: Path) -> Path:
 
 
 def calculate_strand_bias_pvalue(
-    ref_dp: int, ref_rv: int, alt_dp: int, alt_rv: int
+    ref_dp: int,
+    ref_rv: int,
+    alt_dp: int,
+    alt_rv: int,
 ) -> float:
     """Calculate p-value for strand bias using Fisher's exact test.
 
@@ -199,7 +211,7 @@ def calculate_strand_bias_pvalue(
     )
     _odds_ratio, pvalue = cast(
         "tuple[float, float]",
-        fisher_exact(contingency_table, alternative="greater"),
+        fisher_exact(contingency_table, alternative="two-sided"),
     )
     return pvalue
 
@@ -289,9 +301,7 @@ def create_filter_expr(config: ConversionConfig) -> pl.Expr:
 
     # iVar PASS filter
     filters.append(
-        pl.when(pl.col("PASS"))
-        .then(pl.lit(""))
-        .otherwise(pl.lit(FilterType.FAIL_TEST.value)),
+        pl.when(pl.col("PASS")).then(pl.lit("")).otherwise(pl.lit(FilterType.FAIL_TEST.value)),
     )
 
     # Quality filter
@@ -317,7 +327,8 @@ def create_filter_expr(config: ConversionConfig) -> pl.Expr:
         .list.join(";")
         .fill_null("")
         .map_elements(
-            lambda x: FilterType.PASS.value if x == "" else x, return_dtype=pl.Utf8
+            lambda x: FilterType.PASS.value if x == "" else x,
+            return_dtype=pl.Utf8,
         )
     )
 
@@ -345,7 +356,8 @@ def create_sample_info_expr() -> pl.Expr:
 
 
 def transform_ivar_to_vcf(
-    ivar_lf: pl.LazyFrame, config: ConversionConfig
+    ivar_lf: pl.LazyFrame,
+    config: ConversionConfig,
 ) -> pl.LazyFrame:
     """Transform iVar data to VCF format using pure expressions.
 
@@ -382,7 +394,7 @@ def transform_ivar_to_vcf(
                 ],
             ).alias("INFO"),
             pl.lit(
-                "GT:DP:REF_DP:REF_RV:REF_QUAL:ALT_DP:ALT_RV:ALT_QUAL:ALT_FREQ"
+                "GT:DP:REF_DP:REF_RV:REF_QUAL:ALT_DP:ALT_RV:ALT_QUAL:ALT_FREQ",
             ).alias("FORMAT"),
             create_sample_info_expr().alias("SAMPLE"),
         ],
@@ -400,7 +412,8 @@ def find_consecutive_variants_expr() -> pl.Expr:
 
 
 def process_consecutive_snps(
-    ivar_lf: pl.LazyFrame, config: ConversionConfig
+    ivar_lf: pl.LazyFrame,
+    config: ConversionConfig,
 ) -> pl.LazyFrame:
     """Process consecutive SNPs for potential merging.
 
@@ -509,7 +522,6 @@ def write_vcf_file(
     # Write file (handle gzipped output)
     if str(filepath).endswith(".gz"):
         # For gzip, we need to write everything as text to the same handle
-        import gzip
 
         with gzip.open(filepath, "wt") as f:
             f.write(header_text)
@@ -539,6 +551,52 @@ def process_ivar_file(config: ConversionConfig) -> None:
         task = progress.add_task("[cyan]Loading iVar data...", total=None)
         ivar_df = pl.scan_csv(str(config.file_in), separator="\t")
 
+        # Check if input has any data rows (collect a row count; an empty file counts as zero)
+        progress.update(task, description="[yellow]Checking input data...")
+        try:
+            row_count = ivar_df.select(pl.len()).collect().item()
+        except pl.exceptions.NoDataError:
+            row_count = 0
+
+        # Generate headers (needed regardless of data)
+        progress.update(task, description="[yellow]Generating VCF headers...")
+        headers = generate_vcf_header(config)
+        sample_name = config.file_in.stem
+
+        if row_count == 0:
+            # Handle empty input: write VCF with headers only
+            progress.update(
+                task,
+                description="[yellow]No variants found, writing empty VCF...",
+            )
+            empty_df = pl.DataFrame(
+                schema={
+                    "CHROM": pl.Utf8,
+                    "POS": pl.Int64,
+                    "ID": pl.Utf8,
+                    "REF": pl.Utf8,
+                    "ALT": pl.Utf8,
+                    "QUAL": pl.Utf8,
+                    "FILTER": pl.Utf8,
+                    "INFO": pl.Utf8,
+                    "FORMAT": pl.Utf8,
+                    "SAMPLE": pl.Utf8,
+                },
+            )
+            write_vcf_file(empty_df, config.file_out, headers, sample_name)
+
+            all_hap_path = (
+                config.file_out.parent / f"{config.file_out.stem}_all_hap{config.file_out.suffix}"
+            )
+            write_vcf_file(empty_df, all_hap_path, headers, sample_name)
+
+            progress.update(
+                task,
+                description="[bold yellow]✓ No variants found, empty VCF written",
+                completed=True,
+            )
+            return
+
         # Transform to VCF format
         progress.update(task, description="[yellow]Transforming to VCF format...")
         vcf_df = transform_ivar_to_vcf(ivar_df, config)
@@ -551,27 +609,21 @@ def process_ivar_file(config: ConversionConfig) -> None:
         progress.update(task, description="[yellow]Collecting results...")
         result_df = processed_df.collect()
 
-        # Generate headers
-        progress.update(task, description="[yellow]Generating VCF headers...")
-        headers = generate_vcf_header(config)
-
-        # Get sample name from input file
-        sample_name = config.file_in.stem
-
         # Write consensus output
         progress.update(task, description="[green]Writing consensus VCF...")
         write_vcf_file(result_df, config.file_out, headers, sample_name)
 
         # Write all haplotypes output
         progress.update(task, description="[green]Writing all haplotypes VCF...")
         all_hap_path = (
-            config.file_out.parent
-            / f"{config.file_out.stem}_all_hap{config.file_out.suffix}"
+            config.file_out.parent / f"{config.file_out.stem}_all_hap{config.file_out.suffix}"
         )
         write_vcf_file(result_df, all_hap_path, headers, sample_name)
 
         progress.update(
-            task, description="[bold green]✓ Conversion complete!", completed=True
+            task,
+            description="[bold green]✓ Conversion complete!",
+            completed=True,
         )
 
 
@@ -758,14 +810,13 @@ def convert(  # noqa: PLR0913
 
         # Success message
         console.print(
-            f"\n[bold green]✓[/bold green] Successfully converted to {config.file_out}"
+            f"\n[bold green]✓[/bold green] Successfully converted to {config.file_out}",
         )
         all_hap_path = (
-            config.file_out.parent
-            / f"{config.file_out.stem}_all_hap{config.file_out.suffix}"
+            config.file_out.parent / f"{config.file_out.stem}_all_hap{config.file_out.suffix}"
         )
         console.print(
-            f"[bold green]✓[/bold green] All haplotypes written to {all_hap_path}"
+            f"[bold green]✓[/bold green] All haplotypes written to {all_hap_path}",
         )
 
     except Exception as e:  # noqa: BLE001
@@ -774,7 +825,7 @@ def convert(  # noqa: PLR0913
 
 
 @app.command()
-def validate(
+def validate(  # noqa: C901, PLR0912
     file_path: Annotated[
         Path,
         typer.Argument(
@@ -792,8 +843,6 @@ def validate(
     console.print(f"[cyan]Validating VCF file:[/cyan] {file_path}")
 
     try:
-        import gzip
-
         # Count header lines (handle gzipped files)
         header_count = 0
         if str(file_path).endswith(".gz"):
@@ -841,7 +890,7 @@ def validate(
 
         if missing_cols:
             console.print(
-                f"\n[red]✗ Missing required columns:[/red] {', '.join(missing_cols)}"
+                f"\n[red]✗ Missing required columns:[/red] {', '.join(missing_cols)}",
             )
         else:
             console.print("\n[green]✓ All required VCF columns present[/green]")
@@ -905,8 +954,7 @@ def stats(
         console.print("\n[bold]Variant Types:[/bold]")
         snp_count = len(
             ivar_df.filter(
-                ~pl.col("ALT").str.starts_with("+")
-                & ~pl.col("ALT").str.starts_with("-"),
+                ~pl.col("ALT").str.starts_with("+") & ~pl.col("ALT").str.starts_with("-"),
             ),
         )
         ins_count = len(ivar_df.filter(pl.col("ALT").str.starts_with("+")))
@@ -929,8 +977,8 @@ def stats(
         for low, high in freq_bins:
             count = len(
                 ivar_df.filter(
-                    (pl.col("ALT_FREQ") >= low) & (pl.col("ALT_FREQ") < high)
-                )
+                    (pl.col("ALT_FREQ") >= low) & (pl.col("ALT_FREQ") < high),
+                ),
             )
             if count > 0:
                 console.print(f"  • {low:.0%}-{high:.0%}: {count:,} variants")
 
@@ -492,7 +492,10 @@ def generate_splice_combinations(
 
     result: list[tuple[BedRecord, str]] = []
 
-    for splice_idx, ((fwd_rec, fwd_name), (rev_rec, rev_name)) in enumerate(all_pairs, 1):
+    for splice_idx, ((fwd_rec, fwd_name), (rev_rec, rev_name)) in enumerate(
+        all_pairs,
+        1,
+    ):
         # Strip the index suffix and add splice identifier
         fwd_base = fwd_name.rsplit("-", 1)[0]
         rev_base = rev_name.rsplit("-", 1)[0]
 
@@ -9,6 +9,7 @@
 from pathlib import Path
 from textwrap import dedent
 from unittest.mock import patch
+
 import pytest
 from Bio import SeqIO
 
@@ -174,24 +175,25 @@ def test_main_with_malformed_fasta(self, malformed_fasta_file, monkeypatch):
         assert str(records[0].seq) == ""  # No valid sequences parsed
 
     def test_file_naming_with_different_extensions(self, temp_dir, monkeypatch):
-        """Test that only .consensus.fasta files are processed."""
+        """Test that .consensus.fasta and .consensus.fa files are processed."""
         monkeypatch.chdir(temp_dir)
 
         # Create files with different extensions
         (temp_dir / "sample.consensus.fasta").write_text(">seq\nATCG")
-        (temp_dir / "other.fasta").write_text(">seq\nGCTA")
+        (temp_dir / "other.fasta").write_text(">seq\nGCTA")  # Should NOT match
         (temp_dir / "another.consensus.fa").write_text(
-            ">seq\nTTTT"
-        )  # Note: .fa not .fasta
+            ">seq\nTTTT",
+        )  # Should match - .fa is valid
 
         main()
 
         output_file = Path("all_sample_consensus.fasta")
         records = list(SeqIO.parse(output_file, "fasta"))
 
-        # Only the .consensus.fasta file should be processed
-        assert len(records) == 1
-        assert records[0].id == "sample"
+        # Both .consensus.fasta and .consensus.fa files should be processed
+        assert len(records) == 2
+        record_ids = {r.id for r in records}
+        assert record_ids == {"sample", "another.consensus.fa"}
 
     def test_sample_name_extraction(self, temp_dir, monkeypatch):
         """Test correct extraction of sample names from file paths."""
@@ -327,17 +329,18 @@ def test_glob_pattern_matching(self, temp_dir, monkeypatch):
         monkeypatch.chdir(temp_dir)
 
         # Create files that should and shouldn't match
+        # Pattern is *.consensus.fa* so matches .fasta, .fa, .fastq etc.
         matching_files = [
             "sample1.consensus.fasta",
             "sample2.consensus.fasta",
             "SAMPLE3.consensus.fasta",  # Test case sensitivity
+            "sample4.consensus.fa",  # .fa extension also matches
         ]
 
         non_matching_files = [
-            "sample.consensus.fastq",  # Wrong extension
-            "sample_consensus.fasta",  # Missing dot
-            "sample.consensus",  # Missing extension
-            "consensus.fasta",  # Missing sample name
+            "sample_consensus.fasta",  # Missing dot before consensus
+            "sample.consensus",  # Missing extension after .consensus
+            "consensus.fasta",  # Missing sample name prefix
         ]
 
         for filename in matching_files:
@@ -354,7 +357,8 @@ def test_glob_pattern_matching(self, temp_dir, monkeypatch):
         # Only matching files should be processed
         assert len(records) == len(matching_files)
         record_ids = {r.id for r in records}
-        expected_ids = {"sample1", "sample2", "SAMPLE3"}
+        # Note: .consensus.fasta is stripped, but .consensus.fa is not
+        expected_ids = {"sample1", "sample2", "SAMPLE3", "sample4.consensus.fa"}
         assert record_ids == expected_ids
 
 
@@ -367,7 +371,10 @@ def test_glob_pattern_matching(self, temp_dir, monkeypatch):
     ],
 )
 def test_performance_with_many_files(
-    temp_dir, monkeypatch, num_files, num_seqs_per_file
+    temp_dir,
+    monkeypatch,
+    num_files,
+    num_seqs_per_file,
 ):
     """Test performance with many input files."""
     monkeypatch.chdir(temp_dir)