nrminor
diff --git a/‎.gitattributes‎
Lines changed: 1 addition & 0 deletions b/‎.gitattributes‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎.github/workflows/test.yml‎
Lines changed: 110 additions & 0 deletions b/‎.github/workflows/test.yml‎
Lines changed: 110 additions & 0 deletions
diff --git a/‎.gitignore‎
Lines changed: 29 additions & 0 deletions b/‎.gitignore‎
Lines changed: 29 additions & 0 deletions
diff --git a/‎.pre-commit-config.yaml‎
Lines changed: 13 additions & 5 deletions b/‎.pre-commit-config.yaml‎
Lines changed: 13 additions & 5 deletions
diff --git a/‎CLAUDE.md‎
Lines changed: 143 additions & 0 deletions b/‎CLAUDE.md‎
Lines changed: 143 additions & 0 deletions
@@ -1,2 +1,3 @@
 # GitHub syntax highlighting
 pixi.lock linguist-language=YAML
+llms.txt linguist-language=markdown linguist-detectable=true
@@ -0,0 +1,110 @@
+# Continuous-integration workflow: its jobs run pytest, tox and nf-test.
+name: CI Tests
+
+on:
+  push:
+    branches: [main, dev, experimental]  # direct pushes to these branches
+  pull_request:
+    branches: [main, dev]  # pull requests targeting these branches
+  workflow_dispatch:  # allows the workflow to be started manually
+
+jobs:
+  # Run the pytest suite under bin/ once per Python version in the matrix.
+  python-tests:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        # Quoted so that 3.10 is not parsed as the float 3.1.
+        python-version: ['3.10', '3.11', '3.12', '3.13']
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install UV
+        uses: astral-sh/setup-uv@v4
+        with:
+          enable-cache: true
+          # `uv sync --frozen` installs exactly what uv.lock pins, so the
+          # cache key has to change when the lockfile changes as well.
+          cache-dependency-glob: |
+            pyproject.toml
+            uv.lock
+
+      - name: Install dependencies with UV
+        run: |
+          uv sync --dev --frozen
+
+      - name: Run Python tests with pytest
+        run: |
+          uv run pytest bin/ -v --cov=bin --cov-report=xml --cov-report=term-missing
+
+      # Coverage is uploaded from a single matrix entry to avoid duplicates.
+      - name: Upload coverage reports
+        if: matrix.python-version == '3.12'
+        uses: codecov/codecov-action@v4
+        with:
+          # codecov-action v4 does not support tokenless uploads except for
+          # pull requests from forks; the secret must exist in the repository.
+          token: ${{ secrets.CODECOV_TOKEN }}
+          files: ./coverage.xml
+          flags: python-${{ matrix.python-version }}
+          name: Python ${{ matrix.python-version }}
+
+  # Run the tox environments in parallel, then the `lint` environment.
+  python-tests-tox:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - name: Install UV
+        uses: astral-sh/setup-uv@v4
+        with:
+          cache-dependency-glob: pyproject.toml
+          enable-cache: true
+
+      # `uvx --from tox-uv` runs the tox command out of the tox-uv package,
+      # so no separate installation step is needed.
+      - name: Install and run tox
+        run: uvx --from tox-uv tox -p auto
+
+      - name: Run linting with tox
+        run: uvx --from tox-uv tox -e lint
+
+  # Run the nf-test suite with the docker profile on each Nextflow version
+  # listed in the matrix.
+  nextflow-tests:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        nextflow_version: ['23.10.0', 'latest']
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Nextflow
+        uses: nf-core/setup-nextflow@v2
+        with:
+          version: ${{ matrix.nextflow_version }}
+
+      - name: Set up nf-test
+        uses: nf-core/setup-nf-test@v1
+
+      - name: Run nf-test
+        run: |
+          nf-test test --verbose --profile test,docker
+
+      # Runs even when the tests fail so the results can be inspected.
+      - name: Upload test results
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: nf-test-results-${{ matrix.nextflow_version }}
+          # upload-artifact v4.4+ excludes hidden files and directories by
+          # default; without this the .nf-test/ path below is dropped.
+          include-hidden-files: true
+          path: |
+            .nf-test/
+            tests/output/
+
+      # Remove Nextflow and nf-test working directories after every run.
+      - name: Clean up
+        if: always()
+        run: |
+          rm -rf work/
+          rm -rf .nextflow/
+          rm -rf .nf-test/
@@ -19,6 +19,9 @@
 !.gitattributes
 !main.nf
 !nextflow.config
+!nf-test.config
+!CLAUDE.md
+!llms.txt
 
 # github action workflows
 !/.github
@@ -46,6 +49,8 @@
 # bin of executable scripts
 !/bin/
 !/bin/*.py
+!/bin/*.rs
+!/bin/*.ers
 !/bin/*.ts
 !/bin/*.js
 !/bin/*.R
@@ -54,6 +59,7 @@
 !/bin/*.lua
 !/bin/*.sh
 !/bin/*.awk
+!/bin/README.md
 
 # groovy libraries
 !/lib
@@ -65,3 +71,26 @@
 !/docs/*.md
 !/docs/*.pdf
 !/docs/*.html.gz
+
+# globus adapter
+!/globus
+!/globus/.gitignore
+!/globus/README.md
+!/globus/action_provider
+!/globus/action_provider/oneroof_action_provider.py
+!/globus/action_provider/requirements.txt
+!/globus/config
+!/globus/config/.env.template
+!/globus/config/*.json
+!/globus/flows
+!/globus/flows/*.json
+!/globus/scripts
+!/globus/scripts/*.sh
+!/globus/scripts/*.py
+
+# nf-test
+!tests/
+!tests/**/*
+!tests/**/*.nf.test
+!tests/data/
+!tests/data/**/*
@@ -7,9 +7,17 @@ repos:
     -   id: check-toml
     -   id: end-of-file-fixer
     -   id: trailing-whitespace
--   repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: "v0.9.6"
+# -   repo: https://github.com/astral-sh/ruff-pre-commit
+#     rev: "v0.9.6"
+#     hooks:
+#         - id: ruff
+#           args: ["--fix"]
+#         - id: ruff-format
+-   repo: local
     hooks:
-        - id: ruff
-          args: ["--fix"]
-        - id: ruff-format
+    -   id: no-env-files
+        name: Block .env files
+        entry: .env files must not be committed
+        language: fail
+        files: '\.env$'
+        description: 'Prevents accidental commit of .env files containing sensitive configuration'
@@ -0,0 +1,143 @@
+# CLAUDE.md
+
+This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
+
+## Project Overview
+
+OneRoof is a Nextflow-based bioinformatics pipeline for base-calling, variant-calling, and consensus-calling of amplicon sequencing data. It supports both Nanopore (pod5/BAM/FASTQ) and Illumina (paired-end FASTQ) data, with particular focus on SARS-CoV-2 and H5N1 influenza genomic surveillance.
+
+## Key Commands
+
+### Development Environment Setup
+```bash
+# For environments with conda dependencies (full pipeline)
+pixi install --frozen
+pixi shell --frozen
+
+# For PyPI-only environments (Python development)
+uv venv
+source .venv/bin/activate  # or .venv\Scripts\activate on Windows
+uv sync
+```
+
+### Running the Pipeline
+```bash
+# Nanopore data from raw POD5s
+nextflow run . \
+  --pod5_dir my_pod5_dir \
+  --primer_bed my_primers.bed \
+  --refseq my_ref.fasta \
+  --ref_gbk my_ref.gbk \
+  --kit "SQK-NBD114-24"
+
+# Illumina data
+nextflow run . \
+  --illumina_fastq_dir my_illumina_reads/ \
+  --primer_bed my_primers.bed \
+  --refseq my_ref.fasta \
+  --ref_gbk my_ref.gbk
+
+# Run without containers (requires pixi environment)
+nextflow run . -profile containerless [options]
+```
+
+### Code Quality & Testing
+```bash
+# Python linting and formatting
+ruff check . --exit-zero --fix --unsafe-fixes
+ruff format .
+
+# Run Python tests (using uv for speed)
+uv run pytest bin/test_*.py
+# Or run tests with tox for multiple environments
+tox
+
+# Build documentation
+just docs
+
+# IMPORTANT: Modifying README.md
+# The README.md in the project root is generated from docs/index.qmd
+# NEVER edit README.md directly - it will be overwritten
+# Always edit docs/index.qmd and re-render:
+just make-readme  # or: just docs
+
+# Docker operations
+just docker-build
+just docker-push
+```
+
+## Architecture
+
+### Directory Structure
+- `main.nf` - Main workflow entry point that orchestrates platform-specific workflows
+- `workflows/` - Platform-specific workflows (nanopore.nf, illumina.nf)
+- `subworkflows/` - Reusable workflow components (alignment, variant_calling, primer_handling, etc.)
+- `modules/` - Individual process definitions for tools (dorado, minimap2, ivar, etc.)
+- `bin/` - Python utility scripts with PEP 723 inline dependencies (fully portable with uv)
+- `conf/` - Configuration files for different platforms and tools
+
+### Key Workflow Components
+
+1. **Data Ingestion** - Handles multiple input formats (pod5, BAM, FASTQ) with optional remote file watching
+2. **Primer Handling** - Validates primers, trims reads, and ensures complete amplicons
+3. **Alignment & Variant Calling** - Platform-specific alignment and variant calling using minimap2 and ivar/bcftools
+4. **Quality Control** - FastQC, MultiQC, and custom coverage plotting
+5. **Consensus Generation** - Creates consensus sequences with configurable frequency thresholds
+6. **Optional Features** - Metagenomics (Sylph), phylogenetics (Nextclade), haplotyping (Devider)
+
+### Technology Stack
+- **Workflow Engine**: Nextflow DSL2
+- **Container Support**: Docker, Singularity/Apptainer
+- **Environment Management**: Pixi (combines conda and PyPI dependencies), UV (fast Python package management)
+- **Languages**: Nextflow (Groovy), Python 3.10+
+- **Key Tools**: Dorado (basecalling), minimap2 (alignment), ivar/bcftools (variants), FastQC/MultiQC (QC)
+
+### Configuration Philosophy
+- Parameters are primarily set via command line arguments
+- Platform-specific configs (nanopore.config, illumina.config) are auto-loaded based on input data type
+- Container profiles (docker, singularity, apptainer, containerless) control execution environment
+- Advanced users can modify nextflow.config for fine-tuning
+
+### Important Parameters
+- `--pod5_batch_size`: Controls GPU memory usage during basecalling
+- `--min_variant_frequency`: Platform-specific defaults (0.05 for Illumina, 0.10 for Nanopore)
+- `--downsample_to`: Manages computational resources by limiting coverage depth
+- `--model`: Nanopore basecalling model (defaults to sup@latest)
+
+## Dependency Management
+
+### Python Package Management
+- **Always use `uv` instead of `pip`** for any Python package installation - it's significantly faster and more reliable
+- **Use `uv` for PyPI-only environments**: When working with Python scripts that only need PyPI dependencies
+- **Use `pixi` for mixed environments**: When conda dependencies are required (e.g., for the full pipeline)
+- **Script execution**: Always use `uv run` instead of `python3` to execute Python scripts
+  ```bash
+  # Good - uses inline dependencies from PEP 723 headers
+  uv run bin/some_script.py
+
+  # Avoid - doesn't guarantee dependencies
+  python3 bin/some_script.py
+  ```
+- **Portable scripts**: All scripts in `bin/` include PEP 723 inline dependencies, making them fully portable with uv
+- **Benefits**: This approach eliminates dependency hell in Python by ensuring consistent, reproducible environments
+
+### Testing Infrastructure
+- **Comprehensive test coverage**: Python scripts in `bin/` have extensive test coverage using pytest
+- **Test execution**: Tests can be run quickly with UV for PyPI-only environments
+  ```bash
+  # Run all tests
+  uv run pytest bin/test_*.py
+
+  # Run specific test
+  uv run pytest bin/test_specific_module.py
+  ```
+- **CI/CD**: The continuous integration pipeline uses UV instead of pip for improved speed and reliability
+- **Test organization**: Test files follow the pattern `test_*.py` and are colocated with the scripts they test
+
+## Development Notes
+
+1. **Testing**: Python scripts have comprehensive test coverage with pytest; Nextflow workflow tests are run with nf-test (`nf-test test`, with test files under `tests/` named `*.nf.test`)
+2. **GPU Requirements**: Nanopore basecalling requires CUDA-capable GPUs
+3. **Memory Management**: Use `--low_memory` flag for resource-constrained environments
+4. **Slack Integration**: Optional alerts can be configured for pipeline completion
+5. **Dependency Management**: Always use `uv` for Python operations to ensure fast, reliable dependency resolution
Original file line number	Diff line number	Diff line change
`@@ -1,2 +1,3 @@`
`1`	`1`	`# GitHub syntax highlighting`
`2`	`2`	`pixi.lock linguist-language=YAML`
	`3`	`+llms.txt linguist-language=markdown linguist-detectable=true`