Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 84 additions & 0 deletions .github/workflows/Barcode.Mouse.10x_concat.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
# QC workflow: runs the barcode-detection test launcher on two mouse 10x
# concatenated-read configurations (3M whitelist and realistic 9K whitelist).
name: Barcode Mouse 10x concatenated reads

# Triggers: manual dispatch, plus a cron schedule ('15 3 * * 2' = 03:15 every Tuesday).
on:
workflow_dispatch:
schedule:
- cron: '15 3 * * 2'

# Shared settings used by the test steps: the launcher script inside the
# checkout, the directory holding the per-test YAML configs, a directory that is
# appended to PATH, and an output directory that includes the ref name.
env:
LAUNCHER: ${{github.workspace}}/isoquant_tests/github/run_barcode_test.py
CFG_DIR: /abga/work/andreyp/ci_isoquant/data/barcodes
BIN_PATH: /abga/work/andreyp/ci_isoquant/bin/
OUTPUT_BASE: /abga/work/andreyp/ci_isoquant/output/${{github.ref_name}}/barcodes/

# One concurrency group per workflow; a newly triggered run does not cancel the
# run that is already in progress.
concurrency:
group: ${{github.workflow}}
cancel-in-progress: false

jobs:
# Gate job: publishes `has_changes` so the test job below can be skipped when
# the repository saw no commits during the last 7 days.
check-changes:
runs-on:
labels: [isoquant]
name: 'Check for recent changes'
outputs:
has_changes: ${{steps.check.outputs.has_changes}}
steps:
- name: 'Checkout'
uses: actions/checkout@v3
with:
# Full history, so that `git log --since` in the next step can see past commits.
fetch-depth: 0

# Writes has_changes=true|false to $GITHUB_OUTPUT; manual runs always get true.
- name: 'Check for commits in last 7 days'
id: check
run: |
# Always run on manual trigger
if [ "${{github.event_name}}" = "workflow_dispatch" ]; then
echo "has_changes=true" >> $GITHUB_OUTPUT
exit 0
fi
# Check for commits in last 7 days
COMMITS=$(git log --oneline --since="7 days ago" | wc -l)
if [ "$COMMITS" -gt 0 ]; then
echo "Found $COMMITS commits in last 7 days"
echo "has_changes=true" >> $GITHUB_OUTPUT
else
echo "No commits in last 7 days, skipping"
echo "has_changes=false" >> $GITHUB_OUTPUT
fi

# Test job: runs only when the gate job reported has_changes == 'true'.
launch-runner:
needs: check-changes
if: needs.check-changes.outputs.has_changes == 'true'
runs-on:
labels: [isoquant]
name: 'Running barcode detection QC for 10x concatenated reads'

steps:
# Empties the current working directory, dotfiles included (dotglob), before checkout.
- name: 'Cleanup'
run: >
set -e &&
shopt -s dotglob &&
rm -rf *

- name: 'Checkout'
uses: actions/checkout@v3
with:
# Only the latest commit is needed to run the tests.
fetch-depth: 1

# Each test step adds BIN_PATH to PATH and calls the launcher with
# <CFG_DIR>/<STEP_NAME>.yaml, writing results under OUTPUT_BASE.
- name: 'Mouse 10x concatenated reads (3M whitelist)'
# always(): this step runs even if a previous step failed.
if: always()
shell: bash
env:
STEP_NAME: Mouse.10x_concat.3M.800K
run: |
export PATH=$PATH:${{env.BIN_PATH}}
python3 ${{env.LAUNCHER}} ${{env.CFG_DIR}}/${{env.STEP_NAME}}.yaml -o ${{env.OUTPUT_BASE}}

- name: 'Mouse 10x concatenated reads (realistic 9K whitelist)'
# always(): this step runs even if the previous test step failed.
if: always()
shell: bash
env:
STEP_NAME: Mouse.10x_concat.realistic.800K
run: |
export PATH=$PATH:${{env.BIN_PATH}}
python3 ${{env.LAUNCHER}} ${{env.CFG_DIR}}/${{env.STEP_NAME}}.yaml -o ${{env.OUTPUT_BASE}}
6 changes: 6 additions & 0 deletions isoquant_lib/barcode_calling/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
LinkerBarcodeDetectionResult,
TSOBarcodeDetectionResult,
TenXBarcodeDetectionResult,
TenXSplitBarcodeDetectionResult,
SplittingBarcodeDetectionResult,
ReadStats,
increase_if_valid,
Expand All @@ -49,6 +50,8 @@
SharedMemoryWrapper,
TenXBarcodeDetector,
TenXv2BarcodeDetector,
TenXSplittingBarcodeDetector,
TenXv2SplittingBarcodeDetector,
VisiumHDBarcodeDetector,
UniversalSingleMoleculeExtractor,
MoleculeStructure
Expand Down Expand Up @@ -80,6 +83,7 @@
'LinkerBarcodeDetectionResult',
'TSOBarcodeDetectionResult',
'TenXBarcodeDetectionResult',
'TenXSplitBarcodeDetectionResult',
'SplittingBarcodeDetectionResult',
'ReadStats',
'increase_if_valid',
Expand All @@ -95,6 +99,8 @@
# 10x detectors
'TenXBarcodeDetector',
'TenXv2BarcodeDetector',
'TenXSplittingBarcodeDetector',
'TenXv2SplittingBarcodeDetector',
'VisiumHDBarcodeDetector',
# universal calling
'UniversalSingleMoleculeExtractor',
Expand Down
9 changes: 9 additions & 0 deletions isoquant_lib/barcode_calling/callers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
LinkerBarcodeDetectionResult: Result for platforms with linker sequences
TSOBarcodeDetectionResult: Result for Stereo-seq with TSO detection
TenXBarcodeDetectionResult: Result for 10x Genomics platform
TenXSplitBarcodeDetectionResult: Result for 10x split mode (includes TSO position)
SplittingBarcodeDetectionResult: Result for read splitting modes
ExtractionResult: Dict-based result for universal molecule extraction
ReadStats: Statistics tracker for barcode detection
Expand All @@ -29,6 +30,8 @@
SharedMemoryWrapper: Generic shared memory wrapper
TenXBarcodeDetector: 10x Genomics v3 detector
TenXv2BarcodeDetector: 10x Genomics v2 detector
TenXSplittingBarcodeDetector: 10x v3 splitting detector (concatenated reads)
TenXv2SplittingBarcodeDetector: 10x v2 splitting detector (concatenated reads)
VisiumHDBarcodeDetector: Visium HD detector
UniversalSingleMoleculeExtractor: Universal barcode detector for custom molecules
"""
Expand All @@ -40,6 +43,7 @@
LinkerBarcodeDetectionResult,
TSOBarcodeDetectionResult,
TenXBarcodeDetectionResult,
TenXSplitBarcodeDetectionResult,
SplittingBarcodeDetectionResult,
ReadStats,
increase_if_valid,
Expand All @@ -66,6 +70,8 @@
from .tenx import (
TenXBarcodeDetector,
TenXv2BarcodeDetector,
TenXSplittingBarcodeDetector,
TenXv2SplittingBarcodeDetector,
VisiumHDBarcodeDetector,
)

Expand All @@ -81,6 +87,7 @@
'LinkerBarcodeDetectionResult',
'TSOBarcodeDetectionResult',
'TenXBarcodeDetectionResult',
'TenXSplitBarcodeDetectionResult',
'SplittingBarcodeDetectionResult',
'ExtractionResult',
'DetectedElement',
Expand All @@ -98,6 +105,8 @@
# 10x detectors
'TenXBarcodeDetector',
'TenXv2BarcodeDetector',
'TenXSplittingBarcodeDetector',
'TenXv2SplittingBarcodeDetector',
'VisiumHDBarcodeDetector',
# Universal extraction
'MoleculeStructure',
Expand Down
57 changes: 54 additions & 3 deletions isoquant_lib/barcode_calling/callers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
"""

from collections import defaultdict
from typing import List, Optional, Dict, Iterable
from typing import List, Optional, Dict, Iterable, Union


def increase_if_valid(val: Optional[int], delta: int) -> Optional[int]:
Expand Down Expand Up @@ -253,6 +253,20 @@ def get_additional_attributes(self) -> List[str]:
attr.append("TSO detected")
return attr

def get_fasta_segment_start(self) -> int:
    """Return the start coordinate of the FASTA segment for this molecule.

    The segment starts 25 positions before the primer or 75 positions
    before polyT, whichever coordinate is larger, and never below 0.
    """
    primer_based = self.primer - 25
    polyt_based = self.polyT - 75
    return max(0, primer_based, polyt_based)

def get_fasta_segment_end(self, seq_len: int) -> int:
    """Return the end coordinate of the FASTA segment for this molecule.

    Without a TSO position (``tso5 == -1``) the segment runs to ``seq_len``;
    otherwise it ends 25 positions past the TSO, capped at ``seq_len``.
    """
    tso_pos = self.tso5
    return seq_len if tso_pos == -1 else min(seq_len, tso_pos + 25)

def get_tso_position(self) -> int:
    """Expose the stored ``tso5`` coordinate for require_tso checks."""
    tso_position = self.tso5
    return tso_position

@staticmethod
def header() -> str:
"""Static header for class-level access."""
Expand Down Expand Up @@ -299,6 +313,43 @@ def header() -> str:
return BarcodeDetectionResult.header() + "\tpolyT_start\tR1_end"


class TenXSplitBarcodeDetectionResult(TenXBarcodeDetectionResult):
    """10x split-mode detection result.

    Adds the TSO position to the regular 10x result; the TSO coordinate is
    used as the molecule boundary when a FASTA segment is cut out.
    """

    def __init__(self, read_id: str, barcode: str = BarcodeDetectionResult.NOSEQ,
                 UMI: str = BarcodeDetectionResult.NOSEQ,
                 BC_score: int = -1, UMI_good: bool = False, strand: str = ".",
                 polyT: int = -1, r1: int = -1, tso: int = -1):
        super().__init__(read_id, barcode, UMI, BC_score, UMI_good, strand, polyT, r1)
        # -1 means that no TSO position is stored.
        self.tso: int = tso

    def update_coordinates(self, delta: int) -> None:
        """Shift the inherited coordinates and, when set, the TSO position by ``delta``."""
        super().update_coordinates(delta)
        if self.tso == -1:
            # Keep the "not set" marker untouched.
            return
        self.tso += delta

    def get_fasta_segment_start(self) -> int:
        """Return the segment start: 10 positions before ``r1``, or 0 when ``r1`` is unset."""
        if self.r1 == -1:
            return 0
        return max(0, self.r1 - 10)

    def get_fasta_segment_end(self, seq_len: int) -> int:
        """Return the segment end: 35 positions past the TSO, or ``seq_len`` when no TSO is set."""
        return seq_len if self.tso == -1 else min(seq_len, self.tso + 35)

    def get_tso_position(self) -> int:
        """Expose the stored TSO coordinate for require_tso checks."""
        return self.tso

    def __str__(self) -> str:
        """Append the TSO position as one extra tab-separated column."""
        parent_row = super().__str__()
        return parent_row + "\t%d" % self.tso

    @staticmethod
    def header() -> str:
        """Return the parent header extended with the ``tso_start`` column."""
        return TenXBarcodeDetectionResult.header() + "\ttso_start"


class SplittingBarcodeDetectionResult:
"""Result container for read splitting modes (multiple barcodes per read)."""

Expand All @@ -307,9 +358,9 @@ class SplittingBarcodeDetectionResult:
def __init__(self, read_id: str):
self.read_id: str = read_id
self.strand: str = "." # For protocol compatibility
self.detected_patterns: List[TSOBarcodeDetectionResult] = []
self.detected_patterns: List[Union[TSOBarcodeDetectionResult, TenXSplitBarcodeDetectionResult]] = []

def append(self, barcode_detection_result: TSOBarcodeDetectionResult) -> None:
def append(self, barcode_detection_result: Union[TSOBarcodeDetectionResult, TenXSplitBarcodeDetectionResult]) -> None:
self.detected_patterns.append(barcode_detection_result)

def empty(self) -> bool:
Expand Down
Loading
Loading