|
8 | 8 | from pathlib import Path |
9 | 9 | from typing import IO, Any, Dict, List, Optional, Sequence, Tuple, Union |
10 | 10 |
|
| 11 | +from smftools.constants import BAM_SUFFIX, MOD_LIST, MOD_MAP, SPLIT_DIR |
| 12 | + |
11 | 13 | from .discover_input_files import discover_input_files |
12 | 14 |
|
13 | 15 | # Optional dependency for YAML handling |
@@ -652,11 +654,11 @@ class ExperimentConfig: |
652 | 654 | input_data_path: Optional[str] = None |
653 | 655 | output_directory: Optional[str] = None |
654 | 656 | fasta: Optional[str] = None |
655 | | - bam_suffix: str = ".bam" |
| 657 | + bam_suffix: str = BAM_SUFFIX |
656 | 658 | recursive_input_search: bool = True |
657 | 659 | input_type: Optional[str] = None |
658 | 660 | input_files: Optional[List[Path]] = None |
659 | | - split_dir: str = "demultiplexed_BAMs" |
| 661 | + split_dir: str = SPLIT_DIR |
660 | 662 | split_path: Optional[str] = None |
661 | 663 | strands: List[str] = field(default_factory=lambda: ["bottom", "top"]) |
662 | 664 | conversions: List[str] = field(default_factory=lambda: ["unconverted"]) |
@@ -708,10 +710,10 @@ class ExperimentConfig: |
708 | 710 | hm5C_threshold: float = 0.7 |
709 | 711 | thresholds: List[float] = field(default_factory=list) |
710 | 712 | mod_list: List[str] = field( |
711 | | - default_factory=lambda: ["5mC_5hmC", "6mA"] |
| 713 | + default_factory=lambda: list(MOD_LIST) |
712 | 714 | ) # Dorado modified basecalling codes |
713 | 715 | mod_map: Dict[str, str] = field( |
714 | | - default_factory=lambda: {"6mA": "6mA", "5mC_5hmC": "5mC"} |
| 716 | + default_factory=lambda: dict(MOD_MAP) |
715 | 717 | ) # Map from dorado modified basecalling codes to codes used in modkit_extract_to_adata function |
716 | 718 |
|
717 | 719 | # Alignment params |
@@ -1058,7 +1060,7 @@ def from_var_dict( |
1058 | 1060 | elif input_data_path.is_dir(): |
1059 | 1061 | found = discover_input_files( |
1060 | 1062 | input_data_path, |
1061 | | - bam_suffix=merged["bam_suffix"], |
| 1063 | + bam_suffix=merged.get("bam_suffix", BAM_SUFFIX), |
1062 | 1064 | recursive=merged["recursive_input_search"], |
1063 | 1065 | ) |
1064 | 1066 |
|
@@ -1093,7 +1095,7 @@ def from_var_dict( |
1093 | 1095 | summary_file = output_dir / summary_file_basename |
1094 | 1096 |
|
1095 | 1097 | # Demultiplexing output path |
1096 | | - split_dir = merged.get("split_dir", "demultiplexed_BAMs") |
| 1098 | + split_dir = merged.get("split_dir", SPLIT_DIR) |
1097 | 1099 | split_path = output_dir / split_dir |
1098 | 1100 |
|
1099 | 1101 | # final normalization |
@@ -1228,7 +1230,7 @@ def from_var_dict( |
1228 | 1230 | barcode_kit=merged.get("barcode_kit"), |
1229 | 1231 | fastq_barcode_map=merged.get("fastq_barcode_map"), |
1230 | 1232 | fastq_auto_pairing=merged.get("fastq_auto_pairing"), |
1231 | | - bam_suffix=merged.get("bam_suffix", ".bam"), |
| 1233 | + bam_suffix=merged.get("bam_suffix", BAM_SUFFIX), |
1232 | 1234 | split_dir=split_dir, |
1233 | 1235 | split_path=split_path, |
1234 | 1236 | strands=merged.get("strands", ["bottom", "top"]), |
@@ -1261,7 +1263,8 @@ def from_var_dict( |
1261 | 1263 | m5C_threshold=merged.get("m5C_threshold", 0.7), |
1262 | 1264 | hm5C_threshold=merged.get("hm5C_threshold", 0.7), |
1263 | 1265 | thresholds=merged.get("thresholds", []), |
1264 | | - mod_list=merged.get("mod_list", ["5mC_5hmC", "6mA"]), |
| 1266 | + mod_list=merged.get("mod_list", list(MOD_LIST)), |
| 1267 | + mod_map=merged.get("mod_map", dict(MOD_MAP)), |
1265 | 1268 | batch_size=merged.get("batch_size", 4), |
1266 | 1269 | skip_unclassified=merged.get("skip_unclassified", True), |
1267 | 1270 | delete_batch_hdfs=merged.get("delete_batch_hdfs", True), |
|
0 commit comments