|
27 | 27 |
|
28 | 28 |
|
29 | 29 | def converted_BAM_to_adata( |
30 | | - converted_FASTA, |
31 | | - split_dir, |
32 | | - output_dir, |
33 | | - input_already_demuxed, |
34 | | - mapping_threshold, |
35 | | - experiment_name, |
36 | | - conversions, |
37 | | - bam_suffix, |
38 | | - device="cpu", |
39 | | - num_threads=8, |
40 | | - deaminase_footprinting=False, |
41 | | - delete_intermediates=True, |
42 | | - double_barcoded_path=None, |
43 | | -): |
44 | | - """ |
45 | | - Converts BAM files into an AnnData object by binarizing modified base identities. |
46 | | -
|
47 | | - Parameters: |
48 | | - converted_FASTA (Path): Path to the converted FASTA reference. |
49 | | - split_dir (Path): Directory containing converted BAM files. |
50 | | - output_dir (Path): Directory of the output dir |
51 | | - input_already_demuxed (bool): Whether input reads were originally demuxed |
52 | | - mapping_threshold (float): Minimum fraction of aligned reads required for inclusion. |
53 | | - experiment_name (str): Name for the output AnnData object. |
54 | | - conversions (list): List of modification types (e.g., ['unconverted', '5mC', '6mA']). |
55 | | - bam_suffix (str): File suffix for BAM files. |
56 | | - num_threads (int): Number of parallel processing threads. |
57 | | - deaminase_footprinting (bool): Whether the footprinting was done with a direct deamination chemistry. |
58 | | - double_barcoded_path (Path): Path to dorado demux summary file of double ended barcodes |
| 30 | + converted_FASTA: str | Path, |
| 31 | + split_dir: Path, |
| 32 | + output_dir: Path, |
| 33 | + input_already_demuxed: bool, |
| 34 | + mapping_threshold: float, |
| 35 | + experiment_name: str, |
| 36 | + conversions: list[str], |
| 37 | + bam_suffix: str, |
| 38 | + device: str | torch.device = "cpu", |
| 39 | + num_threads: int = 8, |
| 40 | + deaminase_footprinting: bool = False, |
| 41 | + delete_intermediates: bool = True, |
| 42 | + double_barcoded_path: Path | None = None, |
| 43 | +) -> tuple[ad.AnnData | None, Path]: |
| 44 | + """Convert BAM files into an AnnData object by binarizing modified base identities. |
| 45 | +
|
| 46 | + Args: |
| 47 | + converted_FASTA: Path to the converted FASTA reference. |
| 48 | + split_dir: Directory containing converted BAM files. |
| 49 | + output_dir: Output directory for intermediate and final files. |
| 50 | + input_already_demuxed: Whether input reads were originally demultiplexed. |
| 51 | + mapping_threshold: Minimum fraction of aligned reads required for inclusion. |
| 52 | + experiment_name: Name for the output AnnData object. |
| 53 | + conversions: List of modification types (e.g., ``["unconverted", "5mC", "6mA"]``). |
| 54 | + bam_suffix: File suffix for BAM files. |
| 55 | + device: Torch device or device string. |
| 56 | + num_threads: Number of parallel processing threads. |
| 57 | + deaminase_footprinting: Whether the footprinting used direct deamination chemistry. |
| 58 | + delete_intermediates: Whether to remove intermediate files after processing. |
| 59 | + double_barcoded_path: Path to dorado demux summary file of double-ended barcodes. |
59 | 60 |
|
60 | 61 | Returns: |
61 | | - str: Path to the final AnnData object. |
| 62 | + tuple[anndata.AnnData | None, Path]: The AnnData object (if generated) and its path. |
62 | 63 | """ |
63 | 64 | if torch.cuda.is_available(): |
64 | 65 | device = torch.device("cuda") |
|
0 commit comments