Skip to content

Commit b6ad2b2

Browse files
committed
sra: Handle paired sequencing lacking statistics
1 parent 583449b commit b6ad2b2

File tree

4 files changed

+436
-1
lines changed

4 files changed

+436
-1
lines changed

rnaseq_pipeline/sources/sra.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -455,7 +455,18 @@ def read_xml_metadata(path, include_invalid_runs=False) -> List[SraRunMetadata]:
455455
issues |= SraRunIssue.AMBIGUOUS_READ_SIZES
456456

457457
elif len(sra_fastq_files) == 1:
458-
logger.info('%s: Single FASTQ file found, using it as a single-end dataset.', srr)
458+
logger.info('%s: Single FASTQ file found, assuming it as a single-end dataset.', srr)
459+
fastq_filenames = [sf.attrib['filename'] for sf in sra_fastq_files]
460+
fastq_file_sizes = [int(sf.attrib['size']) for sf in sra_fastq_files]
461+
use_bamtofastq = False
462+
bam_filenames = [bf.attrib['filename'] for bf in sra_10x_bam_files]
463+
bam_file_urls = [bf.attrib['url'] for bf in sra_10x_bam_files]
464+
bam_fastq_filenames = None
465+
read_types = fastq_load_read_types
466+
467+
elif len(sra_fastq_files) == 2:
468+
logger.warning('%s: Two FASTQ file found, assuming it as a paired-end dataset, but order might be arbitrary.', srr)
469+
# the two FASTQ files are not in a guaranteed order, but aligners can usually deal with that pretty well
459470
fastq_filenames = [sf.attrib['filename'] for sf in sra_fastq_files]
460471
fastq_file_sizes = [int(sf.attrib['size']) for sf in sra_fastq_files]
461472
use_bamtofastq = False

tests/data/SRX21207402.xml

Lines changed: 211 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)