Skip to content

Commit 52a4a22

Browse files
Merge pull request #174 from CCBR/iss-172
Fix sample discovery when a `tumor-only` run has no pairs file; fix minor issue with `pairs.tsv` in the test dataset
2 parents 2b257f8 + 74c3536 commit 52a4a22

File tree

3 files changed

+54
-17
lines changed

3 files changed

+54
-17
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
## XAVIER development version
22

3+
- Fixed a bug where tumor-only and unpaired runs could fail during Snakefile parsing (#174, @samarth8392)
4+
- Fixed the sample names in `pairs.tsv` in the `tests/data` folder, which caused runs with the test data to fail (#174, @samarth8392)
35
## XAVIER 3.2.1
46

57
- Added separate variable for control-freec genome fasta file (#169, @samarth8392)

tests/data/pairs.tsv

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
Normal Tumor
2-
WES_NC_N_1_0.25 WES_NC_T_1_0.25
2+
WES_NC_N_1_sub WES_NC_T_1_sub

workflow/Snakefile

Lines changed: 51 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -270,31 +270,66 @@ VCF2MAF_WRAPPER=config['scripts']['vcf2maf_wrapper']
270270
SOBDetector_out=os.path.join(BASEDIR,"ffpe_filter","sobdetector")
271271
SOBDetector_JARFILE=os.path.join(SOBDetector_out, "jarfile","SOBDetector_v1.0.2.jar")
272272

273-
name_symlinks=[]
273+
name_symlinks = []
274+
275+
# FASTQ mode
274276
if fqs_found:
275-
name_suffix=".R[1,2].fastq.gz"
276-
if not os.path.exists(input_fqdir):
277-
# print("making"+output_fqdir)
278-
os.makedirs(input_fqdir)
279-
name_symlinks=_sym_safe_(fqs_found, input_fqdir)
280-
else:
281-
name_symlinks=glob.glob(os.path.join(input_fqdir,'*.fastq.gz'))
277+
name_suffix = r"\.R[12]\.fastq\.gz$"
278+
279+
# Ensure input_fqdir exists
280+
os.makedirs(input_fqdir, exist_ok=True)
281+
282+
# Prefer existing symlinks if present
283+
name_symlinks = glob.glob(os.path.join(input_fqdir, "*.fastq.gz"))
284+
285+
# Harden: if directory exists but is empty, (re)populate it
286+
if not name_symlinks:
287+
name_symlinks = _sym_safe_(fqs_found, input_fqdir)
288+
289+
# BAM mode
282290
elif bams_found:
283-
name_suffix=".input.bam"
284-
if not os.path.exists(input_bamdir):
285-
os.makedirs(input_bamdir)
286-
if (len(os.listdir(input_bamdir))==0):
287-
bam_symlinks=_sym_safe_(bams_found, input_bamdir)
288-
name_symlinks=glob.glob(os.path.join(input_bamdir,'*.input.bam'))
291+
name_suffix = r"\.input\.bam$"
292+
293+
os.makedirs(input_bamdir, exist_ok=True)
294+
name_symlinks = glob.glob(os.path.join(input_bamdir, "*.input.bam"))
295+
296+
if not name_symlinks:
297+
_ = _sym_safe_(bams_found, input_bamdir)
298+
name_symlinks = glob.glob(os.path.join(input_bamdir, "*.input.bam"))
299+
300+
# Nothing found
289301
else:
290-
raise NameError("""\n\tFatal: No relevant files found in the BAM or FASTQ directory!
302+
raise NameError(
303+
"""\n\tFatal: No relevant files found in the BAM or FASTQ directory!
291304
FASTQ source path provided: {}
292305
BAM source path provided: {}
293306
Folders should contain files ending with '.fastq.gz' or '.bam' respectively.
294307
""".format(fq_source, bam_source, sys.argv[0])
295308
)
296309

297-
samples = set([re.sub(name_suffix,"",os.path.basename(fname)) for fname in name_symlinks]) ## Only returns paired fqs
310+
# Derive sample names
311+
samples = set([
312+
re.sub(name_suffix, "", os.path.basename(fname))
313+
for fname in name_symlinks
314+
])
315+
316+
# Extra hardening: fail early with a more actionable message
317+
if not samples:
318+
raise NameError(
319+
"""\n\tFatal: No samples could be inferred from discovered inputs.
320+
FASTQ_SOURCE: {}
321+
BAM_SOURCE: {}
322+
input_fqdir: {}
323+
input_bamdir: {}
324+
Found FASTQs: {}
325+
Found BAMs: {}
326+
Note: if input_files/fastq or input_files/bam exists but is empty,
327+
populate it or delete it and rerun.
328+
""".format(
329+
fq_source, bam_source, input_fqdir, input_bamdir,
330+
len(fqs_found), len(bams_found), sys.argv[0]
331+
)
332+
)
298333

299334
pairs_file = config['input_params']['PAIRS_FILE']
300335

0 commit comments

Comments
 (0)