Skip to content

Commit 780e5da

Browse files
committed
Fix #900
1 parent 23c1b00 commit 780e5da

File tree

1 file changed

+16
-4
lines changed

1 file changed

+16
-4
lines changed

bin/fastq_dir_to_samplesheet.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,13 @@ def parse_args(args=None):
6767
default=1,
6868
help="After splitting FastQ file name by --sanitise_name_delimiter all elements before this index (1-based) will be joined to create final sample name.",
6969
)
70+
parser.add_argument(
71+
"-re",
72+
"--recursive",
73+
dest="RECURSIVE",
74+
action="store_true",
75+
help="Whether or not to search for FastQ files recursively in <FASTQ_DIR>.",
76+
)
7077
return parser.parse_args(args)
7178

7279

@@ -80,6 +87,7 @@ def fastq_dir_to_samplesheet(
8087
sanitise_name=False,
8188
sanitise_name_delimiter="_",
8289
sanitise_name_index=1,
90+
recursive=False
8391
):
8492
def sanitize_sample(path, extension):
8593
"""Retrieve sample id from filename"""
@@ -90,27 +98,30 @@ def sanitize_sample(path, extension):
9098
)
9199
return sample
92100

93-
def get_fastqs(extension):
101+
def get_fastqs(extension, recursive=False):
94102
"""
95103
Needs to be sorted to ensure R1 and R2 are in the same order
96104
when merging technical replicates. Glob is not guaranteed to produce
97105
sorted results.
98106
See also https://stackoverflow.com/questions/6773584/how-is-pythons-glob-glob-ordered
99107
"""
100-
return sorted(glob.glob(os.path.join(fastq_dir, f"*{extension}"), recursive=False))
108+
search_path = f"*{extension}"
109+
if recursive:
110+
search_path = f"**/*{extension}"
111+
return sorted(glob.glob(os.path.join(fastq_dir, search_path), recursive=recursive))
101112

102113
read_dict = {}
103114

104115
## Get read 1 files
105-
for read1_file in get_fastqs(read1_extension):
116+
for read1_file in get_fastqs(read1_extension, recursive):
106117
sample = sanitize_sample(read1_file, read1_extension)
107118
if sample not in read_dict:
108119
read_dict[sample] = {"R1": [], "R2": []}
109120
read_dict[sample]["R1"].append(read1_file)
110121

111122
## Get read 2 files
112123
if not single_end:
113-
for read2_file in get_fastqs(read2_extension):
124+
for read2_file in get_fastqs(read2_extension, recursive):
114125
sample = sanitize_sample(read2_file, read2_extension)
115126
read_dict[sample]["R2"].append(read2_file)
116127

@@ -157,6 +168,7 @@ def main(args=None):
157168
sanitise_name=args.SANITISE_NAME,
158169
sanitise_name_delimiter=args.SANITISE_NAME_DELIMITER,
159170
sanitise_name_index=args.SANITISE_NAME_INDEX,
171+
recursive=args.RECURSIVE
160172
)
161173

162174

0 commit comments

Comments
 (0)