Skip to content

Commit 6f383b1

Browse files
Mike Lee
authored and committed
preventing duplicate read ids from bit-gen-reads
1 parent c4787ac commit 6f383b1

File tree

4 files changed

+20
-9
lines changed

4 files changed

+20
-9
lines changed

CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,13 @@
1313
1414
-->
1515

16+
## v1.13.14 (13-Mar-2026)
17+
18+
### Fixed
19+
- `bit-gen-reads` could previously, by chance, create reads with identical headers (since only coordinates were being added to the header); a read counter is now also included in each header to prevent this
20+
21+
---
22+
1623
## v1.13.13 (12-Mar-2026)
1724

1825
### Added

bit/cli/gen_reads.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,4 @@ def main():
115115
elif not args.read_length:
116116
args.read_length = 150
117117

118-
print(args)
119-
120118
generate_reads(args)

bit/modules/gen_reads.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ def gen_paired_reads(args, proportions):
106106
num_fragments = args.num_reads // 2
107107
reads_remaining = num_fragments
108108
remainder = 0.0
109+
read_count = 0
109110

110111
for fasta_file in tqdm(args.input_fastas, desc = " Generating reads from each input FASTA file"):
111112

@@ -131,12 +132,13 @@ def gen_paired_reads(args, proportions):
131132

132133
quality_scores = "I" * args.read_length
133134

134-
fw.write(f"@{seq_id}_{start}/1\n")
135+
read_count += 1
136+
fw.write(f"@{seq_id}_{read_count}_{start}/1\n")
135137
fw.write(f"{forward_read}\n")
136138
fw.write(f"+\n")
137139
fw.write(f"{quality_scores}\n")
138140

139-
rw.write(f"@{seq_id}_{start}/2\n")
141+
rw.write(f"@{seq_id}_{read_count}_{start}/2\n")
140142
rw.write(f"{reverse_read}\n")
141143
rw.write(f"+\n")
142144
rw.write(f"{quality_scores}\n")
@@ -159,12 +161,13 @@ def gen_paired_reads(args, proportions):
159161
reverse_read = fragment[-args.read_length:][::-1].translate(str.maketrans("ACGT", "TGCA"))
160162
quality_scores = "I" * args.read_length
161163

162-
fw.write(f"@{seq_id}_{start}/1\n")
164+
read_count += 1
165+
fw.write(f"@{seq_id}_{read_count}_{start}/1\n")
163166
fw.write(f"{forward_read}\n")
164167
fw.write(f"+\n")
165168
fw.write(f"{quality_scores}\n")
166169

167-
rw.write(f"@{seq_id}_{start}/2\n")
170+
rw.write(f"@{seq_id}_{read_count}_{start}/2\n")
168171
rw.write(f"{reverse_read}\n")
169172
rw.write(f"+\n")
170173
rw.write(f"{quality_scores}\n")
@@ -185,6 +188,7 @@ def gen_single_reads(args, proportions):
185188

186189
reads_remaining = args.num_reads
187190
remainder = 0.0
191+
read_count = 0
188192

189193
for fasta_file in tqdm(args.input_fastas, desc = " Generating reads from each input FASTA file"):
190194

@@ -210,7 +214,8 @@ def gen_single_reads(args, proportions):
210214

211215
quality_scores = "I" * len(read)
212216

213-
fw.write(f"@{seq_id}_{start}\n")
217+
read_count += 1
218+
fw.write(f"@{seq_id}_{read_count}_{start}\n")
214219
fw.write(f"{read}\n")
215220
fw.write(f"+\n")
216221
fw.write(f"{quality_scores}\n")
@@ -234,7 +239,8 @@ def gen_single_reads(args, proportions):
234239
read, start = extract_subsequence(sequence, seq_length, read_len, args.circularize)
235240

236241
quality_scores = "I" * len(read)
237-
fw.write(f"@{seq_id}_{start}\n")
242+
read_count += 1
243+
fw.write(f"@{seq_id}_{read_count}_{start}\n")
238244
fw.write(f"{read}\n")
239245
fw.write(f"+\n")
240246
fw.write(f"{quality_scores}\n")

bit/tests/test_gen_reads.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ def test_gen_reads(tmp_path):
3737
expected = f.read().splitlines()
3838

3939
observed = [
40-
"@partial-NC_003131.1_550/1",
40+
"@partial-NC_003131.1_1_550/1",
4141
"GTGGACGACT",
4242
"+",
4343
"IIIIIIIIII"

0 commit comments

Comments
 (0)