Skip to content

Commit 6000a2d

Browse files
committed
Fix dedup output order when unmapped pairs are written to the same file
1 parent 0c1a0c7 commit 6000a2d

File tree

1 file changed

+12
-12
lines changed

1 file changed

+12
-12
lines changed

pairtools/lib/dedup.py

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -89,9 +89,19 @@ def streaming_dedup(
8989
# Clean up dataframe:
9090
df_chunk = df_chunk.drop(columns=["duplicate"])
9191

92-
# Stream the pairs:
93-
# If outstream_dups is the same as outstream, we save all mapped pairs to the same file
92+
# Save the pairs:
9493

94+
# Stream unmapped:
95+
if outstream_unmapped:
96+
df_chunk.loc[~mask_mapped, :].to_csv(
97+
outstream_unmapped,
98+
index=False,
99+
header=False,
100+
sep="\t",
101+
quoting=QUOTE_NONE,
102+
)
103+
104+
# If outstream_dups is the same as outstream, we save the mapped pairs to the same file
95105
if outstream_dups == outstream:
96106
df_chunk.loc[mask_mapped, :].to_csv(
97107
outstream, index=False, header=False, sep="\t", quoting=QUOTE_NONE
@@ -116,16 +126,6 @@ def streaming_dedup(
116126
outstream, index=False, header=False, sep="\t", quoting=QUOTE_NONE
117127
)
118128

119-
# Stream unmapped:
120-
if outstream_unmapped:
121-
df_chunk.loc[~mask_mapped, :].to_csv(
122-
outstream_unmapped,
123-
index=False,
124-
header=False,
125-
sep="\t",
126-
quoting=QUOTE_NONE,
127-
)
128-
129129
t1 = time.time()
130130
t = t1 - t0
131131
logger.debug(f"total time: {t}")

0 commit comments

Comments
 (0)