Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions VERSIONLOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# TACA Version Log

## 20250311.1

Have AVITI demux manifest accommodate samples with recipes of read length 0 by treating them as full length.

## 20250310.1

Fix command in ONT delivery
Expand Down
46 changes: 33 additions & 13 deletions taca/element/Element_Runs.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,20 +21,34 @@

def get_mask(
seq: str,
keep_Ns: bool,
keep: str,
prefix: str,
cycles_used: int,
) -> str:
"""
Inputs:
seq Sequence string to make mask from
keep_Ns Whether Ns should be "Y" or "N" in the mask, vice versa for ACGT
keep "bases" or "Ns", mutually exclusive options
prefix Prefix to add to the mask
cycles_used Number of cycles used in the sequencing run
Example usage:
get_mask( "ACGTNNN", True, "I1:", 7 ) -> 'I1:N4Y3'
get_mask( "ACGTNNN", False, "I2:", 10 ) -> 'I2:Y4N6'
get_mask(
seq = "ACGTNNN",
keep = "Ns",
prefix = "I1:",
cycles_used = 7
)
-> 'I1:N4Y3'
get_mask(
seq = "ACGTNNN",
keep = "bases",
prefix = "I2:",
cycles_used = 10
)
-> 'I2:Y4N6'
"""

# Input assertions
Expand All @@ -46,6 +60,7 @@ def get_mask(
"I1:",
"I2:",
], f"Mask prefix {prefix} not recognized"
assert keep in ["bases", "Ns"], f"Keep option {keep} not recognized"

# Handle no-input cases
if seq == "":
Expand All @@ -61,7 +76,7 @@ def get_mask(
"G": "Y",
"T": "Y",
}
if keep_Ns is False
if keep == "bases"
else {
"N": "Y",
"A": "N",
Expand Down Expand Up @@ -511,47 +526,52 @@ def make_demux_manifests(
df_samples["I1Mask"] = df_samples["Index1"].apply(
lambda seq: get_mask(
seq=seq,
keep_Ns=False,
keep="bases",
prefix="I1:",
cycles_used=self.cycles["I1"],
)
)
df_samples["I2Mask"] = df_samples["Index2"].apply(
lambda seq: get_mask(
seq=seq,
keep_Ns=False,
keep="bases",
prefix="I2:",
cycles_used=self.cycles["I2"],
)
)
df_samples["I1UmiMask"] = df_samples["Index1"].apply(
lambda seq: get_mask(
seq=seq,
keep_Ns=True,
keep="Ns",
prefix="I1:",
cycles_used=self.cycles["I1"],
)
)
df_samples["I2UmiMask"] = df_samples["Index2"].apply(
lambda seq: get_mask(
seq=seq,
keep_Ns=True,
keep="Ns",
prefix="I2:",
cycles_used=self.cycles["I2"],
)
)
# For the read masks, interpret a recipe read length of 0 as "use all cycles of that read"
df_samples["R1Mask"] = df_samples["Recipe"].apply(
lambda recipe: get_mask(
seq="N" * int(recipe.split("-")[0]),
keep_Ns=True,
seq="N" * int(recipe.split("-")[0])
if int(recipe.split("-")[0]) > 0
else "N" * self.cycles["R1"],
keep="Ns",
prefix="R1:",
cycles_used=self.cycles["R1"],
)
)
df_samples["R2Mask"] = df_samples["Recipe"].apply(
lambda recipe: get_mask(
seq="N" * int(recipe.split("-")[3]),
keep_Ns=True,
seq="N" * int(recipe.split("-")[-1])
if int(recipe.split("-")[-1]) > 0
else "N" * self.cycles["R2"],
keep="Ns",
prefix="R2:",
cycles_used=self.cycles["R2"],
)
Expand Down