diff --git a/VERSIONLOG.md b/VERSIONLOG.md index 898dcead..2246716b 100644 --- a/VERSIONLOG.md +++ b/VERSIONLOG.md @@ -1,5 +1,9 @@ # TACA Version Log +## 20250311.1 + +Have AVITI demux manifest accommodate samples with recipes of read length 0 by treating them as full length. + ## 20250310.1 Fix command in ONT delivery diff --git a/taca/element/Element_Runs.py b/taca/element/Element_Runs.py index e686a93a..3624c5c5 100644 --- a/taca/element/Element_Runs.py +++ b/taca/element/Element_Runs.py @@ -21,20 +21,34 @@ def get_mask( seq: str, - keep_Ns: bool, + keep: str, prefix: str, cycles_used: int, ) -> str: """ Inputs: seq Sequence string to make mask from - keep_Ns Whether Ns should be "Y" or "N" in the mask, vice versa for ACGT + keep "bases" or "Ns", mutually exclusive options prefix Prefix to add to the mask cycles_used Number of cycles used in the sequencing run Example usage: - get_mask( "ACGTNNN", True, "I1:", 7 ) -> 'I1:N4Y3' - get_mask( "ACGTNNN", False, "I2:", 10 ) -> 'I2:Y4N6' + + get_mask( + seq = "ACGTNNN", + keep = "Ns", + prefix = "I1:", + cycles_used = 7 + ) + -> 'I1:N4Y3' + + get_mask( + seq = "ACGTNNN", + keep = "bases", + prefix = "I2:", + cycles_used = 10 + ) + -> 'I2:Y4N6' """ # Input assertions @@ -46,6 +60,7 @@ def get_mask( "I1:", "I2:", ], f"Mask prefix {prefix} not recognized" + assert keep in ["bases", "Ns"], f"Keep option {keep} not recognized" # Handle no-input cases if seq == "": @@ -61,7 +76,7 @@ def get_mask( "G": "Y", "T": "Y", } - if keep_Ns is False + if keep == "bases" else { "N": "Y", "A": "N", @@ -511,7 +526,7 @@ def make_demux_manifests( df_samples["I1Mask"] = df_samples["Index1"].apply( lambda seq: get_mask( seq=seq, - keep_Ns=False, + keep="bases", prefix="I1:", cycles_used=self.cycles["I1"], ) @@ -519,7 +534,7 @@ def make_demux_manifests( df_samples["I2Mask"] = df_samples["Index2"].apply( lambda seq: get_mask( seq=seq, - keep_Ns=False, + keep="bases", prefix="I2:", cycles_used=self.cycles["I2"], ) @@ -527,7 +542,7 @@ def 
make_demux_manifests( df_samples["I1UmiMask"] = df_samples["Index1"].apply( lambda seq: get_mask( seq=seq, - keep_Ns=True, + keep="Ns", prefix="I1:", cycles_used=self.cycles["I1"], ) @@ -535,23 +550,28 @@ def make_demux_manifests( df_samples["I2UmiMask"] = df_samples["Index2"].apply( lambda seq: get_mask( seq=seq, - keep_Ns=True, + keep="Ns", prefix="I2:", cycles_used=self.cycles["I2"], ) ) + # For the read masks, interpret a recipe of 0-0 to simply use all cycles df_samples["R1Mask"] = df_samples["Recipe"].apply( lambda recipe: get_mask( - seq="N" * int(recipe.split("-")[0]), - keep_Ns=True, + seq="N" * int(recipe.split("-")[0]) + if int(recipe.split("-")[0]) > 0 + else "N" * self.cycles["R1"], + keep="Ns", prefix="R1:", cycles_used=self.cycles["R1"], ) ) df_samples["R2Mask"] = df_samples["Recipe"].apply( lambda recipe: get_mask( - seq="N" * int(recipe.split("-")[3]), - keep_Ns=True, + seq="N" * int(recipe.split("-")[-1]) + if int(recipe.split("-")[-1]) > 0 + else "N" * self.cycles["R2"], + keep="Ns", prefix="R2:", cycles_used=self.cycles["R2"], )