Skip to content

Commit 61f7541

Browse files
authored
Merge pull request #468 from kedhammar/aviti-readmasks
Fix AVITI readmasks
2 parents 2047818 + 2fc250f commit 61f7541

File tree

2 files changed

+37
-13
lines changed

2 files changed

+37
-13
lines changed

VERSIONLOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# TACA Version Log
22

3+
## 20250311.1
4+
5+
Have the AVITI demux manifest accommodate samples whose recipe specifies a read length of 0 by treating that read as full length, i.e. the read mask covers all sequenced cycles.
6+
37
## 20250310.1
48

59
Fix command in ONT delivery

taca/element/Element_Runs.py

Lines changed: 33 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -21,20 +21,34 @@
2121

2222
def get_mask(
2323
seq: str,
24-
keep_Ns: bool,
24+
keep: str,
2525
prefix: str,
2626
cycles_used: int,
2727
) -> str:
2828
"""
2929
Inputs:
3030
seq Sequence string to make mask from
31-
keep_Ns Whether Ns should be "Y" or "N" in the mask, vice versa for ACGT
31+
keep Which positions become "Y" in the mask: "bases" (ACGT) or "Ns"; the other kind becomes "N"
3232
prefix Prefix to add to the mask
3333
cycles_used Number of cycles used in the sequencing run
3434
3535
Example usage:
36-
get_mask( "ACGTNNN", True, "I1:", 7 ) -> 'I1:N4Y3'
37-
get_mask( "ACGTNNN", False, "I2:", 10 ) -> 'I2:Y4N6'
36+
37+
get_mask(
38+
seq = "ACGTNNN",
39+
keep = "Ns",
40+
prefix = "I1:",
41+
cycles_used = 7
42+
)
43+
-> 'I1:N4Y3'
44+
45+
get_mask(
46+
seq = "ACGTNNN",
47+
keep = "bases",
48+
prefix = "I2:",
49+
cycles_used = 10
50+
)
51+
-> 'I2:Y4N6'
3852
"""
3953

4054
# Input assertions
@@ -46,6 +60,7 @@ def get_mask(
4660
"I1:",
4761
"I2:",
4862
], f"Mask prefix {prefix} not recognized"
63+
assert keep in ["bases", "Ns"], f"Keep option {keep} not recognized"
4964

5065
# Handle no-input cases
5166
if seq == "":
@@ -61,7 +76,7 @@ def get_mask(
6176
"G": "Y",
6277
"T": "Y",
6378
}
64-
if keep_Ns is False
79+
if keep == "bases"
6580
else {
6681
"N": "Y",
6782
"A": "N",
@@ -511,47 +526,52 @@ def make_demux_manifests(
511526
df_samples["I1Mask"] = df_samples["Index1"].apply(
512527
lambda seq: get_mask(
513528
seq=seq,
514-
keep_Ns=False,
529+
keep="bases",
515530
prefix="I1:",
516531
cycles_used=self.cycles["I1"],
517532
)
518533
)
519534
df_samples["I2Mask"] = df_samples["Index2"].apply(
520535
lambda seq: get_mask(
521536
seq=seq,
522-
keep_Ns=False,
537+
keep="bases",
523538
prefix="I2:",
524539
cycles_used=self.cycles["I2"],
525540
)
526541
)
527542
df_samples["I1UmiMask"] = df_samples["Index1"].apply(
528543
lambda seq: get_mask(
529544
seq=seq,
530-
keep_Ns=True,
545+
keep="Ns",
531546
prefix="I1:",
532547
cycles_used=self.cycles["I1"],
533548
)
534549
)
535550
df_samples["I2UmiMask"] = df_samples["Index2"].apply(
536551
lambda seq: get_mask(
537552
seq=seq,
538-
keep_Ns=True,
553+
keep="Ns",
539554
prefix="I2:",
540555
cycles_used=self.cycles["I2"],
541556
)
542557
)
558+
# For the read masks, interpret a recipe read length of 0 as "use all cycles of that read"
543559
df_samples["R1Mask"] = df_samples["Recipe"].apply(
544560
lambda recipe: get_mask(
545-
seq="N" * int(recipe.split("-")[0]),
546-
keep_Ns=True,
561+
seq="N" * int(recipe.split("-")[0])
562+
if int(recipe.split("-")[0]) > 0
563+
else "N" * self.cycles["R1"],
564+
keep="Ns",
547565
prefix="R1:",
548566
cycles_used=self.cycles["R1"],
549567
)
550568
)
551569
df_samples["R2Mask"] = df_samples["Recipe"].apply(
552570
lambda recipe: get_mask(
553-
seq="N" * int(recipe.split("-")[3]),
554-
keep_Ns=True,
571+
seq="N" * int(recipe.split("-")[-1])
572+
if int(recipe.split("-")[-1]) > 0
573+
else "N" * self.cycles["R2"],
574+
keep="Ns",
555575
prefix="R2:",
556576
cycles_used=self.cycles["R2"],
557577
)

0 commit comments

Comments
 (0)