2121
2222def get_mask (
2323 seq : str ,
24- keep_Ns : bool ,
24+ keep : str ,
2525 prefix : str ,
2626 cycles_used : int ,
2727) -> str :
2828 """
2929 Inputs:
3030 seq Sequence string to make mask from
31- keep_Ns Whether Ns should be "Y " or "N" in the mask, vice versa for ACGT
31+ keep Which characters get "Y" in the mask: "bases" (ACGT) or "Ns"; the two options are mutually exclusive
3232 prefix Prefix to add to the mask
3333 cycles_used Number of cycles used in the sequencing run
3434
3535 Example usage:
36- get_mask( "ACGTNNN", True, "I1:", 7 ) -> 'I1:N4Y3'
37- get_mask( "ACGTNNN", False, "I2:", 10 ) -> 'I2:Y4N6'
36+
37+ get_mask(
38+ seq = "ACGTNNN",
39+ keep = "Ns",
40+ prefix = "I1:",
41+ cycles_used = 7
42+ )
43+ -> 'I1:N4Y3'
44+
45+ get_mask(
46+ seq = "ACGTNNN",
47+ keep = "bases",
48+ prefix = "I2:",
49+ cycles_used = 10
50+ )
51+ -> 'I2:Y4N6'
3852 """
3953
4054 # Input assertions
@@ -46,6 +60,7 @@ def get_mask(
4660 "I1:" ,
4761 "I2:" ,
4862 ], f"Mask prefix { prefix } not recognized"
63+ assert keep in ["bases" , "Ns" ], f"Keep option { keep } not recognized"
4964
5065 # Handle no-input cases
5166 if seq == "" :
@@ -61,7 +76,7 @@ def get_mask(
6176 "G" : "Y" ,
6277 "T" : "Y" ,
6378 }
64- if keep_Ns is False
79+ if keep == "bases"
6580 else {
6681 "N" : "Y" ,
6782 "A" : "N" ,
@@ -511,47 +526,52 @@ def make_demux_manifests(
511526 df_samples ["I1Mask" ] = df_samples ["Index1" ].apply (
512527 lambda seq : get_mask (
513528 seq = seq ,
514- keep_Ns = False ,
529+ keep = "bases" ,
515530 prefix = "I1:" ,
516531 cycles_used = self .cycles ["I1" ],
517532 )
518533 )
519534 df_samples ["I2Mask" ] = df_samples ["Index2" ].apply (
520535 lambda seq : get_mask (
521536 seq = seq ,
522- keep_Ns = False ,
537+ keep = "bases" ,
523538 prefix = "I2:" ,
524539 cycles_used = self .cycles ["I2" ],
525540 )
526541 )
527542 df_samples ["I1UmiMask" ] = df_samples ["Index1" ].apply (
528543 lambda seq : get_mask (
529544 seq = seq ,
530- keep_Ns = True ,
545+ keep = "Ns" ,
531546 prefix = "I1:" ,
532547 cycles_used = self .cycles ["I1" ],
533548 )
534549 )
535550 df_samples ["I2UmiMask" ] = df_samples ["Index2" ].apply (
536551 lambda seq : get_mask (
537552 seq = seq ,
538- keep_Ns = True ,
553+ keep = "Ns" ,
539554 prefix = "I2:" ,
540555 cycles_used = self .cycles ["I2" ],
541556 )
542557 )
558+ # For the read masks, a read length of 0 in the recipe (e.g. "0-0") means "use all cycles for that read"
543559 df_samples ["R1Mask" ] = df_samples ["Recipe" ].apply (
544560 lambda recipe : get_mask (
545- seq = "N" * int (recipe .split ("-" )[0 ]),
546- keep_Ns = True ,
561+ seq = "N" * int (recipe .split ("-" )[0 ])
562+ if int (recipe .split ("-" )[0 ]) > 0
563+ else "N" * self .cycles ["R1" ],
564+ keep = "Ns" ,
547565 prefix = "R1:" ,
548566 cycles_used = self .cycles ["R1" ],
549567 )
550568 )
551569 df_samples ["R2Mask" ] = df_samples ["Recipe" ].apply (
552570 lambda recipe : get_mask (
553- seq = "N" * int (recipe .split ("-" )[3 ]),
554- keep_Ns = True ,
571+ seq = "N" * int (recipe .split ("-" )[- 1 ])
572+ if int (recipe .split ("-" )[- 1 ]) > 0
573+ else "N" * self .cycles ["R2" ],
574+ keep = "Ns" ,
555575 prefix = "R2:" ,
556576 cycles_used = self .cycles ["R2" ],
557577 )
0 commit comments