33import logging
44import json
55import itertools as it
6- from typing import Dict , Iterable
6+ from typing import Dict
77
88import pandas as pd
99from Bio import SeqIO
1010from Bio .SeqRecord import SeqRecord
11- from Bio .SeqFeature import SeqFeature
1211from Bio .Seq import Seq
1312
1413
@@ -69,54 +68,6 @@ def calculate_ns_sites(codons: dict, genetic_code: dict) -> pd.DataFrame:
6968 return pd .DataFrame ({"feature" : features , "N" : N_sites , "S" : S_sites })
7069
7170
def _feature_matches_any(feature, criteria) -> bool:
    """Return True if any qualifier value of *feature* is listed in *criteria*.

    Only the qualifier keys present in *criteria* are inspected; a feature
    lacking all of them never matches.
    """
    return any(
        qualifier_value in accepted_values
        for qualifier_key, accepted_values in criteria.items()
        for qualifier_value in feature.qualifiers.get(qualifier_key, [])
    )


def iter_features_filtering(features: Iterable["SeqFeature"], included: Dict[str, str], excluded: Dict[str, str]) -> Iterable["SeqFeature"]:
    """Lazily select features by their qualifiers.

    A feature is kept when it matches the inclusion filter (if one is given)
    and does not match the exclusion filter (if one is given). A feature
    "matches" a filter when at least one of its qualifier values, under a
    qualifier key named in the filter, is contained in the filter's entry
    for that key.

    Args:
        features: Objects exposing a ``qualifiers`` mapping whose values are
            iterated over (presumably lists of strings, as in Biopython
            ``SeqFeature`` -- confirm against the caller).
        included: Qualifier key -> accepted values. Empty or ``None``
            disables the inclusion filter.
        excluded: Qualifier key -> rejected values. Empty or ``None``
            disables the exclusion filter.

    Returns:
        An iterator over the selected features, in their original order.

    Note:
        Containment is tested with ``in``. If a filter entry is a list the
        comparison is exact; if it is a plain ``str`` (as the annotation
        suggests) it is a substring test. NOTE(review): confirm which one the
        configuration is expected to provide.
    """
    # Treat a missing/null filter like an empty one instead of failing on it.
    included = included or {}
    excluded = excluded or {}

    # No filters: hand back the features untouched.
    if not included and not excluded:
        logging.debug("Selecting all features")
        return iter(features)

    # The message is emitted once, at call time, not while iterating.
    if not included:
        # Only exclusion filter
        logging.debug(f"Selecting features excluding all of {excluded}")
    elif not excluded:
        # Only inclusion filter
        logging.debug(f"Selecting features including any of {included}")
    else:
        # Inclusion then exclusion filter
        logging.debug(f"Selecting features including any of {included} and then excluding all of {excluded}")

    return (
        feature for feature in features
        if (not included or _feature_matches_any(feature, included))
        and not _feature_matches_any(feature, excluded)
    )
118-
119-
12071def main ():
12172
12273 logging .basicConfig (
@@ -136,16 +87,7 @@ def main():
13687
13788 logging .info ("Extracting CDS" )
13889 coding_records = {}
139- included = snakemake .params .features .get ("INCLUDE" , {})
140- excluded = snakemake .params .features .get ("EXCLUDE" , {})
141- for feature in iter_features_filtering (gb .features , included , excluded ):
142- logging .debug (
143- "Processing SeqFeature: "
144- f"ID={ feature .id } type={ feature .type } location={ feature .location } "
145- f"gene={ feature .qualifiers .get ('gene' , [])} "
146- f"locus_tag={ feature .qualifiers .get ('locus_tag' , [])} "
147- f"product={ feature .qualifiers .get ('product' , [])} "
148- )
90+ for feature in gb .features :
14991 identifier = "|" .join (feature .qualifiers .get (snakemake .params .gb_qualifier_display , []))
15092 if identifier == "" :
15193 logging .error (f"Feature at { feature .location } has no qualifier '{ snakemake .params .gb_qualifier_display } '" )
0 commit comments