Skip to content

Commit 7e079fb

Browse files
committed
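Refactor the filter_genes function to improve readability and maintainability: the keep and discard conditions are extracted into named variables (keep_cond, discard_cond), with each OR-ed clause explicitly parenthesized on its own line before being assigned to data["keep"] and data["discard"].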
1 parent 0cae40a commit 7e079fb

File tree

1 file changed

+13
-7
lines changed

1 file changed

+13
-7
lines changed

bin/Filter.py

Lines changed: 13 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -125,16 +125,22 @@ def filter_genes(tpm_cutoff, cov_cutoff, blast_pident, blast_qcovs, rex_pident,
125125
filter = filter.merge(lncrna, on="transcript_id", how="left")
126126

127127
# Identify strong keep hits while maintaining as much feature diversity as possible
128-
data["keep"] = pd.Series(((filter["COVERAGE"] == True) & ((filter["blast_pident"] == True) & (filter["blast_qcovs"] == True) & (filter["PFAM"] == True) | ((filter["AUGUSTUS"] == True) & (filter["HELIXER"] == True)))) |
129-
((filter["singleExon"] == False) & (filter["LncRNA_predict"] == False) & (filter["COVERAGE"] == True) & (filter["TPM"] == True)) |
130-
((filter["singleExon"] == False) & (filter["PFAM"] == True) & (filter["blast_pident"] == True) & (filter["blast_qcovs"] == True)) |
131-
((filter["AUGUSTUS"] == True) & (filter["HELIXER"] == True) & ((filter["rex_pident"] == False) & (filter["rex_qcovs"] == False)) & (filter["REPEAT"] == False) & (filter["LncRNA_predict"] == False) & (filter["singleExon"] == False)))).fillna(False)
128+
keep_cond = (
129+
((filter["COVERAGE"] == True) & (((filter["blast_pident"] == True) & (filter["blast_qcovs"] == True) & (filter["PFAM"] == True)) | ((filter["AUGUSTUS"] == True) & (filter["HELIXER"] == True)))) |
130+
((filter["singleExon"] == False) & (filter["LncRNA_predict"] == False) & (filter["COVERAGE"] == True) & (filter["TPM"] == True)) |
131+
((filter["singleExon"] == False) & (filter["PFAM"] == True) & (filter["blast_pident"] == True) & (filter["blast_qcovs"] == True)) |
132+
((filter["AUGUSTUS"] == True) & (filter["HELIXER"] == True) & ((filter["rex_pident"] == False) & (filter["rex_qcovs"] == False)) & (filter["REPEAT"] == False) & (filter["LncRNA_predict"] == False) & (filter["singleExon"] == False))
133+
)
134+
data["keep"] = pd.Series(keep_cond).fillna(False)
132135

133136
# Identify strong discard hits while maintaining as much feature diversity as possible
134-
data["discard"] = pd.Series((filter["COVERAGE"] == False) & (filter["AUGUSTUS"] == False) & (filter["HELIXER"] == False) & (filter["blast_pident"] == False) & (filter["blast_qcovs"] == False) & (filter["PFAM"] == False) |
137+
discard_cond = (
138+
((filter["COVERAGE"] == False) & (filter["AUGUSTUS"] == False) & (filter["HELIXER"] == False) & (filter["blast_pident"] == False) & (filter["blast_qcovs"] == False) & (filter["PFAM"] == False)) |
135139
((filter["singleExon"] == True) & ((filter["blast_pident"] == False) & (filter["blast_qcovs"] == False)) & (filter["PFAM"] == False) & (filter["COVERAGE"] == False)) |
136-
(((filter["rex_pident"] == True) & (filter["rex_qcovs"] == True)) & (filter["REPEAT"] == True)) & ((filter["blast_pident"] == False) & (filter["blast_qcovs"] == False)) |
137-
(filter["LncRNA_predict"] == True) & (filter["singleExon"] == True) & (filter["PFAM"] == "False") & (filter["AUGUSTUS"] == False) & (filter["HELIXER"] == False))).fillna(False)
140+
((((filter["rex_pident"] == True) & (filter["rex_qcovs"] == True)) & (filter["REPEAT"] == True)) & ((filter["blast_pident"] == False) & (filter["blast_qcovs"] == False))) |
141+
((filter["LncRNA_predict"] == True) & (filter["singleExon"] == True) & (filter["PFAM"] == "False") & (filter["AUGUSTUS"] == False) & (filter["HELIXER"] == False))
142+
)
143+
data["discard"] = pd.Series(discard_cond).fillna(False)
138144

139145
data["label"] = data.apply(lambda x: "Discard" if x["discard"] else ("Keep" if x["keep"] else "None"), axis=1)
140146

0 commit comments

Comments
 (0)