@@ -125,16 +125,22 @@ def filter_genes(tpm_cutoff, cov_cutoff, blast_pident, blast_qcovs, rex_pident,
125125 filter = filter .merge (lncrna , on = "transcript_id" , how = "left" )
126126
127127 # Identify strong keep hits while maintaining as much feature diversity as possible
# Build the boolean "keep" flag from the evidence columns of `filter`.
# Explicit `== True` / `== False` comparisons are used on purpose: a cell that
# is neither value (presumably NaN after the left merges - TODO confirm)
# satisfies neither test, which `~column` or plain truthiness would not
# guarantee.
coverage_pass = filter["COVERAGE"] == True  # noqa: E712
tpm_pass = filter["TPM"] == True  # noqa: E712
pfam_pass = filter["PFAM"] == True  # noqa: E712
blast_pass = (filter["blast_pident"] == True) & (filter["blast_qcovs"] == True)  # noqa: E712
both_predictors = (filter["AUGUSTUS"] == True) & (filter["HELIXER"] == True)  # noqa: E712
multi_exon = filter["singleExon"] == False  # noqa: E712
not_lncrna = filter["LncRNA_predict"] == False  # noqa: E712
no_repeat_evidence = (
    (filter["rex_pident"] == False)  # noqa: E712
    & (filter["rex_qcovs"] == False)  # noqa: E712
    & (filter["REPEAT"] == False)  # noqa: E712
)

# A row is flagged as soon as any one of the four rules holds.
# Rule 1: coverage plus either (BLAST + PFAM) or (AUGUSTUS + HELIXER).
keep_rule_1 = coverage_pass & ((blast_pass & pfam_pass) | both_predictors)
# Rule 2: multi-exon, not predicted lncRNA, with coverage and TPM.
keep_rule_2 = multi_exon & not_lncrna & coverage_pass & tpm_pass
# Rule 3: multi-exon with PFAM and BLAST.
keep_rule_3 = multi_exon & pfam_pass & blast_pass
# Rule 4: AUGUSTUS + HELIXER, no rex/REPEAT evidence, not lncRNA, multi-exon.
keep_rule_4 = both_predictors & no_repeat_evidence & not_lncrna & multi_exon

keep_cond = keep_rule_1 | keep_rule_2 | keep_rule_3 | keep_rule_4
data["keep"] = pd.Series(keep_cond).fillna(False)
132135
133136 # Identify strong discard hits while maintaining as much feature diversity as possible
# Build the boolean "discard" flag from the evidence columns of `filter`.
# Explicit `== True` / `== False` comparisons are used on purpose: a cell that
# is neither value (presumably NaN after the left merges - TODO confirm)
# satisfies neither test, which `~column` or plain truthiness would not
# guarantee.
no_coverage = filter["COVERAGE"] == False  # noqa: E712
# Compared against the boolean False like every other column here; the earlier
# comparison with the string "False" in rule 4 could never match a boolean
# column, which silently disabled that rule.
no_pfam = filter["PFAM"] == False  # noqa: E712
no_blast = (filter["blast_pident"] == False) & (filter["blast_qcovs"] == False)  # noqa: E712
no_predictor = (filter["AUGUSTUS"] == False) & (filter["HELIXER"] == False)  # noqa: E712
single_exon = filter["singleExon"] == True  # noqa: E712
lncrna_predicted = filter["LncRNA_predict"] == True  # noqa: E712
repeat_evidence = (
    (filter["rex_pident"] == True)  # noqa: E712
    & (filter["rex_qcovs"] == True)  # noqa: E712
    & (filter["REPEAT"] == True)  # noqa: E712
)

# A row is flagged as soon as any one of the four rules holds.
# Rule 1: no coverage, no AUGUSTUS/HELIXER, no BLAST and no PFAM.
discard_rule_1 = no_coverage & no_predictor & no_blast & no_pfam
# Rule 2: single-exon with no BLAST, no PFAM and no coverage.
discard_rule_2 = single_exon & no_blast & no_pfam & no_coverage
# Rule 3: rex + REPEAT evidence and no BLAST.
discard_rule_3 = repeat_evidence & no_blast
# Rule 4: predicted lncRNA, single-exon, no PFAM and no AUGUSTUS/HELIXER.
discard_rule_4 = lncrna_predicted & single_exon & no_pfam & no_predictor

discard_cond = discard_rule_1 | discard_rule_2 | discard_rule_3 | discard_rule_4
data["discard"] = pd.Series(discard_cond).fillna(False)
138144
139145 data ["label" ] = data .apply (lambda x : "Discard" if x ["discard" ] else ("Keep" if x ["keep" ] else "None" ), axis = 1 )
140146
0 commit comments