@@ -44,6 +44,21 @@ def merge(number,n,infile1,infile2,infile3,infile4,infile5,outfile):
44
44
name = f'GMSC10.{ n } AA.{ nf [:3 ]} _{ nf [3 :6 ]} _{ nf [6 :9 ]} '
45
45
out .write (f'{ antifam [name ]} \t { terminal [name ]} \t { rnacode [name ]} \t { metat [name ]} \t { riboseq [name ]} \t { metap [name ]} \n ' )
46
46
47
def hq(infile, outfile, aa):
    """Write the IDs of rows in a quality table that pass the high-quality filter.

    ``infile`` is a tab-separated table with six columns, read in this order:
    AntiFam, terminal, RNAcode, metaT, RiboSeq, metaP.  Any line starting with
    ``AntiFam`` is treated as the header and skipped.

    A row is selected when all of the following hold:

    * the AntiFam and terminal columns are both ``'T'``;
    * the RNAcode column is not ``'NA'`` and its value is below 0.05;
    * at least one of: metaT > 1, RiboSeq > 1, or metaP (rounded to one
      decimal place) >= 0.5.

    For every selected row one line ``GMSC10.<aa>AA.xxx_xxx_xxx`` is written
    to ``outfile``, where the nine digits are the zero-padded value of
    ``line index - 1`` (i.e. the zero-based data-row number when the header
    is the first line of the file).

    Args:
        infile: Path of the quality table to read.
        outfile: Path of the ID list to write (overwritten).
        aa: Value inserted in front of ``AA`` in every ID (callers in this
            file pass 100 or 90).

    Raises:
        ValueError: If a non-blank data row does not have exactly six
            tab-separated fields, or a numeric field that has to be
            inspected cannot be parsed.
    """
    # Open the input first so a missing input does not truncate the output.
    with open(infile, 'rt') as table, open(outfile, 'wt') as out:
        for n, line in enumerate(table):
            if line.startswith('AntiFam'):
                continue

            row = line.strip()
            if not row:
                # Tolerate blank lines (e.g. a trailing newline) instead of
                # failing on the six-way unpack; ``n`` still advances, so the
                # numbering of later rows is unaffected.
                continue

            antifam, terminal, rnacode, metat, riboseq, metap = row.split('\t')

            if rnacode == 'NA':
                continue

            # Checks are kept in the original short-circuit order so numeric
            # fields are only parsed for rows that got this far.
            if not (antifam == 'T' and terminal == 'T' and float(rnacode) < 0.05):
                continue
            if not (int(metat) > 1
                    or int(riboseq) > 1
                    or round(float(metap), 1) >= 0.5):
                continue

            # Line index minus one: the header occupies index 0.
            number = n - 1
            nf = f'{number:09}'
            out.write(f'GMSC10.{aa}AA.{nf[:3]}_{nf[3:6]}_{nf[6:9]}\n')
61
+
47
62
# Passed as the first (`number`) argument of merge() for the 100AA and 90AA
# runs respectively -- presumably the number of sequences in each catalogue;
# confirm against merge().
NUMBER_100 = 964_970_496
NUMBER_90 = 287_926_875
49
64
@@ -52,13 +67,18 @@ def merge(number,n,infile1,infile2,infile3,infile4,infile5,outfile):
52
67
# 100AA run: remaining input tables and the merged quality table.
# NOTE(review): infile1/infile2 for this run are expected to be assigned
# before this point -- not visible in this excerpt; confirm.
infile3, infile4, infile5 = (
    '100AA_metaT.tsv',
    '100AA_RiboSeq.tsv',
    '100AA_metaP_all.tsv',
)
outfile1 = 'GMSC10.100AA.quality_test.tsv'
merge(NUMBER_100, 100, infile1, infile2, infile3, infile4, infile5, outfile1)

# 90AA run: same steps with the 90AA input tables.
infile1, infile2 = 'GMSC10.90AA.quality.tsv.xz', '90AA_RNAcode.tsv'
infile3, infile4, infile5 = (
    '90AA_metaT.tsv',
    '90AA_RiboSeq.tsv',
    '90AA_metaP.tsv',
)
outfile2 = 'GMSC10.90AA.quality_test.tsv'
merge(NUMBER_90, 90, infile1, infile2, infile3, infile4, infile5, outfile2)

# Feed each merged quality table to hq() to produce the matching
# high-quality ID list.
outfile3 = 'GMSC10.100AA.high_quality.tsv'
outfile4 = 'GMSC10.90AA.high_quality.tsv'
for _quality_table, _hq_list, _level in (
    (outfile1, outfile3, 100),
    (outfile2, outfile4, 90),
):
    hq(_quality_table, _hq_list, _level)
0 commit comments