Skip to content

Commit d34b03b

Browse files
committed
ENH update 03_Quality_control
1 parent 54ea8ba commit d34b03b

File tree

3 files changed

+26
-6
lines changed

3 files changed

+26
-6
lines changed

General_Scripts/03_Quality_control/Readme.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,4 +50,4 @@
5050
| :---: | :---: | :---: | :---: |
5151
| 01_merge.py | Merge all the quality control results | GMSC.cluster.tsv.gz rnacode_true_100AA.tsv.xz rnacode_false_100AA.tsv.xz antifam_result.tsv coverage_analysis.tsv.gz riboseq_100AA.tsv.gz 100AA_coordinate.tsv.gz metaT_100AA.tsv.gz rnacode_true_90AA.tsv.xz rnacode_false_90AA.tsv.xz antifam_90AA.tsv metaP_90AA.tsv.gz riboseq_90AA.tsv.gz 90AA_coordinate.tsv.gz metaT_90AA.tsv.gz | GMSC10.100AA.quality.tsv.xz GMSC10.90AA.quality.tsv.xz allpass_100AA.txt allpass_90AA.txt |
5252
| 02_statistic.py | Merge all the quality control results | GMSC.cluster.tsv.gz rnacode_true_100AA.tsv.xz rnacode_false_100AA.tsv.xz antifam_result.tsv coverage_analysis.tsv.gz riboseq_100AA.tsv.gz 100AA_coordinate.tsv.gz metaT_100AA.tsv.gz | allquality_100AA.tsv.gz allpass_100AA.txt |
53-
| 03_merge_all.py | Merge all the values of quality control results | GMSC10.100AA.quality.tsv.xz 100AA_RNAcode.tsv 100AA_metaT.tsv 100AA_RiboSeq.tsv 100AA_metaP_all.tsv GMSC10.90AA.quality.tsv.xz 90AA_RNAcode.tsv 90AA_metaT.tsv 90AA_RiboSeq.tsv 90AA_metaP.tsv | GMSC10.100AA.quality.tsv.xz GMSC10.90AA.quality.tsv.xz allpass_100AA.txt allpass_90AA.txt GMSC10.100AA.quality_test.tsv GMSC10.90AA.quality_test.tsv |
53+
| 03_merge_all.py | Merge all the values of quality control results | GMSC10.100AA.quality.tsv.xz 100AA_RNAcode.tsv 100AA_metaT.tsv 100AA_RiboSeq.tsv 100AA_metaP_all.tsv GMSC10.90AA.quality.tsv.xz 90AA_RNAcode.tsv 90AA_metaT.tsv 90AA_RiboSeq.tsv 90AA_metaP.tsv | GMSC10.100AA.quality.tsv.xz GMSC10.90AA.quality.tsv.xz allpass_100AA.txt allpass_90AA.txt GMSC10.100AA.quality_test.tsv GMSC10.90AA.quality_test.tsv GMSC10.100AA.high_quality.tsv GMSC10.90AA.high_quality.tsv |

General_Scripts/03_Quality_control/merge_quality_control/03_merge_all.py

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,21 @@ def merge(number,n,infile1,infile2,infile3,infile4,infile5,outfile):
4444
name = f'GMSC10.{n}AA.{nf[:3]}_{nf[3:6]}_{nf[6:9]}'
4545
out.write(f'{antifam[name]}\t{terminal[name]}\t{rnacode[name]}\t{metat[name]}\t{riboseq[name]}\t{metap[name]}\n')
4646

47+
def hq(infile,outfile,aa):
    """Write the identifiers of the rows in a merged quality table that pass every filter.

    `infile` is read as tab-separated text with six columns per row, unpacked
    in this order: antifam, terminal, rnacode, metat, riboseq, metap.
    Lines starting with 'AntiFam' are treated as the header and skipped.

    A row is selected when all of the following hold:
      * antifam == 'T' and terminal == 'T'
      * rnacode is not 'NA' and float(rnacode) < 0.05
      * at least one of: int(metat) > 1, int(riboseq) > 1,
        round(float(metap), 1) >= 0.5

    For each selected row one line is written to `outfile`:
    ``GMSC10.{aa}AA.xxx_xxx_xxx``, where the nine digits are the zero-based
    position of the row among the non-header lines.

    Parameters:
        infile:  path of the merged quality table (plain text).
        outfile: path of the identifier list to create (overwritten).
        aa:      value inserted into the identifier prefix (called with 100 and 90).

    Raises:
        ValueError: if a non-blank data row does not have exactly six
            tab-separated fields, or a numeric field cannot be parsed.
    """
    # Position of the next data row. Counted independently of the header so
    # that a file without a header line still starts at 000_000_000 instead
    # of producing a negative, malformed identifier.
    position = 0
    with open(outfile,'wt') as out, open(infile,'rt') as f:
        for line in f:
            if line.startswith('AntiFam'):
                continue
            number = position
            position += 1

            record = line.strip()
            if not record:
                # Blank line (typically a trailing one): nothing to evaluate.
                # It still occupies a position so later rows are not renumbered.
                continue

            antifam,terminal,rnacode,metat,riboseq,metap = record.split('\t')
            if rnacode == 'NA':
                continue
            if not (antifam == 'T' and terminal == 'T' and float(rnacode) < 0.05):
                continue
            if int(metat) > 1 or int(riboseq) > 1 or round(float(metap),1) >= 0.5:
                nf = f'{number:09}'
                out.write(f'GMSC10.{aa}AA.{nf[:3]}_{nf[3:6]}_{nf[6:9]}\n')
61+
4762
NUMBER_100 = 964970496
4863
NUMBER_90 = 287926875
4964

@@ -52,13 +67,18 @@ def merge(number,n,infile1,infile2,infile3,infile4,infile5,outfile):
5267
infile3 = '100AA_metaT.tsv'
5368
infile4 = '100AA_RiboSeq.tsv'
5469
infile5 = '100AA_metaP_all.tsv'
55-
outfile = 'GMSC10.100AA.quality_test.tsv'
56-
merge(NUMBER_100,100,infile1,infile2,infile3,infile4,infile5,outfile)
70+
outfile1 = 'GMSC10.100AA.quality_test.tsv'
71+
merge(NUMBER_100,100,infile1,infile2,infile3,infile4,infile5,outfile1)
5772

5873
infile1 = 'GMSC10.90AA.quality.tsv.xz'
5974
infile2 = '90AA_RNAcode.tsv'
6075
infile3 = '90AA_metaT.tsv'
6176
infile4 = '90AA_RiboSeq.tsv'
6277
infile5 = '90AA_metaP.tsv'
63-
outfile = 'GMSC10.90AA.quality_test.tsv'
64-
merge(NUMBER_90,90,infile1,infile2,infile3,infile4,infile5,outfile)
78+
outfile2 = 'GMSC10.90AA.quality_test.tsv'
79+
merge(NUMBER_90,90,infile1,infile2,infile3,infile4,infile5,outfile2)
80+
81+
outfile3 = 'GMSC10.100AA.high_quality.tsv'
82+
outfile4 = 'GMSC10.90AA.high_quality.tsv'
83+
hq(outfile1,outfile3,100)
84+
hq(outfile2,outfile4,90)

General_Scripts/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ The folder contains scripts to generate GMSC resources from the raw data.
8585
| :---: | :---: | :---: | :---: |
8686
| 01_merge.py | Merge all the quality control results | GMSC.cluster.tsv.gz rnacode_true_100AA.tsv.xz rnacode_false_100AA.tsv.xz antifam_result.tsv coverage_analysis.tsv.gz riboseq_100AA.tsv.gz 100AA_coordinate.tsv.gz metaT_100AA.tsv.gz rnacode_true_90AA.tsv.xz rnacode_false_90AA.tsv.xz antifam_90AA.tsv metaP_90AA.tsv.gz riboseq_90AA.tsv.gz 90AA_coordinate.tsv.gz metaT_90AA.tsv.gz | GMSC10.100AA.quality.tsv.xz GMSC10.90AA.quality.tsv.xz allpass_100AA.txt allpass_90AA.txt |
8787
| 02_statistic.py | Merge all the quality control results | GMSC.cluster.tsv.gz rnacode_true_100AA.tsv.xz rnacode_false_100AA.tsv.xz antifam_result.tsv coverage_analysis.tsv.gz riboseq_100AA.tsv.gz 100AA_coordinate.tsv.gz metaT_100AA.tsv.gz | allquality_100AA.tsv.gz allpass_100AA.txt |
88-
| 03_merge_all.py | Merge all the values of quality control results | GMSC10.100AA.quality.tsv.xz 100AA_RNAcode.tsv 100AA_metaT.tsv 100AA_RiboSeq.tsv 100AA_metaP_all.tsv GMSC10.90AA.quality.tsv.xz 90AA_RNAcode.tsv 90AA_metaT.tsv 90AA_RiboSeq.tsv 90AA_metaP.tsv | GMSC10.100AA.quality.tsv.xz GMSC10.90AA.quality.tsv.xz allpass_100AA.txt allpass_90AA.txt GMSC10.100AA.quality_test.tsv GMSC10.90AA.quality_test.tsv |
88+
| 03_merge_all.py | Merge all the values of quality control results | GMSC10.100AA.quality.tsv.xz 100AA_RNAcode.tsv 100AA_metaT.tsv 100AA_RiboSeq.tsv 100AA_metaP_all.tsv GMSC10.90AA.quality.tsv.xz 90AA_RNAcode.tsv 90AA_metaT.tsv 90AA_RiboSeq.tsv 90AA_metaP.tsv | GMSC10.100AA.quality.tsv.xz GMSC10.90AA.quality.tsv.xz allpass_100AA.txt allpass_90AA.txt GMSC10.100AA.quality_test.tsv GMSC10.90AA.quality_test.tsv GMSC10.100AA.high_quality.tsv GMSC10.90AA.high_quality.tsv |
8989

9090
## 04_Frozen
9191

0 commit comments

Comments
 (0)