Skip to content

Commit 3232960

Browse files
committed
ENH update 10_Transmembrane_secreted
1 parent 164a458 commit 3232960

File tree

9 files changed

+29
-24
lines changed

9 files changed

+29
-24
lines changed
Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,14 @@
11
#!/usr/bin/env bash
22

33
# Concept:
4-
# Run SisnalP 4.1 on 90AA smORF families
4+
# Run SignalP 5.0 on 90AA smORF families
5+
for n in {0..2399}
6+
do
7+
signalp -fasta sub_${n}.faa -org gram+ -batch 100000
8+
signalp -fasta sub_${n}.faa -org gram- -batch 100000
9+
done
510

6-
signalp -t gram+ 90AA_GMSC.faa >90AA_signalp_gram_positive.tsv
7-
signalp -t gram- 90AA_GMSC.faa >90AA_signalp_gram_negative.tsv
11+
for n in {0..62}
12+
do
13+
signalp -fasta sub_${n}.faa -org arch -batch 100000
14+
done

General_Scripts/10_Transmembrane_secreted/01_Annotation/04_find_signal.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ def statistic(infile,outfile):
99
continue
1010
else:
1111
linelist = line.strip().split(' ')
12-
print(linelist)
1312
if linelist[18] == 'Y':
1413
out.write(line)
1514
out.close()

General_Scripts/10_Transmembrane_secreted/02_Compare between archaea and bacteria/01_map_taxonomy_trans.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,8 @@
44
'''
55

66
def store_trans(infile1):
7-
import gzip
87
trans = set()
9-
with gzip.open(infile1,'rt') as f:
8+
with open(infile1,'rt') as f:
109
for line in f:
1110
trans.add(line.strip())
1211
return trans
@@ -30,8 +29,8 @@ def count_trans(infile,outfile):
3029
trans_count = df.groupby(['domain','phylum'])['transmembrane'].value_counts()
3130
trans_count.to_csv(outfile)
3231

33-
infile1 = '90AA_tm_signal.tsv.gz'
34-
infile2 = '90AA_ref_taxonomy_format.tsv.xz'
32+
infile1 = '90AA_tm_signal.tsv'
33+
infile2 = '90AA_tax.tsv.xz'
3534
outfile1 = 'trans_taxa.tsv'
3635
outfile2 = 'trans_phylum.csv'
3736

General_Scripts/10_Transmembrane_secreted/02_Compare between archaea and bacteria/02_extract_cog.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,10 +39,11 @@ def cog(infile1,infile2,infile3,outfile):
3939

4040
merged.to_csv(outfile,sep='\t',index=None)
4141

42-
infile1 = '90AA_ref_taxonomy_format.tsv.xz'
42+
infile1 = '90AA_tax.tsv.xz'
4343
infile2 = '1_cdd_tcov_90AA.tsv.gz'
4444
infile3 = 'cddid_all.tbl'
4545
infile4 = 'cog-20.def.tab.tsv'
46+
4647
outfile1 = 'bac_motif.txt'
4748
outfile2 = 'arc_motif.txt'
4849
outfile3 = '0_arc_motif_cog.tsv'

General_Scripts/10_Transmembrane_secreted/02_Compare between archaea and bacteria/03_count_cog_class.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
'''
2-
Count cog class number an fraction of smORFs with annotation
2+
Count the number of each COG class of smORFs with annotation
33
'''
44
def merge_class(infile):
55
seq_cogs = {}

General_Scripts/10_Transmembrane_secreted/02_Compare between archaea and bacteria/04_count_cog.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
'''
2-
Count cog number an fraction of smORFs with annotation
2+
Count the number of each COG of smORFs with annotation
33
'''
44
import pandas as pd
55

@@ -22,7 +22,7 @@ def count_bg(infile,outfile):
2222
infile1 = '0_arc_motif_cog.tsv'
2323
outfile1 = '1_arc_motif_cog_count.tsv'
2424
infile2 = '0_bg_motif_cog.tsv'
25-
outfile2 = '1_bg_motif_cog_count.tsv_new'
25+
outfile2 = '1_bg_motif_cog_count.tsv'
2626

2727
count_arc(infile1,outfile1)
2828
count_bg(infile2,outfile2)

General_Scripts/10_Transmembrane_secreted/02_Compare between archaea and bacteria/05_count_cog_class_trans.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,8 @@
22
Count cog class number and fraction of transmembrane or secreted smORFs with annotation
33
'''
44
def store(infile0):
5-
import gzip
65
trans = set()
7-
with gzip.open(infile0,'rt') as f:
6+
with open(infile0,'rt') as f:
87
for line in f:
98
trans.add(line.strip())
109
return trans
@@ -55,7 +54,7 @@ def count_class(trans,seq_cogs,outfile):
5554
out.write(f'{key}\t0\t{t}\t{cog_class_not[key]}\t{f}\n')
5655

5756

58-
infile = '90AA_tm_signal.tsv.gz'
57+
infile = '90AA_tm_signal.tsv'
5958
infile1 = '0_arc_motif_cog.tsv'
6059
outfile1 = '1_arc_motif_cog_class_count_trans.tsv'
6160
infile2 = '0_bac_motif_cog.tsv'

General_Scripts/10_Transmembrane_secreted/02_Compare between archaea and bacteria/06_count_cog_trans.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
def store(infile):
2-
import gzip
32
trans = set()
4-
with gzip.open(infile0,'rt') as f:
3+
with open(infile,'rt') as f:
54
for line in f:
65
trans.add(line.strip())
76
return trans
@@ -53,10 +52,11 @@ def count_bac(infile1,infile2,outfile1,outfile2):
5352
cog_count['bac_not_trans_all'] = len(result.drop_duplicates('smorf',keep='first'))
5453
cog_count.to_csv(outfile2,sep='\t',index=None)
5554

56-
infile = '90AA_tm_signal.tsv.gz'
55+
infile = '90AA_tm_signal.tsv'
5756

5857
infile1 = '0_arc_motif_cog.tsv'
5958
infile2 = '0_bac_motif_cog.tsv'
59+
6060
outfile1 = '0_arc_motif_cog_trans.tsv'
6161
outfile2 = '0_arc_motif_cog_not_trans.tsv'
6262
outfile3 = '0_bac_motif_cog_trans.tsv'

General_Scripts/10_Transmembrane_secreted/Readme.md

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,9 @@
1212

1313
| **Code** | **Description** | **Input** | **Output** |
1414
| :---: | :---: | :---: | :---: |
15-
| 01_map_taxonomy_trans.py | Map transmembrane or secreted families to taxonomy and count transmembrane or secreted fraction of each phylum | 90AA_tm_signal.tsv 90AA_ref_taxonomy_format.tsv.xz | trans_taxa.tsv trans_phylum.csv |
16-
| 02_extract_cog.py | Extract bacterial and archaeal 90AA families with COG annotation | 90AA_ref_taxonomy_format.tsv.xz 1_cdd_tcov_90AA.tsv.gz cddid_all.tbl cog-20.def.tab.tsv | 0_arc_motif_cog.tsv 0_bac_motif_cog.tsv 0_bg_motif_cog.tsv |
17-
| 03_count_cog_class.py | Count cog class number an fraction of smORFs with annotation | 0_bac_motif_cog.tsv 0_arc_motif_cog.tsv 0_bg_motif_cog.tsv | 1_bac_motif_cog_class_count.tsv 1_arc_motif_cog_class_count.tsv 1_bg_motif_cog_class_count.tsv |
18-
| 04_count_cog.py | Count cog number an fraction of smORFs with annotation | 0_arc_motif_cog.tsv 0_bg_motif_cog.tsv | 1_arc_motif_cog_count.tsv 1_bg_motif_cog_count.tsv_new |
19-
| 05_count_cog_class_trans.py | Count cog class number an fraction of transmembrane or secreted smORFs with annotation | 90AA_tm_signal.tsv.gz 0_arc_motif_cog.tsv 0_bac_motif_cog.tsv | 1_arc_motif_cog_class_count_trans.tsv 1_bac_motif_cog_class_count_trans.tsv |
20-
| 06_count_cog_trans.py | Combine TMHMM and SignalP results | 90AA_tm_signal.tsv.gz 90AA_tm_signal.tsv.gz 0_arc_motif_cog.tsv 0_bac_motif_cog.tsv | 1_arc_motif_cog_count_trans.tsv 1_arc_motif_cog_count_not_trans.tsv 1_bac_motif_cog_count_trans.tsv 1_bac_motif_cog_count_not_trans.tsv |
15+
| 01_map_taxonomy_trans.py | Map transmembrane or secreted families to taxonomy and count transmembrane or secreted fraction of each phylum | 90AA_tm_signal.tsv 90AA_tax.tsv.xz | trans_taxa.tsv trans_phylum.csv |
16+
| 02_extract_cog.py | Extract bacterial and archaeal 90AA families with COG annotation | 90AA_tax.tsv.xz 1_cdd_tcov_90AA.tsv.gz cddid_all.tbl cog-20.def.tab.tsv | 0_arc_motif_cog.tsv 0_bac_motif_cog.tsv 0_bg_motif_cog.tsv |
17+
| 03_count_cog_class.py | Count the number of each COG class of smORFs | 0_bac_motif_cog.tsv 0_arc_motif_cog.tsv 0_bg_motif_cog.tsv | 1_bac_motif_cog_class_count.tsv 1_arc_motif_cog_class_count.tsv 1_bg_motif_cog_class_count.tsv |
18+
| 04_count_cog.py | Count the number of each COG of smORFs | 0_arc_motif_cog.tsv 0_bg_motif_cog.tsv | 1_arc_motif_cog_count.tsv 1_bg_motif_cog_count.tsv |
19+
| 05_count_cog_class_trans.py | Count the number of each COG class of transmembrane or secreted smORFs | 90AA_tm_signal.tsv 0_arc_motif_cog.tsv 0_bac_motif_cog.tsv | 1_arc_motif_cog_class_count_trans.tsv 1_bac_motif_cog_class_count_trans.tsv |
20+
| 06_count_cog_trans.py | Count the number of each COG of transmembrane or secreted smORFs | 90AA_tm_signal.tsv 0_arc_motif_cog.tsv 0_bac_motif_cog.tsv | 1_arc_motif_cog_count_trans.tsv 1_arc_motif_cog_count_not_trans.tsv 1_bac_motif_cog_count_trans.tsv 1_bac_motif_cog_count_not_trans.tsv |

0 commit comments

Comments
 (0)