Skip to content

Commit c73ddc3

Browse files
committed
ENH update 09_Density
1 parent 54b95e8 commit c73ddc3

File tree

4 files changed

+12
-7
lines changed

4 files changed

+12
-7
lines changed

General_Scripts/09_Density/01_100AA_copy_number_per_tax.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,23 @@
11
def store_100(infile):
2-
import lzma
2+
import gzip
33
name = set()
4-
with lzma.open(infile,'rt') as f:
4+
with gzip.open(infile,'rt') as f:
55
for line in f:
6-
old,new = line.strip().split('\t')
7-
name.add(old)
6+
member,cluster = line.strip().split('\t')
7+
name.add(member)
88
return name
99

1010
def cal(infile,name,outfile):
1111
import lzma
12+
1213
kingdom = {}
1314
phylum = {}
1415
cl = {}
1516
order = {}
1617
family = {}
1718
genus = {}
1819
species = {}
20+
1921
with lzma.open(infile,'rt') as f:
2022
for line in f:
2123
linelist = line.strip().split('\t',2)
@@ -75,7 +77,7 @@ def cal(infile,name,outfile):
7577
for key,value in species.items():
7678
out.write(f'{key}\t{value}\n')
7779

78-
infile1 = '100AA_rename.tsv.xz'
80+
infile1 = 'GMSC.cluster.tsv.gz'
7981
infile2 = 'metag_cluster_taxonomy.tsv.xz'
8082
outfile = 'cpnumber_per_tax.tsv'
8183

General_Scripts/09_Density/02_nbps_per_tax.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
'''
2+
Concept:
23
Add full ranks for nbps per taxon.
34
'''
45
def store_taxid_name(infile1):
56
taxid_dict = {}
67
with open(infile1,'rt') as f:
78
for line in f:
89
linelist = line.strip().split('\t',1)
9-
if len(linelist) ==2 :
10+
if len(linelist) == 2:
1011
name = linelist[1].replace('d__','').replace('p__','').replace('c__','').replace('o__','').replace('f__','').replace('g__','').replace('s__','').replace('\t',';')
1112
taxid_dict[linelist[0]] = name
1213
return taxid_dict
@@ -43,6 +44,7 @@ def per_tax(infile,outfile):
4344
family_number = {}
4445
genus_number = {}
4546
species_number = {}
47+
4648
with open(infile,'rt') as f:
4749
for line in f:
4850
taxid,rank,name,nbps = line.strip().split('\t')

General_Scripts/09_Density/03_calculate_density.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
'''
2+
Concept:
23
Calculate density of phylum and genus
34
'''
45
import pandas as pd

General_Scripts/09_Density/Readme.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,6 @@
22

33
| **Code** | **Description** | **Input** | **Output** |
44
| :---: | :---: | :---: | :---: |
5-
| 01_100AA_copy_number_per_tax.py | Calculate the copy number of smORFs per taxonomy | 100AA_rename.tsv.xz metag_cluster_taxonomy.tsv.xz | cpnumber_per_tax.tsv |
5+
| 01_100AA_copy_number_per_tax.py | Calculate the copy number of smORFs per taxonomy | GMSC.cluster.tsv.gz metag_cluster_taxonomy.tsv.xz | cpnumber_per_tax.tsv |
66
| 02_nbps_per_tax.py | Calculate nbps per taxonomy | taxid_fullname_gtdb.tsv bps-per-taxon.tsv | per_tax_rank.txt full_nbp.txt |
77
| 03_calculate_density.py | Calculate density of phylum and genus | cpnumber_per_tax.tsv per_tax_rank.txt | density_phylum.tsv density_genus.tsv |

0 commit comments

Comments
 (0)