Skip to content

Commit 4339db6

Browse files
committed
ENH modify codes and remove useless codes
1 parent af672c5 commit 4339db6

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

45 files changed

298 additions and 578 deletions (+298 / -578 lines changed)

Basic_code/01_deduplicate_sort_merge.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -81,8 +81,8 @@ def mergeseq(outfile):
8181
preseq = seq
8282
print("finish merge")
8383

84-
INPUT_FILE = "/data/GMSC10.metag_Prog_smorfs.faa.gz"
85-
OUTPUT_FILE = "/data/smorf_dedup.faa.gz"
84+
INPUT_FILE = "./data/GMSC10.metag_Prog_smorfs.faa.gz"
85+
OUTPUT_FILE = "./data/smorf_dedup.faa.gz"
8686

8787
splits = splitseq(INPUT_FILE)
8888
for sp in bvalue(splits):

Basic_code/02_extract.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -25,8 +25,8 @@ def extract_seq(infile1,infile2,outfile1,outfile2):
2525
out2.write(f'>{ID}\n{seq}\n')
2626

2727

28-
INPUT_FILE_1 = "/data/metag_ProG.raw_number.tsv.gz"
29-
INPUT_FILE_2 = "/data/metag_ProG_dedup.faa.gz"
30-
OUT_FILE_1 = "/data/metag_ProG_nonsingleton.faa.gz"
31-
OUT_FILE_2 = "/data/metag_ProG_singleton.faa.gz"
28+
INPUT_FILE_1 = "./data/metag_ProG.raw_number.tsv.gz"
29+
INPUT_FILE_2 = "./data/metag_ProG_dedup.faa.gz"
30+
OUT_FILE_1 = "./data/metag_ProG_nonsingleton.faa.gz"
31+
OUT_FILE_2 = "./data/metag_ProG_singleton.faa.gz"
3232
extract_seq(INPUT_FILE_1,INPUT_FILE_2,OUT_FILE_1,OUT_FILE_2)

Basic_code/04_select1000.py

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -31,16 +31,16 @@ def select(infile,outfile,NR_SINGLETONS):
3131
if n in selected:
3232
out.write(f'>{ID}\n{seq}\n')
3333

34-
INPUT_FILE_1 = "/clust_result/0.9_result/0.9clu_singleton_name"
35-
INPUT_FILE_2 = "/clust_result/0.9_result/metag_ProG_nonsingleton_0.9_clu_rep.faa"
36-
INPUT_FILE_3 = "/clust_result/0.5_result/0.5clu_singleton_name"
37-
INPUT_FILE_4 = "/clust_result/0.5_result/metag_ProG_nonsingleton_0.5_clu_rep.faa"
38-
OUT_FILE_1 = "/clust_result/0.9_result/0.9clu_singleton.faa"
39-
OUT_FILE_2 = "/clust_result/0.9_result/0.9clu_nonsingleton.faa"
40-
OUT_FILE_3 = "/clust_result/0.5_result/0.5clu_singleton.faa"
41-
OUT_FILE_4 = "/clust_result/0.5_result/0.5clu_nonsingleton.faa"
42-
OUT_FILE_5 = "/clust_result/0.9_result/0.9clu_singleton_1000.faa"
43-
OUT_FILE_6 = "/clust_result/0.5_result/0.5clu_singleton_1000.faa"
34+
INPUT_FILE_1 = "./clust_result/0.9_result/0.9clu_singleton_name"
35+
INPUT_FILE_2 = "./clust_result/0.9_result/metag_ProG_nonsingleton_0.9_clu_rep.faa"
36+
INPUT_FILE_3 = "./clust_result/0.5_result/0.5clu_singleton_name"
37+
INPUT_FILE_4 = "./clust_result/0.5_result/metag_ProG_nonsingleton_0.5_clu_rep.faa"
38+
OUT_FILE_1 = "./clust_result/0.9_result/0.9clu_singleton.faa"
39+
OUT_FILE_2 = "./clust_result/0.9_result/0.9clu_nonsingleton.faa"
40+
OUT_FILE_3 = "./clust_result/0.5_result/0.5clu_singleton.faa"
41+
OUT_FILE_4 = "./clust_result/0.5_result/0.5clu_nonsingleton.faa"
42+
OUT_FILE_5 = "./clust_result/0.9_result/0.9clu_singleton_1000.faa"
43+
OUT_FILE_6 = "./clust_result/0.5_result/0.5clu_singleton_1000.faa"
4444

4545
extract(INPUT_FILE_1,INPUT_FILE_2,OUT_FILE_1,OUT_FILE_2)
4646
extract(INPUT_FILE_3,INPUT_FILE_4,OUT_FILE_3,OUT_FILE_4)

Basic_code/06_split_singletons.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,7 @@ def splitseq(infile,X,outfile):
2929
break
3030
out.close()
3131

32-
INPUT_FILE = "/data/metag_ProG_singleton.faa.gz"
33-
SPLIT_FILE_PAT = "/diamond/split/sub{ix}.faa.gz"
32+
INPUT_FILE = "./data/metag_ProG_singleton.faa.gz"
33+
SPLIT_FILE_PAT = "./diamond/split/sub{ix}.faa.gz"
3434

3535
splitseq(INPUT_FILE, 100000000, SPLIT_FILE_PAT)

Basic_code/07_diamond.sh

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,6 @@ mkdir result_0.9
1717
DIR="./split"
1818
for file in $(ls $DIR)
1919
do
20-
diamond blastp -q ./split/$file -d metag_ProG_ns_0.5 -o ./result_0.5/$file.tsv -e 0.00001 --id 90 -b 12 -c 1
21-
diamond blastp -q ./split/$file -d metag_ProG_ns_0.9 -o ./result_0.9/$file.tsv -e 0.00001 --id 90 -b 12 -c 1
20+
diamond blastp -q ./split/$file -d metag_ProG_ns_0.5 -o ./result_0.5/$file.tsv -e 0.00001 --id 50 -b 12 -c 1 --query-cover 90 --subject-cover 90
21+
diamond blastp -q ./split/$file -d metag_ProG_ns_0.9 -o ./result_0.9/$file.tsv -e 0.00001 --id 90 -b 12 -c 1 --query-cover 90 --subject-cover 90
2222
done

Basic_code/08_1_add_length.py

Lines changed: 0 additions & 47 deletions
This file was deleted.

Basic_code/08_2_filter_coverage.sh

Lines changed: 0 additions & 24 deletions
This file was deleted.

Basic_code/08_3_identify_clusters.py renamed to Basic_code/08_identify_clusters.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -18,9 +18,9 @@ def identify(infile,outfile):
1818
out.close()
1919

2020
for i in range(24):
21-
INPUT_FILE_1 = "/diamond/analysis/analysis_0.5/sub"+str(i)+".faa.gz.tsv.tmp.3"
22-
INPUT_FILE_2 = "/diamond/analysis/analysis_0.9/sub"+str(i)+".faa.gz.tsv.tmp.3"
23-
OUT_FILE_1 = "/diamond/analysis/analysis_0.5/sub"+str(i)+".faa.gz.tsv.tmp.4"
24-
OUT_FILE_2 = "/diamond/analysis/analysis_0.9/sub"+str(i)+".faa.gz.tsv.tmp.4"
21+
INPUT_FILE_1 = "./diamond/analysis/analysis_0.5/sub"+str(i)+".faa.gz.tsv"
22+
INPUT_FILE_2 = "./diamond/analysis/analysis_0.9/sub"+str(i)+".faa.gz.tsv"
23+
OUT_FILE_1 = "./diamond/analysis/analysis_0.5/sub"+str(i)+".faa.gz.tsv.tmp"
24+
OUT_FILE_2 = "./diamond/analysis/analysis_0.9/sub"+str(i)+".faa.gz.tsv.tmp"
2525
identify(INPUT_FILE_1,OUT_FILE_1)
2626
identify(INPUT_FILE_2,OUT_FILE_2)

Basic_code/08_4_join_rescue_result.py renamed to Basic_code/09_join_rescue_result.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@ def join(infile1,infile2,outfile):
2828
out.write(key+"\t"+value+"\t"+tsv90[key]+"\n")
2929
out.close()
3030

31-
INPUT_FILE_1 = "/diamond/analysis/analysis_0.5/singleton_0.5.tsv"
32-
INPUT_FILE_2 = "/diamond/analysis/analysis_0.9/singleton_0.9.tsv"
33-
OUT_FILE = "/diamond/analysis/singleton_0.5_0.9.tsv"
31+
INPUT_FILE_1 = "./diamond/analysis/analysis_0.5/singleton_0.5.tsv"
32+
INPUT_FILE_2 = "./diamond/analysis/analysis_0.9/singleton_0.9.tsv"
33+
OUT_FILE = "./diamond/analysis/singleton_0.5_0.9.tsv"
3434
join(INPUT_FILE_1,INPUT_FILE_2,OUT_FILE)

frozen/01_rename_list.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -56,13 +56,13 @@ def rename_singleton(infile1,infile2,outfile,n,prefix):
5656
n += 1
5757
out.close()
5858

59-
INPUT_FILE_1 = "/data/metag_ProG.raw_number.tsv.gz"
60-
INPUT_FILE_2 = "/data/metag_ProG_nonsingleton.faa.gz"
59+
INPUT_FILE_1 = "./data/metag_ProG.raw_number.tsv.gz"
60+
INPUT_FILE_2 = "./data/metag_ProG_nonsingleton.faa.gz"
6161
INPUT_FILE_3 = "singleton_0.5_0.9.tsv"
62-
INPUT_FILE_4 = "/data/metag_ProG_singleton.faa.gz"
63-
OUTPUT_FILE_1 = "/data/nonsingleton_rename_seq.tsv"
64-
OUTPUT_FILE_2 = "/home1/duanyq/GMSC/data/nonsingleton_rename.tsv"
65-
OUTPUT_FILE_3 = "/home1/duanyq/GMSC/data/singleton_rename.tsv"
62+
INPUT_FILE_4 = "./data/metag_ProG_singleton.faa.gz"
63+
OUTPUT_FILE_1 = "./data/nonsingleton_rename_seq.tsv"
64+
OUTPUT_FILE_2 = "./data/nonsingleton_rename.tsv"
65+
OUTPUT_FILE_3 = "./data/singleton_rename.tsv"
6666

6767
sort(INPUT_FILE_1,OUTPUT_FILE_1,559407227,'GMSC10.100AA')
6868
rename_nonsingleton(INPUT_FILE_2,OUTPUT_FILE_1,OUTPUT_FILE_2)

0 commit comments

Comments
 (0)