File tree Expand file tree Collapse file tree 2 files changed +30
-1
lines changed
General_Scripts/06_Compare_with_other_datasets Expand file tree Collapse file tree 2 files changed +30
-1
lines changed Original file line number Diff line number Diff line change 2
2
3
3
| **Code** | **Description** |
4
4
| :---: | :---: |
5
- | 01_download.sh 02_filter_sp_dedup.py | Download archaeal and bacterial proteins from Refseq, filter sequences (<100aa) and remove redundancy |
5
+ | 01_download.sh | Download archaeal and bacterial proteins from Refseq |
6
+ | 02_filter_sp_dedup.py | Filter sequences (<100aa) and remove redundancy |
6
7
| 03_align.sh | Use Diamond to align sequences to GMSC |
Original file line number Diff line number Diff line change
1
def fasta_iter(fname, full_header=False):
    """Iterate over the records of a FASTA file, yielding (header, sequence).

    The file is opened in text mode. Names ending in ``.gz`` are read with
    ``gzip`` and names ending in ``.xz`` with ``lzma``; anything else is
    opened as a plain file.

    Parameters
    ----------
    fname : str or os.PathLike
        Path of the FASTA file to read.
    full_header : bool, optional
        If true, yield the whole header line (without the leading ``>`` and
        surrounding whitespace). Otherwise yield only the first
        whitespace-delimited token of the header.

    Yields
    ------
    (str, str)
        The header and the sequence. The sequence is the concatenation of all
        lines of the record, each stripped of surrounding whitespace. A bare
        ``>`` line produces an empty-string header.

    Notes
    -----
    Lines that appear before the first ``>`` line are discarded.
    """
    import os

    # Normalise path-like objects (e.g. pathlib.Path) so that the suffix
    # checks below work for them as well as for plain strings.
    path = os.fspath(fname)
    if path.endswith('.gz'):
        import gzip
        op = gzip.open
    elif path.endswith('.xz'):
        import lzma
        op = lzma.open
    else:
        op = open

    header = None
    chunks = []
    with op(path, 'rt') as f:
        for line in f:
            if line.startswith('>'):
                # A new record starts: emit the one collected so far, if any.
                if header is not None:
                    yield header, ''.join(chunks)
                line = line[1:].strip()
                if not line:
                    header = ''
                elif full_header:
                    header = line
                else:
                    header = line.split()[0]
                chunks = []
            else:
                chunks.append(line.strip())
        # Emit the final record, which has no following '>' line to trigger it.
        if header is not None:
            yield header, ''.join(chunks)
You can’t perform that action at this time.
0 commit comments