-
Notifications
You must be signed in to change notification settings - Fork 15
Expand file tree
/
Copy pathTRFdat_to_bed.py
More file actions
47 lines (42 loc) · 1.74 KB
/
TRFdat_to_bed.py
File metadata and controls
47 lines (42 loc) · 1.74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#!/usr/bin/env python
from argparse import (ArgumentParser, FileType)
def parse_args(argv=None):
    """Parse the command-line arguments; use '-h' for help.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. When None (the default) argparse reads
            ``sys.argv[1:]``, so existing ``parse_args()`` calls behave
            exactly as before.

    Returns:
        An ``argparse.Namespace`` with two string attributes: ``dat`` (the
        value given to ``--dat``) and ``bed`` (the value given to ``--bed``).

    Both options are declared ``required=True``; argparse prints a usage
    message and exits when either one is missing.
    """
    parser = ArgumentParser(
        description='Convert Tandem Repeat Finder (TRF) dat file to bed format with repeat units for microsatellite genotyping')
    parser.add_argument(
        '--dat', type=str, required=True,
        help='Input dat file produced by Tandem Repeat Finder (TRF) using the -d option')
    parser.add_argument(
        '--bed', type=str, required=True,
        help='Output bed file containing genomic locations and repeat units of microsatellites.')
    # Passing None makes argparse fall back to sys.argv[1:].
    return parser.parse_args(argv)
### Main
def main():
    """Convert a TRF dat file into a five-column, tab-separated BED file.

    The input and output paths come from ``parse_args()`` (``--dat`` and
    ``--bed``). The dat file is read line by line:

    * A line starting with ``Sequence:`` sets the current chromosome name to
      its second whitespace-separated field.
    * Any other line is treated as a repeat record only if it has at least
      14 whitespace-separated fields and its first field parses as an
      integer. Everything else (blank lines, header text, truncated
      records) is skipped without output.

    For each record one line is written: chromosome, start minus one
    (presumably converting TRF's 1-based start to BED's 0-based start), end
    (field 2, copied as-is), motif (field 14) and copy number (field 4).

    Raises:
        IndexError: if a ``Sequence:`` line carries no name after the tag.
        IOError/OSError: if either file cannot be opened.
    """
    # Parse command line arguments
    args = parse_args()

    # Open the input first: when the dat path is wrong we fail before the
    # output file has been created or truncated.
    with open(args.dat, 'r') as dat, open(args.bed, 'w') as bed:
        chrom = ""
        for line in dat:
            fields = line.split()

            if line.startswith("Sequence:"):
                chrom = fields[1]
                continue

            # A record needs fields up to index 13 (the motif). Blank lines
            # and truncated records are skipped explicitly rather than by
            # catching IndexError around the whole record path.
            if len(fields) < 14:
                continue

            # Header lines do not start with an integer start position.
            try:
                start = int(fields[0]) - 1
            except ValueError:
                continue

            end = fields[1]
            motif = fields[13]
            copynum = fields[3]
            bed.write('\t'.join([chrom, str(start), end, motif, copynum]) + '\n')
# Run the conversion only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()