Skip to content

Commit c565cd6

Browse files
author
Eric Normandeau
committed
Minor: Tweak exploratory script reporting
1 parent 1e8eec7 commit c565cd6

File tree

1 file changed

+8
-16
lines changed

1 file changed

+8
-16
lines changed

01_scripts/util/find_position_correction.py

Lines changed: 8 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ class Fasta(object):
1717
"""
1818

1919
def __init__(self, name, sequence):
20-
self.name = name
20+
self.name = name.split(" ")[0]
2121
self.sequence = sequence
2222

2323
def write_to_file(self, handle):
@@ -64,8 +64,6 @@ def get_correction(cigar):
6464
"""Given a cigar string, return the position correction for this alignment
6565
for the central nucleotide position
6666
"""
67-
#print()
68-
#print(cigar)
6967
insertion_length = 0
7068
deletion_length = 0
7169

@@ -82,8 +80,6 @@ def get_correction(cigar):
8280
elif k == "I":
8381
insertion_length += l
8482

85-
#print(c, length_so_far, insertion_length, deletion_length)
86-
8783
if length_so_far > half_length:
8884
break
8985

@@ -115,9 +111,7 @@ def get_correction(cigar):
115111

116112
corr[chrom1][pos1] = [chrom2, pos2, cigar]
117113

118-
#for chrom in corr:
119-
# for pos in corr[chrom]:
120-
# print(chrom, pos, corr[chrom][pos])
114+
print("Done with corr file")
121115

122116
# Read vcf_file
123117
vcf = defaultdict(lambda: defaultdict(list))
@@ -134,12 +128,11 @@ def get_correction(cigar):
134128

135129
vcf[chrom][pos] = alleles
136130

137-
#for chrom in vcf:
138-
# for pos in vcf[chrom]:
139-
# print(chrom, pos, vcf[chrom][pos])
131+
print("Done with VCF file")
140132

141133
# Load genome_file in memory
142134
sequences = {x.name: x.sequence for x in fasta_iterator(genome_file)}
135+
print("Done with genome file")
143136

144137
# Explore position correction
145138
for chrom1 in corr:
@@ -151,8 +144,7 @@ def get_correction(cigar):
151144
if len(a[0]) == len(a[1]) and len(a[0]) == 1:
152145
alleles = vcf[chrom2][pos2]
153146

154-
if cigar != "201M":
155-
correction = get_correction(cigar)
156-
nuc = sequences[chrom2][pos2-1+correction]
157-
region = sequences[chrom2][(pos2-1+correction-20):(pos2-1+correction+20)]
158-
print(chrom1, pos1, chrom2, pos2, cigar, alleles, nuc, nuc in alleles, region)
147+
correction = get_correction(cigar)
148+
nuc = sequences[chrom2][pos2-1+correction].upper()
149+
region = sequences[chrom2][(pos2-1+correction-20):(pos2-1+correction+20)].upper()
150+
print(chrom1, pos1, chrom2, pos2, cigar, alleles, nuc, nuc in alleles, region)

0 commit comments

Comments
 (0)