@@ -17,7 +17,7 @@ class Fasta(object):
1717 """
1818
1919 def __init__ (self , name , sequence ):
20- self .name = name
20+ self .name = name . split ( " " )[ 0 ]
2121 self .sequence = sequence
2222
2323 def write_to_file (self , handle ):
@@ -64,8 +64,6 @@ def get_correction(cigar):
6464 """Given a cigar string, return the position correction for this alignment
6565 for the central nucleotide position
6666 """
67- #print()
68- #print(cigar)
6967 insertion_length = 0
7068 deletion_length = 0
7169
@@ -82,8 +80,6 @@ def get_correction(cigar):
8280 elif k == "I" :
8381 insertion_length += l
8482
85- #print(c, length_so_far, insertion_length, deletion_length)
86-
8783 if length_so_far > half_length :
8884 break
8985
@@ -115,9 +111,7 @@ def get_correction(cigar):
115111
116112 corr [chrom1 ][pos1 ] = [chrom2 , pos2 , cigar ]
117113
118- #for chrom in corr:
119- # for pos in corr[chrom]:
120- # print(chrom, pos, corr[chrom][pos])
114+ print ("Done with corr file" )
121115
122116# Read vcf_file
123117vcf = defaultdict (lambda : defaultdict (list ))
@@ -134,12 +128,11 @@ def get_correction(cigar):
134128
135129 vcf [chrom ][pos ] = alleles
136130
137- #for chrom in vcf:
138- # for pos in vcf[chrom]:
139- # print(chrom, pos, vcf[chrom][pos])
131+ print ("Done with VCF file" )
140132
141133# Load genome_file in memory
142134sequences = {x .name : x .sequence for x in fasta_iterator (genome_file )}
135+ print ("Done with genome file" )
143136
144137# Explore position correction
145138for chrom1 in corr :
@@ -151,8 +144,7 @@ def get_correction(cigar):
151144 if len (a [0 ]) == len (a [1 ]) and len (a [0 ]) == 1 :
152145 alleles = vcf [chrom2 ][pos2 ]
153146
154- if cigar != "201M" :
155- correction = get_correction (cigar )
156- nuc = sequences [chrom2 ][pos2 - 1 + correction ]
157- region = sequences [chrom2 ][(pos2 - 1 + correction - 20 ):(pos2 - 1 + correction + 20 )]
158- print (chrom1 , pos1 , chrom2 , pos2 , cigar , alleles , nuc , nuc in alleles , region )
147+ correction = get_correction (cigar )
148+ nuc = sequences [chrom2 ][pos2 - 1 + correction ].upper ()
149+ region = sequences [chrom2 ][(pos2 - 1 + correction - 20 ):(pos2 - 1 + correction + 20 )].upper ()
150+ print (chrom1 , pos1 , chrom2 , pos2 , cigar , alleles , nuc , nuc in alleles , region )
0 commit comments