@@ -32,18 +32,22 @@ def phase_nearby_variants(
3232 # no phasable variants
3333 variants_to_phase = contig .mismatches + contig .non_target_indels
3434 if not variants_to_phase :
35- return make_target_obj_from_contig (target , indexed_contig )
35+ return make_target_obj_from_contig (target , indexed_contig )
3636
3737 # phase all phasables within the target exon (hard phasing)
3838 if hard :
3939 variants_list = []
40- cleaned , variant_list = precleaning (indexed_contig , variants_list , target_pos_on_contig , pileup , target )
40+ cleaned , variant_list = precleaning (
41+ indexed_contig , variants_list , target_pos_on_contig , pileup , target
42+ )
4143 return greedy_phasing (target , cleaned )
4244 else :
43- indexed_contig , variants_to_phase = precleaning (indexed_contig , variants_to_phase , target_pos_on_contig , pileup )
45+ indexed_contig , variants_to_phase = precleaning (
46+ indexed_contig , variants_to_phase , target_pos_on_contig , pileup
47+ )
4448
4549 if not variants_to_phase :
46- return make_target_obj_from_contig (target , indexed_contig )
50+ return make_target_obj_from_contig (target , indexed_contig )
4751 else :
4852 variants_in_non_targets , mut_frac = variants_in_non_target_pileup (
4953 pileup , target , basequalthresh , to_complex
@@ -69,13 +73,25 @@ def phase_nearby_variants(
6973
7074 remove_deletables (indexed_contig , lt_end , target_pos_on_contig , rt_end )
7175
72- mismatches_to_phase = [var for var in variants_to_phase if not var .is_indel and indexed_contig .get (var .pos , False )]
73- non_target_indels_to_phase = [var for var in variants_to_phase if var .is_indel and indexed_contig .get (var .pos , False ) and var != target ]
76+ mismatches_to_phase = [
77+ var
78+ for var in variants_to_phase
79+ if not var .is_indel and indexed_contig .get (var .pos , False )
80+ ]
81+ non_target_indels_to_phase = [
82+ var
83+ for var in variants_to_phase
84+ if var .is_indel and indexed_contig .get (var .pos , False ) and var != target
85+ ]
7486
7587 if variants_to_phase :
7688 if not non_target_indels_to_phase :
7789 peak_locs = locate_mismatch_cluster_peaks (
78- indexed_contig , mismatches_to_phase , target , snv_neighborhood , to_complex
90+ indexed_contig ,
91+ mismatches_to_phase ,
92+ target ,
93+ snv_neighborhood ,
94+ to_complex ,
7995 )
8096
8197 if peak_locs :
@@ -93,10 +109,20 @@ def phase_nearby_variants(
93109 if max (target_len , non_target_max_len ) < 4 :
94110 indel_neighborhood = int (indel_neighborhood / 2 ) + 1
95111
96- remove_common_substrings (indexed_contig , target_pos_on_contig , indel_neighborhood )
112+ remove_common_substrings (
113+ indexed_contig , target_pos_on_contig , indel_neighborhood
114+ )
97115
98- lt_end = end_point (indexed_contig , mismatches_to_phase , target , snv_neighborhood , left = True )
99- rt_end = end_point (indexed_contig , mismatches_to_phase , target , snv_neighborhood , left = False )
116+ lt_end = end_point (
117+ indexed_contig , mismatches_to_phase , target , snv_neighborhood , left = True
118+ )
119+ rt_end = end_point (
120+ indexed_contig ,
121+ mismatches_to_phase ,
122+ target ,
123+ snv_neighborhood ,
124+ left = False ,
125+ )
100126
101127 remove_deletables (indexed_contig , lt_end , target_pos_on_contig , rt_end )
102128
@@ -111,10 +137,13 @@ def phase_nearby_variants(
def make_target_obj_from_contig(target, indexed_contig):
    """Rebuild the target variant from the contig entry at ``target.pos``.

    Looks up ``indexed_contig[target.pos]`` and constructs a new ``Variant``
    from the first two elements of that entry (presumably the ref and alt
    alleles -- confirm against the contig builder), then normalizes it.

    Args:
        target: variant-like object exposing ``chrom``, ``pos``, ``reference``
            and ``normalize()``.
        indexed_contig: mapping keyed by position; each value must be
            indexable at ``[0]`` and ``[1]``.

    Returns:
        The normalized ``Variant`` built from the contig entry, or
        ``target.normalize()`` if the lookup or construction fails.
    """
    try:
        data = indexed_contig[target.pos]
        return Variant(
            target.chrom, target.pos, data[0], data[1], target.reference
        ).normalize()
    except Exception:
        # Best-effort fallback: any ordinary failure (missing position,
        # malformed entry, construction error) yields the normalized input
        # target. KeyboardInterrupt/SystemExit are deliberately NOT caught.
        return target.normalize()
117145
146+
118147def greedy_phasing (target , indexed_contig ):
119148
120149 cpos = 0
@@ -142,7 +171,9 @@ def seq_complexity(contig, snv_neighborhood, indel_neighorhood):
142171 )
143172
144173
145- def precleaning (genome_indexed_contig , variants_list , target_pos , pileup , limit_to_target_exon = True ):
174+ def precleaning (
175+ genome_indexed_contig , variants_list , target_pos , pileup , limit_to_target_exon = True
176+ ):
146177 lt_loci , rt_loci = [], []
147178
148179 # filter low qual loci
@@ -234,9 +265,10 @@ def locate_mismatch_cluster_peaks(
234265 else :
235266 return None
236267
237-
238268 lt_peak_pos = target .pos if lt_peak_pos == - np .inf else lt_peak_pos
239- rt_peak_pos = target .pos + len (target .ref ) - 1 if rt_peak_pos == np .inf else rt_peak_pos
269+ rt_peak_pos = (
270+ target .pos + len (target .ref ) - 1 if rt_peak_pos == np .inf else rt_peak_pos
271+ )
240272
241273 return (lt_peak_pos - 1 , rt_peak_pos + 1 )
242274
@@ -248,7 +280,7 @@ def calc_peak(indexed_contig, mismatches, target, snv_neighborhood, left):
248280 loci = [k for k , v in indexed_contig .items () if k <= target_pos ][::- 1 ]
249281 snv_loci = [var .pos for var in mismatches if var .pos < target_pos ]
250282 else :
251- del_adjust = len (target .ref ) - 1
283+ del_adjust = len (target .ref ) - 1
252284 loci = [k for k , v in indexed_contig .items () if k > target_pos + del_adjust ]
253285 snv_loci = [var .pos for var in mismatches if var .pos > target_pos ]
254286
@@ -319,7 +351,7 @@ def variants_in_non_target_pileup(pileup, target, basequalthresh, to_complex):
319351 nontarget_pileup = [
320352 findall_mismatches (read , end_trim = 10 )
321353 for read in pileup
322- if not read ["is_target" ] and read ["is_covering" ] and not read ["is_dirty" ]
354+ if not read ["is_target" ] and read ["is_covering" ] and not read ["is_dirty" ]
323355 ]
324356
325357 if not nontarget_pileup :
@@ -340,27 +372,26 @@ def variants_in_non_target_pileup(pileup, target, basequalthresh, to_complex):
340372 indels = [
341373 indel
342374 for indel , cnt in Counter (indels ).items ()
343- if (cnt > 2 and cnt / len (nontarget_pileup ) > 0.15 )
344- or cnt > 5
375+ if (cnt > 2 and cnt / len (nontarget_pileup ) > 0.15 ) or cnt > 5
345376 ]
346377
347378 mismatches = [
348379 Variant (target .chrom , v [0 ], v [1 ], v [2 ], target .reference )
349380 for read in nontarget_pileup
350- for v in read ["mismatches" ] if v [3 ] > basequalthresh
381+ for v in read ["mismatches" ]
382+ if v [3 ] > basequalthresh
351383 ]
352384
353- nontarget_pileup_vol = sum (
354- max (0 , len (read ["ref_seq" ]) - 20 ) for read in nontarget_pileup
355- ) + 1
385+ nontarget_pileup_vol = (
386+ sum ( max (0 , len (read ["ref_seq" ]) - 20 ) for read in nontarget_pileup ) + 1
387+ )
356388
357389 mutation_frac = (len (mismatches ) + len (indels )) / nontarget_pileup_vol
358390
359391 mismatches = [
360392 var
361393 for var , cnt in Counter (mismatches ).items ()
362- if (cnt > 2 and cnt / len (nontarget_pileup ) > 0.15 )
363- or cnt > 5
394+ if (cnt > 2 and cnt / len (nontarget_pileup ) > 0.15 ) or cnt > 5
364395 ]
365396
366397 return set (indels + mismatches ), mutation_frac
@@ -396,9 +427,9 @@ def get_freq(freqinfo):
396427def remove_deletables (indexed_contig , lt_end , target_pos , rt_end ):
397428 tmp = indexed_contig .copy ()
398429
399- #if lt_end == -np.inf:
430+ # if lt_end == -np.inf:
400431 # lt_end = target_pos - 1
401- #if rt_end == np.inf:
432+ # if rt_end == np.inf:
402433 # rt_end = target_pos + 1
403434
404435 for k , v in tmp .items ():
@@ -448,7 +479,6 @@ def trim_common(indexed_contig, commons, max_common_str_len, left):
448479 else :
449480 start = search_nearest_lt_locus (indexed_contig , sub_str [0 ], left )
450481
451-
452482 end = sub_str [- 1 ]
453483 start_event = indexed_contig [start ]
454484 end_event = indexed_contig [end ]
@@ -579,9 +609,7 @@ def end_point(indexed_contig, mismatches, target, snv_neighborhood, left):
579609 return peak_pos + 1
580610
581611
582-
def get_end_most_indel(indexed_contig, target):
    """Return a ``Variant`` for the first length-changing contig entry.

    Walks ``indexed_contig`` in its iteration order and picks the first
    entry whose first two elements differ in length (presumably ref vs alt
    allele, i.e. an indel -- confirm against the contig builder).

    Args:
        indexed_contig: mapping of position -> indexable event data.
        target: object supplying ``chrom`` and ``reference`` for the new
            ``Variant``.

    Returns:
        A ``Variant`` at the matching position, or ``None`` when no entry
        has elements of unequal length.
    """
    for locus, event in indexed_contig.items():
        first, second = event[0], event[1]
        if len(first) == len(second):
            # equal lengths: not a length-changing event, keep scanning
            continue
        return Variant(target.chrom, locus, first, second, target.reference)
    return None
587-
0 commit comments