Skip to content

Commit bad9390

Browse files
committed
Fix #94
1 parent 518afe7 commit bad9390

File tree

22 files changed

+489
-231
lines changed

22 files changed

+489
-231
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,3 +40,4 @@ aldy_old/*
4040
scripts
4141
aldy/indelpost/*.c
4242
build/*
43+
/_*

aldy/__main__.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,15 @@
3131
def get_version():
3232
return "{} {}".format(
3333
platform.system() if platform.system() != "Darwin" else "macOS",
34-
platform.platform()[6:]
35-
if platform.system() == "Linux"
36-
else platform.mac_ver()[0]
37-
if platform.system() == "Darwin"
38-
else platform.platform(),
34+
(
35+
platform.platform()[6:]
36+
if platform.system() == "Linux"
37+
else (
38+
platform.mac_ver()[0]
39+
if platform.system() == "Darwin"
40+
else platform.platform()
41+
)
42+
),
3943
)
4044

4145

aldy/common.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66

77
from typing import Iterable, Any, List
8-
import pkg_resources
8+
import importlib.resources
99
import re
1010
import time
1111
import pprint
@@ -115,7 +115,7 @@ def samtools(self, pad_left=500, pad_right=1, prefix="") -> str:
115115
:param prefix: Chromosome prefix."""
116116

117117
return "{}:{}-{}".format(
118-
prefix + self.chr, self.start - pad_left, self.end + pad_right
118+
prefix + self.chr, max(1, self.start - pad_left), self.end + pad_right
119119
)
120120

121121
def __str__(self):
@@ -202,7 +202,7 @@ def script_path(key: str) -> str:
202202
components = key.split("/")
203203
if len(components) < 2:
204204
raise AldyException(f'"{key}"" is not valid resource name')
205-
return pkg_resources.resource_filename(components[0], "/".join(components[1:]))
205+
return str(importlib.resources.files(components[0]) / "/".join(components[1:]))
206206

207207

208208
def colorize(text: str, color: str = "green") -> str:

aldy/diplotype.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -176,15 +176,19 @@ def write_vcf(
176176
"GT": "|".join(str(all_mutations[m][mi][i]) for i in range(nall)),
177177
"DP": str(coverage[m]),
178178
"MA": ",".join(
179-
f"*{minor.solution[i].major}"
180-
if all_mutations[m][mi][i] > 0
181-
else "-"
179+
(
180+
f"*{minor.solution[i].major}"
181+
if all_mutations[m][mi][i] > 0
182+
else "-"
183+
)
182184
for i in range(nall)
183185
),
184186
"MI": ",".join(
185-
f"*{minor.solution[i].minor}"
186-
if all_mutations[m][mi][i] > 0
187-
else "-"
187+
(
188+
f"*{minor.solution[i].minor}"
189+
if all_mutations[m][mi][i] > 0
190+
else "-"
191+
)
188192
for i in range(nall)
189193
),
190194
}
@@ -307,6 +311,7 @@ def key(x):
307311
solution.set_diplotype(diplotype)
308312
return diplotype
309313

314+
310315
def estimate_cpic(gene: Gene, solution: MinorSolution) -> str:
311316
"""Calculate the CPIC functionality for a minor solution.
312317
:returns: CPIC functionality.
@@ -335,6 +340,3 @@ def estimate_cpic(gene: Gene, solution: MinorSolution) -> str:
335340
if len(i) == 1:
336341
return (0, i[0])
337342
return (0, "indeterminate")
338-
339-
340-

aldy/gene.py

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -280,9 +280,11 @@ def get_functional(self, mut, infer=True) -> Optional[str]:
280280
if self.seq[pos] != op[0]:
281281
log.warn(f"Bad mutation: {op[0]} != {self.seq[pos]}")
282282
seq = "".join(
283-
self.seq[s:pos] + op[2] + self.seq[pos + 1 : e]
284-
if s <= pos < e
285-
else self.seq[s:e]
283+
(
284+
self.seq[s:pos] + op[2] + self.seq[pos + 1 : e]
285+
if s <= pos < e
286+
else self.seq[s:e]
287+
)
286288
for s, e in self.exons
287289
)
288290
amino = seq_to_amino(seq)
@@ -409,6 +411,8 @@ def _init_basic(self, yml) -> None:
409411
seq[pos - 1] = nuc
410412
self.seq = "".join(seq)
411413

414+
if self.genome not in yml["reference"]["mappings"]:
415+
raise AldyException("Gene {self.name} not compatible with {self.genome}")
412416
self.chr, start, end, strand, cigar = yml["reference"]["mappings"][self.genome]
413417
self.chr_to_ref = {}
414418
self.ref_to_chr = {}
@@ -433,12 +437,14 @@ def _init_basic(self, yml) -> None:
433437
self._lookup_range = (start - 1, end - 1)
434438
self._lookup_seq = "".join(
435439
(
436-
rev_comp(self.seq[self.chr_to_ref[i]])
437-
if self.strand < 0
438-
else self.seq[self.chr_to_ref[i]]
440+
(
441+
rev_comp(self.seq[self.chr_to_ref[i]])
442+
if self.strand < 0
443+
else self.seq[self.chr_to_ref[i]]
444+
)
445+
if i in self.chr_to_ref
446+
else "N"
439447
)
440-
if i in self.chr_to_ref
441-
else "N"
442448
for i in range(*self._lookup_range)
443449
)
444450

@@ -474,7 +480,7 @@ def _init_regions(self, yml) -> None:
474480
for name, coord in yml["structure"]["regions"][self.genome].items():
475481
if name[0] == "e" and name[1:].isdigit(): # this is exon
476482
num_exons += 1
477-
if not coord[i * 2] <= coord[i * 2 + 1]:
483+
if i * 2 + 1 < len(coord) and not coord[i * 2] <= coord[i * 2 + 1]:
478484
raise AldyException(
479485
f"Malformed YML file {self.name} (structure:regions:{name})"
480486
)

aldy/genotype.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from typing import List, Optional, Any, Set, Dict
88
import os
99
import sys
10-
import pkg_resources
10+
import importlib.resources
1111
import datetime
1212
import time
1313

@@ -98,17 +98,17 @@ def genotype(
9898

9999
avail_genes = []
100100
if gene_db == "all":
101-
avail_genes = pkg_resources.resource_listdir("aldy.resources", "genes")
101+
avail_genes = importlib.resources.files("aldy.resources.genes").iterdir()
102102
avail_genes = [
103103
i[:-4]
104104
for i in avail_genes
105105
if len(i) > 4 and i.endswith(".yml") and not i.startswith("pharma-")
106106
]
107107
avail_genes = sorted(avail_genes)
108108
elif gene_db == "pharmacoscan":
109-
avail_genes = pkg_resources.resource_listdir(
110-
"aldy.resources.genes", "pharmacoscan"
111-
)
109+
avail_genes = importlib.resources.files(
110+
"aldy.resources.genes.pharmacoscan"
111+
).iterdir()
112112
avail_genes = [
113113
f"pharmacoscan/{i[:-4]}" for i in avail_genes if i.endswith(".yml")
114114
]
@@ -362,7 +362,10 @@ def genotype(
362362
cpic += f"; cpic={cpic_fun}"
363363
if gene.cpic_scores and cpic_fun != "indeterminate":
364364
cpic += f"; cpic_score={cpic_score}"
365-
print(f"#Solution {i + 1}: {minor_sol._solution_nice()}{cpic}", file=output_file)
365+
print(
366+
f"#Solution {i + 1}: {minor_sol._solution_nice()}{cpic}",
367+
file=output_file,
368+
)
366369
diplotype.write_decomposition(
367370
sample.name, gene, sample.coverage, i + 1, minor_sol, output_file
368371
)

aldy/indelpost/alleles.py

Lines changed: 57 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -32,18 +32,22 @@ def phase_nearby_variants(
3232
# no phasable variants
3333
variants_to_phase = contig.mismatches + contig.non_target_indels
3434
if not variants_to_phase:
35-
return make_target_obj_from_contig(target, indexed_contig)
35+
return make_target_obj_from_contig(target, indexed_contig)
3636

3737
# phase all phasables within the target exon (hard phasing)
3838
if hard:
3939
variants_list = []
40-
cleaned, variant_list = precleaning(indexed_contig, variants_list, target_pos_on_contig, pileup, target)
40+
cleaned, variant_list = precleaning(
41+
indexed_contig, variants_list, target_pos_on_contig, pileup, target
42+
)
4143
return greedy_phasing(target, cleaned)
4244
else:
43-
indexed_contig, variants_to_phase = precleaning(indexed_contig, variants_to_phase, target_pos_on_contig, pileup)
45+
indexed_contig, variants_to_phase = precleaning(
46+
indexed_contig, variants_to_phase, target_pos_on_contig, pileup
47+
)
4448

4549
if not variants_to_phase:
46-
return make_target_obj_from_contig(target, indexed_contig)
50+
return make_target_obj_from_contig(target, indexed_contig)
4751
else:
4852
variants_in_non_targets, mut_frac = variants_in_non_target_pileup(
4953
pileup, target, basequalthresh, to_complex
@@ -69,13 +73,25 @@ def phase_nearby_variants(
6973

7074
remove_deletables(indexed_contig, lt_end, target_pos_on_contig, rt_end)
7175

72-
mismatches_to_phase = [var for var in variants_to_phase if not var.is_indel and indexed_contig.get(var.pos, False)]
73-
non_target_indels_to_phase = [var for var in variants_to_phase if var.is_indel and indexed_contig.get(var.pos, False) and var != target]
76+
mismatches_to_phase = [
77+
var
78+
for var in variants_to_phase
79+
if not var.is_indel and indexed_contig.get(var.pos, False)
80+
]
81+
non_target_indels_to_phase = [
82+
var
83+
for var in variants_to_phase
84+
if var.is_indel and indexed_contig.get(var.pos, False) and var != target
85+
]
7486

7587
if variants_to_phase:
7688
if not non_target_indels_to_phase:
7789
peak_locs = locate_mismatch_cluster_peaks(
78-
indexed_contig, mismatches_to_phase, target, snv_neighborhood, to_complex
90+
indexed_contig,
91+
mismatches_to_phase,
92+
target,
93+
snv_neighborhood,
94+
to_complex,
7995
)
8096

8197
if peak_locs:
@@ -93,10 +109,20 @@ def phase_nearby_variants(
93109
if max(target_len, non_target_max_len) < 4:
94110
indel_neighborhood = int(indel_neighborhood / 2) + 1
95111

96-
remove_common_substrings(indexed_contig, target_pos_on_contig, indel_neighborhood)
112+
remove_common_substrings(
113+
indexed_contig, target_pos_on_contig, indel_neighborhood
114+
)
97115

98-
lt_end = end_point(indexed_contig, mismatches_to_phase, target, snv_neighborhood, left=True)
99-
rt_end = end_point(indexed_contig, mismatches_to_phase, target, snv_neighborhood, left=False)
116+
lt_end = end_point(
117+
indexed_contig, mismatches_to_phase, target, snv_neighborhood, left=True
118+
)
119+
rt_end = end_point(
120+
indexed_contig,
121+
mismatches_to_phase,
122+
target,
123+
snv_neighborhood,
124+
left=False,
125+
)
100126

101127
remove_deletables(indexed_contig, lt_end, target_pos_on_contig, rt_end)
102128

@@ -111,10 +137,13 @@ def phase_nearby_variants(
111137
def make_target_obj_from_contig(target, indexed_contig):
112138
try:
113139
data = indexed_contig[target.pos]
114-
return Variant(target.chrom, target.pos, data[0], data[1], target.reference).normalize()
140+
return Variant(
141+
target.chrom, target.pos, data[0], data[1], target.reference
142+
).normalize()
115143
except:
116144
return target.normalize()
117145

146+
118147
def greedy_phasing(target, indexed_contig):
119148

120149
cpos = 0
@@ -142,7 +171,9 @@ def seq_complexity(contig, snv_neighborhood, indel_neighorhood):
142171
)
143172

144173

145-
def precleaning(genome_indexed_contig, variants_list, target_pos, pileup, limit_to_target_exon=True):
174+
def precleaning(
175+
genome_indexed_contig, variants_list, target_pos, pileup, limit_to_target_exon=True
176+
):
146177
lt_loci, rt_loci = [], []
147178

148179
# filter low qual loci
@@ -234,9 +265,10 @@ def locate_mismatch_cluster_peaks(
234265
else:
235266
return None
236267

237-
238268
lt_peak_pos = target.pos if lt_peak_pos == -np.inf else lt_peak_pos
239-
rt_peak_pos = target.pos + len(target.ref) - 1 if rt_peak_pos == np.inf else rt_peak_pos
269+
rt_peak_pos = (
270+
target.pos + len(target.ref) - 1 if rt_peak_pos == np.inf else rt_peak_pos
271+
)
240272

241273
return (lt_peak_pos - 1, rt_peak_pos + 1)
242274

@@ -248,7 +280,7 @@ def calc_peak(indexed_contig, mismatches, target, snv_neighborhood, left):
248280
loci = [k for k, v in indexed_contig.items() if k <= target_pos][::-1]
249281
snv_loci = [var.pos for var in mismatches if var.pos < target_pos]
250282
else:
251-
del_adjust = len(target.ref) -1
283+
del_adjust = len(target.ref) - 1
252284
loci = [k for k, v in indexed_contig.items() if k > target_pos + del_adjust]
253285
snv_loci = [var.pos for var in mismatches if var.pos > target_pos]
254286

@@ -319,7 +351,7 @@ def variants_in_non_target_pileup(pileup, target, basequalthresh, to_complex):
319351
nontarget_pileup = [
320352
findall_mismatches(read, end_trim=10)
321353
for read in pileup
322-
if not read["is_target" ] and read["is_covering"] and not read["is_dirty"]
354+
if not read["is_target"] and read["is_covering"] and not read["is_dirty"]
323355
]
324356

325357
if not nontarget_pileup:
@@ -340,27 +372,26 @@ def variants_in_non_target_pileup(pileup, target, basequalthresh, to_complex):
340372
indels = [
341373
indel
342374
for indel, cnt in Counter(indels).items()
343-
if (cnt > 2 and cnt/ len(nontarget_pileup) > 0.15)
344-
or cnt > 5
375+
if (cnt > 2 and cnt / len(nontarget_pileup) > 0.15) or cnt > 5
345376
]
346377

347378
mismatches = [
348379
Variant(target.chrom, v[0], v[1], v[2], target.reference)
349380
for read in nontarget_pileup
350-
for v in read["mismatches"] if v[3] > basequalthresh
381+
for v in read["mismatches"]
382+
if v[3] > basequalthresh
351383
]
352384

353-
nontarget_pileup_vol = sum(
354-
max(0, len(read["ref_seq"]) - 20) for read in nontarget_pileup
355-
) + 1
385+
nontarget_pileup_vol = (
386+
sum(max(0, len(read["ref_seq"]) - 20) for read in nontarget_pileup) + 1
387+
)
356388

357389
mutation_frac = (len(mismatches) + len(indels)) / nontarget_pileup_vol
358390

359391
mismatches = [
360392
var
361393
for var, cnt in Counter(mismatches).items()
362-
if (cnt > 2 and cnt / len(nontarget_pileup) > 0.15)
363-
or cnt > 5
394+
if (cnt > 2 and cnt / len(nontarget_pileup) > 0.15) or cnt > 5
364395
]
365396

366397
return set(indels + mismatches), mutation_frac
@@ -396,9 +427,9 @@ def get_freq(freqinfo):
396427
def remove_deletables(indexed_contig, lt_end, target_pos, rt_end):
397428
tmp = indexed_contig.copy()
398429

399-
#if lt_end == -np.inf:
430+
# if lt_end == -np.inf:
400431
# lt_end = target_pos - 1
401-
#if rt_end == np.inf:
432+
# if rt_end == np.inf:
402433
# rt_end = target_pos + 1
403434

404435
for k, v in tmp.items():
@@ -448,7 +479,6 @@ def trim_common(indexed_contig, commons, max_common_str_len, left):
448479
else:
449480
start = search_nearest_lt_locus(indexed_contig, sub_str[0], left)
450481

451-
452482
end = sub_str[-1]
453483
start_event = indexed_contig[start]
454484
end_event = indexed_contig[end]
@@ -579,9 +609,7 @@ def end_point(indexed_contig, mismatches, target, snv_neighborhood, left):
579609
return peak_pos + 1
580610

581611

582-
583612
def get_end_most_indel(indexed_contig, target):
584613
for k, v in indexed_contig.items():
585614
if len(v[0]) != len(v[1]):
586615
return Variant(target.chrom, k, v[0], v[1], target.reference)
587-

0 commit comments

Comments
 (0)