Skip to content

Commit dc34bab

Browse files
authored
Merge pull request #13 from pbashyal-nmdp/cython_fix
Cython fix to build the package
2 parents ce7d8ea + 02334ee commit dc34bab

File tree

16 files changed

+173
-44
lines changed

16 files changed

+173
-44
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,3 +134,6 @@ dmypy.json
134134
# behave
135135
pretty.output
136136
allure_report/
137+
138+
# cython temp files
139+
grim/**/*.c

MANIFEST.in

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
include requirements.txt
2+
include README.md
3+
include LICENSE
4+
include *.txt
5+
recursive-include grim *.py
6+
recursive-include grim *.txt
7+
recursive-include grim *.json
8+
recursive-include grim *.pyx
9+
recursive-include grim *.pyd

grim/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,4 +26,4 @@
2626
"""Top-level package for py-grim."""
2727

2828
__organization__ = "NMDP/CIBMTR Bioinformatics"
29-
__version__ = "0.0.6"
29+
__version__ = "0.0.7"

grim/conf/__init__.py

100644100755
Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,6 @@
2323
#
2424

2525

26-
"""Top-level package for py-grim."""
27-
28-
__organization__ = "NMDP/CIBMTR Bioinformatics"
26+
__author__ = """Martin Maiers"""
27+
__email__ = "[email protected]"
28+
__version__ = "0.0.7"

grim/imputation/__init__.py

100644100755
Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,13 @@
11
# -*- coding: utf-8 -*-
22

3-
#
4-
# grim Graph Imputation
5-
# Copyright (c) 2021 Be The Match operated by National Marrow Donor Program. All Rights Reserved.
63
#
74
# This library is free software; you can redistribute it and/or modify it
85
# under the terms of the GNU Lesser General Public License as published
96
# by the Free Software Foundation; either version 3 of the License, or (at
107
# your option) any later version.
118
#
129
# This library is distributed in the hope that it will be useful, but WITHOUT
13-
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
1411
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
1512
# License for more details.
1613
#
@@ -23,6 +20,6 @@
2320
#
2421

2522

26-
"""Top-level package for py-grim."""
27-
28-
__organization__ = "NMDP/CIBMTR Bioinformatics"
23+
__author__ = """Martin Maiers"""
24+
__email__ = "[email protected]"
25+
__version__ = "0.0.7"

grim/imputation/graph_generation/__init__.py

100644100755
Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,13 @@
11
# -*- coding: utf-8 -*-
22

3-
#
4-
# grim Graph Imputation
5-
# Copyright (c) 2021 Be The Match operated by National Marrow Donor Program. All Rights Reserved.
63
#
74
# This library is free software; you can redistribute it and/or modify it
85
# under the terms of the GNU Lesser General Public License as published
96
# by the Free Software Foundation; either version 3 of the License, or (at
107
# your option) any later version.
118
#
129
# This library is distributed in the hope that it will be useful, but WITHOUT
13-
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
1411
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
1512
# License for more details.
1613
#
@@ -23,6 +20,6 @@
2320
#
2421

2522

26-
"""Top-level package for py-grim."""
27-
28-
__organization__ = "NMDP/CIBMTR Bioinformatics"
23+
__author__ = """Martin Maiers"""
24+
__email__ = "[email protected]"
25+
__version__ = "0.0.7"

grim/imputation/imputegl/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,4 +24,4 @@
2424

2525
__author__ = """Martin Maiers"""
2626
__email__ = "[email protected]"
27-
__version__ = "0.0.4"
27+
__version__ = "0.0.7"
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
import cython
2+
3+
4+
@cython.boundscheck(False)
5+
@cython.wraparound(False)
6+
cpdef open_ambiguities(list hap, unsigned char loc, tuple split_loc):
7+
cdef unsigned int k, i, p, j #hap_len, haps_len, splits_len
8+
cdef Py_ssize_t hap_len, haps_len, splits_len
9+
cdef list hap_new, hap1
10+
# cdef np.ndarray[STR, ndim=1] hap_new, hap1
11+
p = 0
12+
if len(split_loc) > 1:
13+
# This opens all allele ambiguities
14+
hap_len = len(hap[0])
15+
haps_len = len(hap)
16+
splits_len = len(split_loc)
17+
hap_new = [None] * (haps_len * splits_len)
18+
# hap_new = np.empty(haps_len * splits_len, dtype=np.object) # produces an empty list of haplotypes
19+
hap1 = [None] * hap_len
20+
# hap1 = np.empty(haps_len, dtype=np.object)
21+
for k in range(haps_len): # split a given locus in all haps.
22+
23+
for j in range(hap_len):
24+
hap1[j] = hap[k][j]
25+
26+
for i in range(splits_len):
27+
hap1[loc] = split_loc[i]
28+
hap_new[p] = hap1[:]
29+
p += 1
30+
return hap_new
31+
return hap
32+
33+
@cython.boundscheck(False)
34+
@cython.wraparound(False)
35+
cpdef create_hap_list(list all_haps, dict optionDict, unsigned int N_Loc):
36+
cdef unsigned int i, j, count
37+
cdef list hap_list = []
38+
cdef list all_hap_split
39+
40+
for i in range(len(all_haps)):
41+
all_hap_split = all_haps[i].split('~')
42+
count = 0
43+
for j in range(len(all_hap_split)):
44+
if all_hap_split[j] not in optionDict:
45+
break
46+
else:
47+
count += 1
48+
49+
if count == N_Loc:
50+
hap_list.append(all_hap_split)
51+
return hap_list
52+
53+
@cython.boundscheck(False)
54+
@cython.wraparound(False)
55+
cpdef deepcopy_list(list l):
56+
cdef list copy_l
57+
cdef unsigned int i, length
58+
length = len(l)
59+
copy_l = [None] * length
60+
for i in range(length):
61+
if isinstance(l[i], list):
62+
copy_l[i] = deepcopy_list(l[i])
63+
else:
64+
copy_l[i] = l[i]
65+
return copy_l

grim/imputation/imputegl/impute.py

Lines changed: 33 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,9 @@
77
import os.path
88
import json
99

10-
import numpy as np
1110

11+
import numpy as np
12+
from .cutils import open_ambiguities, create_hap_list, deepcopy_list
1213
from .cypher_plan_b import CypherQueryPlanB
1314
from .cypher_query import CypherQuery
1415

@@ -118,6 +119,31 @@ def clean_up_gl(gl):
118119

119120

120121
class Imputation(object):
122+
__slots__ = (
123+
"logger",
124+
"verbose",
125+
"populations",
126+
"netGraph",
127+
"priorMatrix",
128+
"full_hapl",
129+
"index_dict",
130+
"full_loci",
131+
"factor",
132+
"_factor_missing_data",
133+
"cypher",
134+
"cypher_plan_b",
135+
"matrix_planb",
136+
"count_by_prob",
137+
"number_of_options_threshold",
138+
"plan",
139+
"option_1",
140+
"option_2",
141+
"haplotypes_number_in_phase",
142+
"save_space_mode",
143+
"nodes_for_plan_A",
144+
"unk_priors",
145+
)
146+
121147
def __init__(self, net=None, config=None, count_by_prob=None, verbose=False):
122148
"""Constructor
123149
Initialize an instance of `Imputation` with a py2neo graph
@@ -914,8 +940,8 @@ def open_phases(self, haps, N_Loc, gl_string):
914940
fq = []
915941

916942
for k in range(2):
917-
hap_list = []
918-
hap_list.append(haps[j][k])
943+
hap_list = [haps[j][k]]
944+
hap_list_splits = [tuple(allele.split("/")) for allele in hap_list[0]]
919945

920946
# compute the number of options:
921947
options = 1
@@ -1590,12 +1616,12 @@ def comp_cand(
15901616
# probabilities and accumulate cartesian product. Epsilon=0.0001
15911617
chr = self.gl2haps(gl_string)
15921618
if chr == []:
1593-
return
1619+
return None, None
15941620
# if we are in 9-loci, check if the input type is in a valid format
15951621
if self.nodes_for_plan_A:
15961622
geno_type = self.input_type(chr["Genotype"][0])
15971623
if not geno_type in self.nodes_for_plan_A:
1598-
return
1624+
return None, None
15991625

16001626
n_loci = chr["N_Loc"]
16011627

@@ -1604,7 +1630,7 @@ def comp_cand(
16041630

16051631
# return if the result is empty (why would that be?)
16061632
if pmags == []:
1607-
return
1633+
return None, None
16081634

16091635
# res_muugs = {'Haps': 'NaN', 'Probs': 0}
16101636
res_muugs = {"MaxProb": 0, "Haps": {}, "Pops": {}}
@@ -1713,7 +1739,7 @@ def call_comp_phase_prob(self, epsilon, n, phases, chr, MUUG_output, planb):
17131739
epsilon /= 10
17141740
if epsilon < min_epsilon:
17151741
epsilon = 0.0
1716-
phases_planb = copy.deepcopy(phases)
1742+
phases_planb = deepcopy_list(phases)
17171743
# Find the option according to plan b
17181744
if MUUG_output:
17191745
res = self.comp_phase_prob_plan_b(

grim/imputation/imputegl/networkx_graph.py

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,19 @@ def missing(labelA, labelB):
99

1010

1111
class Graph(object):
12+
__slots__ = (
13+
"graph",
14+
"labelDict",
15+
"whole_graph",
16+
"full_loci",
17+
"nodes_plan_a",
18+
"nodes_plan_b",
19+
)
20+
1221
def __init__(self, config):
13-
self.graph = nx.Graph()
22+
self.graph = nx.DiGraph()
1423
self.labelDict = {}
15-
self.whole_graph = nx.Graph()
24+
self.whole_graph = nx.DiGraph()
1625
self.full_loci = config["full_loci"]
1726
self.nodes_plan_a, self.nodes_plan_b = [], []
1827
if config["nodes_for_plan_A"]:
@@ -63,8 +72,11 @@ def build_graph(self, nodesFile, edgesFile, allEdgesFile):
6372
if len(row) > 0:
6473
node1 = nodesDict[row[0]]
6574
node2 = nodesDict[row[1]]
66-
if node1 in self.graph.nodes() and node2 in self.graph.nodes():
67-
self.graph.add_edge(node1, node2)
75+
if node1 in self.graph and node2 in self.graph:
76+
if self.graph.nodes[node1]["label"] == self.full_loci:
77+
self.graph.add_edge(node2, node1)
78+
else:
79+
self.graph.add_edge(node1, node2)
6880

6981
edgesfile.close()
7082

@@ -88,7 +100,6 @@ def build_graph(self, nodesFile, edgesFile, allEdgesFile):
88100
self.whole_graph.add_edge(node1, node2, color=kind)
89101

90102
allEdgesfile.close()
91-
92103
nodesDict.clear()
93104

94105
# return all haplotype by specific label

0 commit comments

Comments
 (0)