Skip to content

Commit 54a1c0d

Browse files
committed
Correct distance calculation for disconnected nodes.
1 parent ddb8ebc commit 54a1c0d

File tree

2 files changed

+167
-37
lines changed

2 files changed

+167
-37
lines changed

brainx/metrics.py

Lines changed: 90 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -38,21 +38,38 @@ def nodal_pathlengths(G, n_nodes):
3838
Returns
3939
-------
4040
nodal_means: numpy array
41-
Array of length n_nodes with each node's mean shortest path
42-
length to other nodes. For disconnected nodes, the value in
43-
this array is np.nan.
41+
An array with each node's mean shortest path length to all other
42+
nodes. The array is in ascending order of node labels.
43+
44+
Notes
45+
-----
46+
This function assumes the nodes are labeled 0 to n_nodes - 1.
47+
48+
Per the Brain Connectivity Toolbox for Matlab, the distance between
49+
one node and another that cannot be reached from it is set to
50+
infinity.
4451
4552
"""
4653
# float is the default dtype for np.zeros, but we'll choose it explicitly
4754
# in case numpy ever changes the default to something else.
4855
nodal_means = np.zeros(n_nodes, dtype=float)
4956
lengths = nx.all_pairs_shortest_path_length(G)
50-
for src, pair in lengths.iteritems():
51-
source_arr = np.array([val for targ, val in pair.iteritems() if src !=
52-
targ], dtype=float)
53-
if source_arr.size == 0:
54-
source_arr=np.array([np.nan])
55-
nodal_means[src]=source_arr.mean()
57+
# As stated in the Python documentation, "Keys and values are listed in an
58+
# arbitrary order which is non-random, varies across Python
59+
# implementations, and depends on the dictionary's history of insertions
60+
# and deletions." Thus, we cannot assume we'd traverse the nodes in
61+
# ascending order if we were to iterate through 'lengths'.
62+
node_labels = range(n_nodes)
63+
for src in node_labels:
64+
source_lengths = []
65+
for targ in node_labels:
66+
if src != targ:
67+
try:
68+
val = lengths[src][targ]
69+
except KeyError:
70+
val = np.inf
71+
source_lengths.append(val)
72+
nodal_means[src] = np.mean(source_lengths)
5673
return nodal_means
5774

5875

@@ -135,27 +152,54 @@ def glob_efficiency(G):
135152
of the network."""
136153

137154
return 1.0/path_lengths(G)
138-
139-
def nodal_efficiency(G):
140-
"""Compute array of global efficiency for the given graph.
141155

142-
Nodal efficiency: XXX - define."""
143-
144-
nodepaths=[]
145-
length = nx.all_pairs_shortest_path_length(G)
146-
for src,targets in length.iteritems():
147-
paths=[]
148-
for targ,val in targets.items():
149-
if src==targ:
150-
continue
151-
152-
paths.append(1.0/val)
153-
154-
nodepaths.append(np.mean(paths))
155-
156-
return np.array(nodepaths)
157156

158-
#@profile
157+
def nodal_efficiency(G, n_nodes):
158+
"""Return array with nodal efficiency for each node in G.
159+
160+
See Achard and Bullmore (2007, PLoS Comput Biol) for the definition
161+
of nodal efficiency.
162+
163+
Parameters
164+
----------
165+
G: networkx Graph
166+
An undirected graph.
167+
168+
n_nodes: integer
169+
Number of nodes in G.
170+
171+
Returns
172+
-------
173+
nodal_efficiencies: numpy array
174+
An array with the nodal efficiency for each node in G, in
175+
the order specified by node_labels. The array is in ascending
176+
order of node labels.
177+
178+
Notes
179+
-----
180+
This function assumes the nodes are labeled 0 to n_nodes - 1.
181+
182+
Per the Brain Connectivity Toolbox for Matlab, the distance between
183+
one node and another that cannot be reached from it is set to
184+
infinity.
185+
186+
"""
187+
nodal_efficiencies = np.zeros(n_nodes, dtype=float)
188+
lengths = nx.all_pairs_shortest_path_length(G)
189+
node_labels = range(n_nodes)
190+
for src in node_labels:
191+
inverse_paths = []
192+
for targ in node_labels:
193+
if src != targ:
194+
try:
195+
val = lengths[src][targ]
196+
except KeyError:
197+
val = np.inf
198+
inverse_paths.append(1.0 / val)
199+
nodal_efficiencies[src] = np.mean(inverse_paths)
200+
return nodal_efficiencies
201+
202+
159203
def local_efficiency(G):
160204
"""Compute array of global efficiency for the given grap.h
161205
@@ -290,17 +334,26 @@ def nodal_summaryOut(G, n_nodes):
290334
length), clust (clustering coefficient), b_cen (betweenness
291335
centrality), c_cen (closeness centrality), nod_eff (nodal
292336
efficiency), loc_eff (local efficiency), and deg (degree). The
293-
values are lists of metrics, in ascending order of node labels.
337+
values are arrays (or lists, in some cases) of metrics, in
338+
ascending order of node labels.
294339
295340
"""
296-
# nodal_pathlengths is needed because NetworkX's shortest_path_length
297-
# returns an empty list for disconnected nodes.
298-
lp = nodal_pathlengths(G,n_nodes)
299-
clust = np.array(nx.clustering(G).values())
300-
b_cen = np.array(nx.betweenness_centrality(G).values())
301-
c_cen = np.array(nx.closeness_centrality(G).values())
302-
nod_eff = nodal_efficiency(G)
341+
lp = nodal_pathlengths(G, n_nodes)
342+
# As stated in the Python documentation, "Keys and values are listed in an
343+
# arbitrary order which is non-random, varies across Python
344+
# implementations, and depends on the dictionary's history of insertions
345+
# and deletions." Thus, we cannot expect, e.g., nx.clustering(G)
346+
# to have the nodes listed in ascending order, as we desire.
347+
node_labels = range(n_nodes)
348+
clust_dict = nx.clustering(G)
349+
clust = np.array([clust_dict[n] for n in node_labels])
350+
b_cen_dict = nx.betweenness_centrality(G)
351+
b_cen = np.array([b_cen_dict[n] for n in node_labels])
352+
c_cen_dict = nx.closeness_centrality(G)
353+
c_cen = np.array([c_cen_dict[n] for n in node_labels])
354+
nod_eff = nodal_efficiency(G, n_nodes)
303355
loc_eff = local_efficiency(G)
304-
deg = G.degree().values()
356+
deg_dict = G.degree()
357+
deg = [deg_dict[n] for n in node_labels]
305358
return dict(lp=lp, clust=clust, b_cen=b_cen, c_cen=c_cen, nod_eff=nod_eff,
306359
loc_eff=loc_eff, deg=deg)

brainx/tests/test_metrics.py

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,83 @@
1717
# Functions
1818
#-----------------------------------------------------------------------------
1919

20+
def test_nodal_pathlengths():
21+
corr_mat = np.array([[0.0, 0.0, 0.0, 0.0, 0.0],
22+
[0.5, 0.0, 0.0, 0.0, 0.0],
23+
[0.3, 0.4, 0.0, 0.0, 0.0],
24+
[0.0, 0.0, 0.7, 0.0, 0.0],
25+
[0.0, 0.0, 0.0, 0.4, 0.0]])
26+
n_nodes = 5
27+
g = nx.from_numpy_matrix(corr_mat)
28+
path_lengths = metrics.nodal_pathlengths(g, n_nodes)
29+
# Distances for all node pairs:
30+
# 0-1: 1 1-2: 1 2-3: 1 3-4: 1
31+
# 0-2: 1 1-3: 2 2-4: 2
32+
# 0-3: 2 1-4: 3
33+
# 0-4: 3
34+
desired = 1.0 / (n_nodes - 1) * np.array([1 + 1 + 2 + 3,
35+
1 + 1 + 2 + 3,
36+
1 + 1 + 1 + 2,
37+
2 + 2 + 1 + 1,
38+
3 + 3 + 2 + 1])
39+
npt.assert_array_almost_equal(path_lengths, desired)
40+
# Check how unreachability is handled.
41+
g.remove_edge(2, 3)
42+
# Now all nodes have at least one edge, but not all nodes are reachable
43+
# from all others.
44+
path_lengths = metrics.nodal_pathlengths(g, n_nodes)
45+
# Distances for all node pairs:
46+
# 0-1: 1 1-2: 1 2-3: Inf 3-4: 1
47+
# 0-2: 1 1-3: Inf 2-4: Inf
48+
# 0-3: Inf 1-4: Inf
49+
# 0-4: Inf
50+
desired = 1.0 / (n_nodes - 1) * np.array([1 + 1 + np.inf + np.inf,
51+
1 + 1 + np.inf + np.inf,
52+
1 + 1 + np.inf + np.inf,
53+
np.inf + np.inf + np.inf + 1,
54+
np.inf + np.inf + np.inf + 1])
55+
npt.assert_array_almost_equal(path_lengths, desired)
56+
57+
58+
def test_nodal_efficiency():
59+
corr_mat = np.array([[0.0, 0.0, 0.0, 0.0, 0.0],
60+
[0.5, 0.0, 0.0, 0.0, 0.0],
61+
[0.3, 0.4, 0.0, 0.0, 0.0],
62+
[0.0, 0.0, 0.7, 0.0, 0.0],
63+
[0.0, 0.0, 0.0, 0.4, 0.0]])
64+
n_nodes = 5
65+
g = nx.from_numpy_matrix(corr_mat)
66+
n_eff_array = metrics.nodal_efficiency(g, n_nodes)
67+
# Distances for all node pairs:
68+
# 0-1: 1 1-2: 1 2-3: 1 3-4: 1
69+
# 0-2: 1 1-3: 2 2-4: 2
70+
# 0-3: 2 1-4: 3
71+
# 0-4: 3
72+
desired = 1.0 / (n_nodes - 1) * np.array([1 + 1 + 1 / 2.0 + 1 / 3.0,
73+
1 + 1 + 1 / 2.0 + 1 / 3.0,
74+
1 + 1 + 1 + 1 / 2.0,
75+
1 / 2.0 + 1 / 2.0 + 1 + 1,
76+
1 / 3.0 + 1 / 3.0 + 1 / 2.0 + 1])
77+
npt.assert_array_almost_equal(n_eff_array, desired)
78+
# Check how unreachability is handled.
79+
g.remove_edge(2, 3)
80+
# Now all nodes have at least one edge, but not all nodes are reachable
81+
# from all others.
82+
n_eff_array = metrics.nodal_efficiency(g, n_nodes)
83+
# Distances for all node pairs:
84+
# 0-1: 1 1-2: 1 2-3: Inf 3-4: 1
85+
# 0-2: 1 1-3: Inf 2-4: Inf
86+
# 0-3: Inf 1-4: Inf
87+
# 0-4: Inf
88+
desired = (1.0 / (n_nodes - 1) *
89+
np.array([1 + 1 + 1 / np.inf + 1 / np.inf,
90+
1 + 1 + 1 / np.inf + 1 / np.inf,
91+
1 + 1 + 1 / np.inf + 1 / np.inf,
92+
1 / np.inf + 1 / np.inf + 1 / np.inf + 1,
93+
1 / np.inf + 1 / np.inf + 1 / np.inf + 1]))
94+
npt.assert_array_almost_equal(n_eff_array, desired)
95+
96+
2097
def test_path_lengths():
2198
"""Very primitive tests, just using complete graphs which are easy. Better
2299
than nothing..."""

0 commit comments

Comments
 (0)