|
| 1 | +"""Compute various useful metrics. |
| 2 | +""" |
| 3 | + |
| 4 | +#----------------------------------------------------------------------------- |
| 5 | +# Imports |
| 6 | +#----------------------------------------------------------------------------- |
| 7 | + |
| 8 | +import networkx as nx |
| 9 | +import numpy as np |
| 10 | +from scipy import sparse |
| 11 | + |
| 12 | +#----------------------------------------------------------------------------- |
| 13 | +# Functions |
| 14 | +#----------------------------------------------------------------------------- |
| 15 | + |
| 16 | +def inter_node_distances(graph): |
| 17 | + """Compute the shortest path lengths between all nodes in graph. |
| 18 | +
|
| 19 | + This performs the same operation as NetworkX's |
| 20 | + all_pairs_shortest_path_lengths with two exceptions: Here, self |
| 21 | + paths are excluded from the dictionary returned, and the distance |
| 22 | + between disconnected nodes is set to infinity. The latter |
| 23 | + difference is consistent with the Brain Connectivity Toolbox for |
| 24 | + Matlab. |
| 25 | +
|
| 26 | + Parameters |
| 27 | + ---------- |
| 28 | + graph: networkx Graph |
| 29 | + An undirected graph. |
| 30 | +
|
| 31 | + Returns |
| 32 | + ------- |
| 33 | + lengths: dictionary |
| 34 | + Dictionary of shortest path lengths keyed by source and target. |
| 35 | +
|
| 36 | + """ |
| 37 | + lengths = nx.all_pairs_shortest_path_length(graph) |
| 38 | + node_labels = sorted(lengths) |
| 39 | + for src in node_labels: |
| 40 | + lengths[src].pop(src) |
| 41 | + for targ in node_labels: |
| 42 | + if src != targ: |
| 43 | + try: |
| 44 | + lengths[src][targ] |
| 45 | + except KeyError: |
| 46 | + lengths[src][targ] = np.inf |
| 47 | + return lengths |
| 48 | + |
| 49 | + |
| 50 | +def compute_sigma(arr,clustarr,lparr): |
| 51 | + """ Function for computing sigma given a graph array arr and clust and lp |
| 52 | + arrays from a pseudorandom graph for a particular block b.""" |
| 53 | + |
| 54 | + gc = arr['clust']#np.squeeze(arr['clust']) |
| 55 | + glp = arr['lp']#np.squeeze(arr['lp']) |
| 56 | + out = (gc/clustarr)/(glp/lparr) |
| 57 | + |
| 58 | + |
| 59 | + return out |
| 60 | + |
| 61 | + |
| 62 | +def nodal_pathlengths(graph): |
| 63 | + """Compute mean path length for each node. |
| 64 | +
|
| 65 | + Parameters |
| 66 | + ---------- |
| 67 | + graph: networkx Graph |
| 68 | + An undirected graph. |
| 69 | +
|
| 70 | + Returns |
| 71 | + ------- |
| 72 | + nodal_means: numpy array |
| 73 | + An array with each node's mean shortest path length to all other |
| 74 | + nodes. The array is in ascending order of node labels. |
| 75 | +
|
| 76 | + Notes |
| 77 | + ----- |
| 78 | + Per the Brain Connectivity Toolbox for Matlab, the distance between |
| 79 | + one node and another that cannot be reached from it is set to |
| 80 | + infinity. |
| 81 | +
|
| 82 | + """ |
| 83 | + lengths = inter_node_distances(graph) |
| 84 | + nodal_means = [np.mean(lengths[src].values()) for src in sorted(lengths)] |
| 85 | + return np.array(nodal_means) |
| 86 | + |
| 87 | + |
| 88 | +def assert_no_selfloops(graph): |
| 89 | + """Raise an error if the graph graph has any selfloops. |
| 90 | + """ |
| 91 | + if graph.nodes_with_selfloops(): |
| 92 | + raise ValueError("input graph can not have selfloops") |
| 93 | + |
| 94 | + |
| 95 | +def path_lengths(graph): |
| 96 | + """Compute array of all shortest path lengths for the given graph. |
| 97 | +
|
| 98 | + The length of the output array is the number of unique pairs of nodes that |
| 99 | + have a connecting path, so in general it is not known in advance. |
| 100 | +
|
| 101 | + This assumes the graph is undirected, as for any pair of reachable nodes, |
| 102 | + once we've seen the pair we do not keep the path length value for the |
| 103 | + inverse path. |
| 104 | + |
| 105 | + Parameters |
| 106 | + ---------- |
| 107 | + graph : an undirected graph object. |
| 108 | + """ |
| 109 | + |
| 110 | + assert_no_selfloops(graph) |
| 111 | + |
| 112 | + length = nx.all_pairs_shortest_path_length(graph) |
| 113 | + paths = [] |
| 114 | + seen = set() |
| 115 | + for src,targets in length.iteritems(): |
| 116 | + seen.add(src) |
| 117 | + neigh = set(targets.keys()) - seen |
| 118 | + paths.extend(targets[targ] for targ in neigh) |
| 119 | + |
| 120 | + |
| 121 | + return np.array(paths) |
| 122 | + |
| 123 | + |
| 124 | +#@profile |
| 125 | +def path_lengthsSPARSE(graph): |
| 126 | + """Compute array of all shortest path lengths for the given graph. |
| 127 | +
|
| 128 | + XXX - implementation using scipy.sparse. This might be faster for very |
| 129 | + sparse graphs, but so far for our cases the overhead of handling the sparse |
| 130 | + matrices doesn't seem to be worth it. We're leaving it in for now, in case |
| 131 | + we revisit this later and it proves useful. |
| 132 | +
|
| 133 | + The length of the output array is the number of unique pairs of nodes that |
| 134 | + have a connecting path, so in general it is not known in advance. |
| 135 | +
|
| 136 | + This assumes the graph is undirected, as for any pair of reachable nodes, |
| 137 | + once we've seen the pair we do not keep the path length value for the |
| 138 | + inverse path. |
| 139 | + |
| 140 | + Parameters |
| 141 | + ---------- |
| 142 | + graph : an undirected graph object. |
| 143 | + """ |
| 144 | + |
| 145 | + assert_no_selfloops(graph) |
| 146 | + |
| 147 | + length = nx.all_pairs_shortest_path_length(graph) |
| 148 | + |
| 149 | + nnod = graph.number_of_nodes() |
| 150 | + paths_mat = sparse.dok_matrix((nnod,nnod)) |
| 151 | + |
| 152 | + for src,targets in length.iteritems(): |
| 153 | + for targ,val in targets.items(): |
| 154 | + paths_mat[src,targ] = val |
| 155 | + |
| 156 | + return sparse.triu(paths_mat,1).data |
| 157 | + |
| 158 | + |
| 159 | +def glob_efficiency(graph): |
| 160 | + """Compute array of global efficiency for the given graph. |
| 161 | +
|
| 162 | + Global efficiency: returns a list of the inverse path length matrix |
| 163 | + across all nodes The mean of this value is equal to the global efficiency |
| 164 | + of the network.""" |
| 165 | + |
| 166 | + return 1.0/path_lengths(graph) |
| 167 | + |
| 168 | + |
| 169 | +def nodal_efficiency(graph): |
| 170 | + """Return array with nodal efficiency for each node in graph. |
| 171 | +
|
| 172 | + See Achard and Bullmore (2007, PLoS Comput Biol) for the definition |
| 173 | + of nodal efficiency. |
| 174 | +
|
| 175 | + Parameters |
| 176 | + ---------- |
| 177 | + graph: networkx Graph |
| 178 | + An undirected graph. |
| 179 | +
|
| 180 | + Returns |
| 181 | + ------- |
| 182 | + nodal_efficiencies: numpy array |
| 183 | + An array with the nodal efficiency for each node in graph, in |
| 184 | + the order specified by node_labels. The array is in ascending |
| 185 | + order of node labels. |
| 186 | +
|
| 187 | + Notes |
| 188 | + ----- |
| 189 | + Per the Brain Connectivity Toolbox for Matlab, the distance between |
| 190 | + one node and another that cannot be reached from it is set to |
| 191 | + infinity. |
| 192 | +
|
| 193 | + """ |
| 194 | + lengths = inter_node_distances(graph) |
| 195 | + nodal_efficiencies = np.zeros(len(lengths), dtype=float) |
| 196 | + for src in sorted(lengths): |
| 197 | + inverse_paths = [1.0 / val for val in lengths[src].itervalues()] |
| 198 | + nodal_efficiencies[src] = np.mean(inverse_paths) |
| 199 | + return nodal_efficiencies |
| 200 | + |
| 201 | + |
| 202 | +def local_efficiency(graph): |
| 203 | + """Compute array of global efficiency for the given grap.h |
| 204 | +
|
| 205 | + Local efficiency: returns a list of paths that represent the nodal |
| 206 | + efficiencies across all nodes with their direct neighbors""" |
| 207 | + |
| 208 | + nodepaths=[] |
| 209 | + length=nx.all_pairs_shortest_path_length(graph) |
| 210 | + for n in graph.nodes(): |
| 211 | + nneighb= nx.neighbors(graph,n) |
| 212 | + |
| 213 | + paths=[] |
| 214 | + for src,targets in length.iteritems(): |
| 215 | + for targ,val in targets.iteritems(): |
| 216 | + val=float(val) |
| 217 | + if src==targ: |
| 218 | + continue |
| 219 | + if src in nneighb and targ in nneighb: |
| 220 | + |
| 221 | + paths.append(1/val) |
| 222 | + |
| 223 | + p=np.array(paths) |
| 224 | + psize=np.size(p) |
| 225 | + if (psize==0): |
| 226 | + p=np.array(0) |
| 227 | + |
| 228 | + nodepaths.append(p.mean()) |
| 229 | + |
| 230 | + return np.array(nodepaths) |
| 231 | + |
| 232 | + |
| 233 | +def local_efficiency(graph): |
| 234 | + """Compute array of local efficiency for the given graph. |
| 235 | +
|
| 236 | + Local efficiency: returns a list of paths that represent the nodal |
| 237 | + efficiencies across all nodes with their direct neighbors""" |
| 238 | + |
| 239 | + assert_no_selfloops(graph) |
| 240 | + |
| 241 | + nodepaths = [] |
| 242 | + length = nx.all_pairs_shortest_path_length(graph) |
| 243 | + for n in graph: |
| 244 | + nneighb = set(nx.neighbors(graph,n)) |
| 245 | + |
| 246 | + paths = [] |
| 247 | + for nei in nneighb: |
| 248 | + other_neighbors = nneighb - set([nei]) |
| 249 | + nei_len = length[nei] |
| 250 | + paths.extend( [nei_len[o] for o in other_neighbors] ) |
| 251 | + |
| 252 | + if paths: |
| 253 | + p = 1.0 / np.array(paths,float) |
| 254 | + nodepaths.append(p.mean()) |
| 255 | + else: |
| 256 | + nodepaths.append(0.0) |
| 257 | + |
| 258 | + return np.array(nodepaths) |
| 259 | + |
| 260 | + |
| 261 | +def dynamical_importance(graph): |
| 262 | + """Compute dynamical importance for graph. |
| 263 | +
|
| 264 | + Ref: Restrepo, Ott, Hunt. Phys. Rev. Lett. 97, 094102 (2006) |
| 265 | + """ |
| 266 | + # spectrum of the original graph |
| 267 | + eigvals = nx.adjacency_spectrum(graph) |
| 268 | + lambda0 = eigvals[0] |
| 269 | + # Now, loop over all nodes in graph, and for each, make a copy of graph, remove |
| 270 | + # that node, and compute the change in lambda. |
| 271 | + nnod = graph.number_of_nodes() |
| 272 | + dyimp = np.empty(nnod,float) |
| 273 | + for n in range(nnod): |
| 274 | + gn = graph.copy() |
| 275 | + gn.remove_node(n) |
| 276 | + lambda_n = nx.adjacency_spectrum(gn)[0] |
| 277 | + dyimp[n] = lambda0 - lambda_n |
| 278 | + # Final normalization |
| 279 | + dyimp /= lambda0 |
| 280 | + return dyimp |
| 281 | + |
| 282 | + |
| 283 | +def weighted_degree(graph): |
| 284 | + """Return an array of degrees that takes weights into account. |
| 285 | +
|
| 286 | + For unweighted graphs, this is the same as the normal degree() method |
| 287 | + (though we return an array instead of a list). |
| 288 | + """ |
| 289 | + amat = nx.adj_matrix(graph).A # get a normal array out of it |
| 290 | + return abs(amat).sum(0) # weights are sums across rows |
| 291 | + |
| 292 | + |
| 293 | +def graph_summary(graph): |
| 294 | + """Compute a set of statistics summarizing the structure of a graph. |
| 295 | + |
| 296 | + Parameters |
| 297 | + ---------- |
| 298 | + graph : a graph object. |
| 299 | +
|
| 300 | + threshold : float, optional |
| 301 | +
|
| 302 | + Returns |
| 303 | + ------- |
| 304 | + Mean values for: lp, clust, glob_eff, loc_eff, in a dict. |
| 305 | + """ |
| 306 | + |
| 307 | + # Average path length |
| 308 | + lp = path_lengths(graph) |
| 309 | + clust = np.array(nx.clustering(graph).values()) |
| 310 | + glob_eff = glob_efficiency(graph) |
| 311 | + loc_eff = local_efficiency(graph) |
| 312 | + |
| 313 | + return dict( lp=lp.mean(), clust=clust.mean(), glob_eff=glob_eff.mean(), |
| 314 | + loc_eff=loc_eff.mean() ) |
| 315 | + |
| 316 | + |
| 317 | +def nodal_summaryOut(graph): |
| 318 | + """Compute statistics for individual nodes. |
| 319 | +
|
| 320 | + Parameters |
| 321 | + ---------- |
| 322 | + graph: networkx graph |
| 323 | + An undirected graph. |
| 324 | + |
| 325 | + Returns |
| 326 | + ------- |
| 327 | + dictionary |
| 328 | + The keys of this dictionary are lp (which refers to path |
| 329 | + length), clust (clustering coefficient), b_cen (betweenness |
| 330 | + centrality), c_cen (closeness centrality), nod_eff (nodal |
| 331 | + efficiency), loc_eff (local efficiency), and deg (degree). The |
| 332 | + values are arrays (or lists, in some cases) of metrics, in |
| 333 | + ascending order of node labels. |
| 334 | +
|
| 335 | + """ |
| 336 | + lp = nodal_pathlengths(graph) |
| 337 | + clust_dict = nx.clustering(graph) |
| 338 | + clust = np.array([clust_dict[n] for n in sorted(clust_dict)]) |
| 339 | + b_cen_dict = nx.betweenness_centrality(graph) |
| 340 | + b_cen = np.array([b_cen_dict[n] for n in sorted(b_cen_dict)]) |
| 341 | + c_cen_dict = nx.closeness_centrality(graph) |
| 342 | + c_cen = np.array([c_cen_dict[n] for n in sorted(c_cen_dict)]) |
| 343 | + nod_eff = nodal_efficiency(graph) |
| 344 | + loc_eff = local_efficiency(graph) |
| 345 | + deg_dict = graph.degree() |
| 346 | + deg = [deg_dict[n] for n in sorted(deg_dict)] |
| 347 | + return dict(lp=lp, clust=clust, b_cen=b_cen, c_cen=c_cen, nod_eff=nod_eff, |
| 348 | + loc_eff=loc_eff, deg=deg) |
0 commit comments