nipy
diff --git a/‎brainx/metrics.py‎
Lines changed: 159 additions & 98 deletions b/‎brainx/metrics.py‎
Lines changed: 159 additions & 98 deletions
@@ -5,14 +5,48 @@
 # Imports
 #-----------------------------------------------------------------------------
 
-
 import networkx as nx
 import numpy as np
 from scipy import sparse
 
 #-----------------------------------------------------------------------------
 # Functions
 #-----------------------------------------------------------------------------
+
+def inter_node_distances(graph):
+    """Compute the shortest path lengths between all nodes in graph.
+
+    This performs the same operation as NetworkX's
+    all_pairs_shortest_path_lengths with two exceptions: Here, self
+    paths are excluded from the dictionary returned, and the distance
+    between disconnected nodes is set to infinity.  The latter
+    difference is consistent with the Brain Connectivity Toolbox for
+    Matlab.
+
+    Parameters
+    ----------
+    graph: networkx Graph
+        An undirected graph.
+
+    Returns
+    -------
+    lengths: dictionary
+        Dictionary of shortest path lengths keyed by source and target.
+
+    """
+    lengths = nx.all_pairs_shortest_path_length(graph)
+    node_labels = sorted(lengths)
+    for src in node_labels:
+        lengths[src].pop(src)
+        for targ in node_labels:
+            if src != targ:
+                try:
+                    lengths[src][targ]
+                except KeyError:
+                    lengths[src][targ] = np.inf
+    return lengths
+
+
 def compute_sigma(arr,clustarr,lparr):
     """ Function for computing sigma given a graph array arr and clust and lp
     arrays from a pseudorandom graph for a particular block b."""
@@ -24,35 +58,41 @@ def compute_sigma(arr,clustarr,lparr):
 
     return out
 
-def nodal_pathlengths(G,n_nodes):
-    """ Compute mean path length for each node.
-    Note: it is unclear how to treat infinite path lengths.  For now, I replace them with np.inf, but this may make taking the mean later on difficult
-    Inputs: G graph data output from mkgraph; n_nodes number of nodes in graph"""
-    nodal_means=np.zeros((n_nodes),dtype=float)
-    lengths= nx.all_pairs_shortest_path_length(G)
-    for src,pair in lengths.iteritems():
-        source_paths=[]
-        source_arr=np.array([])
-        for targ,val in pair.items():
-            if src==targ:
-                continue # we want to include src,target repeats, right?
-            source_paths.append(float(val))
-            source_arr=np.array(source_paths)
-           
-        if source_arr.size==0: #make the mean path length 0 if node is disconnected
-            source_arr=np.array([np.nan])
-        nodal_means[src]=source_arr.mean()
-    #nodal_array=np.array(nodal_means)
-    return nodal_means
-
-def assert_no_selfloops(G):
-    """Raise an error if the graph G has any selfloops.
+
+def nodal_pathlengths(graph):
+    """Compute mean path length for each node.
+
+    Parameters
+    ----------
+    graph: networkx Graph
+        An undirected graph.
+
+    Returns
+    -------
+    nodal_means: numpy array
+        An array with each node's mean shortest path length to all other
+        nodes.  The array is in ascending order of node labels.
+
+    Notes
+    -----
+    Per the Brain Connectivity Toolbox for Matlab, the distance between
+    one node and another that cannot be reached from it is set to
+    infinity.
+
     """
-    if G.nodes_with_selfloops():
+    lengths = inter_node_distances(graph)
+    nodal_means = [np.mean(lengths[src].values()) for src in sorted(lengths)]
+    return np.array(nodal_means)
+
+
+def assert_no_selfloops(graph):
+    """Raise an error if the graph graph has any selfloops.
+    """
+    if graph.nodes_with_selfloops():
         raise ValueError("input graph can not have selfloops")
 
-#@profile
-def path_lengths(G):
+
+def path_lengths(graph):
     """Compute array of all shortest path lengths for the given graph.
 
     The length of the output array is the number of unique pairs of nodes that
@@ -64,12 +104,12 @@ def path_lengths(G):
     
     Parameters
     ----------
-    G : an undirected graph object.
+    graph : an undirected graph object.
     """
 
-    assert_no_selfloops(G)
+    assert_no_selfloops(graph)
 
-    length = nx.all_pairs_shortest_path_length(G)
+    length = nx.all_pairs_shortest_path_length(graph)
     paths = []
     seen = set()
     for src,targets in length.iteritems():
@@ -82,7 +122,7 @@ def path_lengths(G):
 
 
 #@profile
-def path_lengthsSPARSE(G):
+def path_lengthsSPARSE(graph):
     """Compute array of all shortest path lengths for the given graph.
 
     XXX - implementation using scipy.sparse.  This might be faster for very
@@ -99,14 +139,14 @@ def path_lengthsSPARSE(G):
     
     Parameters
     ----------
-    G : an undirected graph object.
+    graph : an undirected graph object.
     """
 
-    assert_no_selfloops(G)
+    assert_no_selfloops(graph)
 
-    length = nx.all_pairs_shortest_path_length(G)
+    length = nx.all_pairs_shortest_path_length(graph)
 
-    nnod = G.number_of_nodes()
+    nnod = graph.number_of_nodes()
     paths_mat = sparse.dok_matrix((nnod,nnod))
 
     for src,targets in length.iteritems():
@@ -116,45 +156,59 @@ def path_lengthsSPARSE(G):
     return sparse.triu(paths_mat,1).data
 
 
-def glob_efficiency(G):
+def glob_efficiency(graph):
     """Compute array of global efficiency for the given graph.
 
     Global efficiency: returns a list of the inverse path length matrix
     across all nodes The mean of this value is equal to the global efficiency
     of the network."""
 
-    return 1.0/path_lengths(G)
-        
-def nodal_efficiency(G):
-    """Compute array of global efficiency for the given graph.
+    return 1.0/path_lengths(graph)
 
-    Nodal efficiency: XXX - define."""
-        
-    nodepaths=[]
-    length = nx.all_pairs_shortest_path_length(G)
-    for src,targets in length.iteritems():
-        paths=[]
-        for targ,val in targets.items():
-            if src==targ:
-                continue
-            
-            paths.append(1.0/val)
-        
-        nodepaths.append(np.mean(paths))
-        
-    return np.array(nodepaths) 
 
-#@profile
-def local_efficiency(G):
+def nodal_efficiency(graph):
+    """Return array with nodal efficiency for each node in graph.
+
+    See Achard and Bullmore (2007, PLoS Comput Biol) for the definition
+    of nodal efficiency.
+
+    Parameters
+    ----------
+    graph: networkx Graph
+        An undirected graph.
+
+    Returns
+    -------
+    nodal_efficiencies: numpy array
+        An array with the nodal efficiency for each node in graph, in
+        the order specified by node_labels.  The array is in ascending
+        order of node labels.
+
+    Notes
+    -----
+    Per the Brain Connectivity Toolbox for Matlab, the distance between
+    one node and another that cannot be reached from it is set to
+    infinity.
+
+    """
+    lengths = inter_node_distances(graph)
+    nodal_efficiencies = np.zeros(len(lengths), dtype=float)
+    for src in sorted(lengths):
+        inverse_paths = [1.0 / val for val in lengths[src].itervalues()]
+        nodal_efficiencies[src] = np.mean(inverse_paths)
+    return nodal_efficiencies
+
+
+def local_efficiency(graph):
     """Compute array of global efficiency for the given grap.h
 
     Local efficiency: returns a list of paths that represent the nodal
     efficiencies across all nodes with their direct neighbors"""
 
     nodepaths=[]
-    length=nx.all_pairs_shortest_path_length(G)
-    for n in G.nodes():
-        nneighb= nx.neighbors(G,n)
+    length=nx.all_pairs_shortest_path_length(graph)
+    for n in graph.nodes():
+        nneighb= nx.neighbors(graph,n)
 
         paths=[]
         for src,targets in length.iteritems():
@@ -176,19 +230,18 @@ def local_efficiency(G):
     return np.array(nodepaths)
 
 
-#@profile
-def local_efficiency(G):
+def local_efficiency(graph):
     """Compute array of local efficiency for the given graph.
 
     Local efficiency: returns a list of paths that represent the nodal
     efficiencies across all nodes with their direct neighbors"""
 
-    assert_no_selfloops(G)
+    assert_no_selfloops(graph)
 
     nodepaths = []
-    length = nx.all_pairs_shortest_path_length(G)
-    for n in G:
-        nneighb = set(nx.neighbors(G,n))
+    length = nx.all_pairs_shortest_path_length(graph)
+    for n in graph:
+        nneighb = set(nx.neighbors(graph,n))
 
         paths = []
         for nei in nneighb:
@@ -205,20 +258,20 @@ def local_efficiency(G):
     return np.array(nodepaths)
 
 
-def dynamical_importance(G):
-    """Compute dynamical importance for G.
+def dynamical_importance(graph):
+    """Compute dynamical importance for graph.
 
     Ref: Restrepo, Ott, Hunt. Phys. Rev. Lett. 97, 094102 (2006)
     """
     # spectrum of the original graph
-    eigvals = nx.adjacency_spectrum(G)
+    eigvals = nx.adjacency_spectrum(graph)
     lambda0 = eigvals[0]
-    # Now, loop over all nodes in G, and for each, make a copy of G, remove
+    # Now, loop over all nodes in graph, and for each, make a copy of graph, remove
     # that node, and compute the change in lambda.
-    nnod = G.number_of_nodes()
+    nnod = graph.number_of_nodes()
     dyimp = np.empty(nnod,float)
     for n in range(nnod):
-        gn = G.copy()
+        gn = graph.copy()
         gn.remove_node(n)
         lambda_n = nx.adjacency_spectrum(gn)[0]
         dyimp[n] = lambda0 - lambda_n
@@ -227,22 +280,22 @@ def dynamical_importance(G):
     return dyimp
 
 
-def weighted_degree(G):
+def weighted_degree(graph):
     """Return an array of degrees that takes weights into account.
 
     For unweighted graphs, this is the same as the normal degree() method
     (though we return an array instead of a list).
     """
-    amat = nx.adj_matrix(G).A  # get a normal array out of it
+    amat = nx.adj_matrix(graph).A  # get a normal array out of it
     return abs(amat).sum(0)  # weights are sums across rows
 
 
-def graph_summary(G):
+def graph_summary(graph):
     """Compute a set of statistics summarizing the structure of a graph.
     
     Parameters
     ----------
-    G : a graph object.
+    graph : a graph object.
 
     threshold : float, optional
 
@@ -252,36 +305,44 @@ def graph_summary(G):
     """
 
     # Average path length
-    lp = path_lengths(G)
-    clust = np.array(nx.clustering(G).values())
-    glob_eff = glob_efficiency(G)
-    loc_eff = local_efficiency(G)
+    lp = path_lengths(graph)
+    clust = np.array(nx.clustering(graph).values())
+    glob_eff = glob_efficiency(graph)
+    loc_eff = local_efficiency(graph)
 
     return dict( lp=lp.mean(), clust=clust.mean(), glob_eff=glob_eff.mean(),
                  loc_eff=loc_eff.mean() )
 
-def nodal_summaryOut(G, n_nodes):
-    """ Compute statistics for individual nodes
+
+def nodal_summaryOut(graph):
+    """Compute statistics for individual nodes.
 
     Parameters
     ----------
-    G: graph data output from mkgraph
-    out: array output from nodal_summaryOut, so can keep appending
-    cost: cost value for these calculations
-    n_nodes: number of nodes in graph.
-
+    graph: networkx graph
+        An undirected graph.
+        
     Returns
     -------
+    dictionary
+        The keys of this dictionary are lp (which refers to path
+        length), clust (clustering coefficient), b_cen (betweenness
+        centrality), c_cen (closeness centrality), nod_eff (nodal
+        efficiency), loc_eff (local efficiency), and deg (degree).  The
+        values are arrays (or lists, in some cases) of metrics, in
+        ascending order of node labels.
 
-    A dict with: lp, clust, b_cen, c_cen, nod_eff, loc_eff, degree."""
-    
-    lp = nodal_pathlengths(G,n_nodes) #can't use the regular one, because it substitutes [] for disconnected nodes
-    clust = np.array(nx.clustering(G).values())
-    b_cen = np.array(nx.betweenness_centrality(G).values())
-    c_cen = np.array(nx.closeness_centrality(G).values())
-    nod_eff=nodal_efficiency(G)
-    loc_eff=local_efficiency(G)
-    deg = G.degree().values()
-
+    """
+    lp = nodal_pathlengths(graph)
+    clust_dict = nx.clustering(graph)
+    clust = np.array([clust_dict[n] for n in sorted(clust_dict)])
+    b_cen_dict = nx.betweenness_centrality(graph)
+    b_cen = np.array([b_cen_dict[n] for n in sorted(b_cen_dict)])
+    c_cen_dict = nx.closeness_centrality(graph)
+    c_cen = np.array([c_cen_dict[n] for n in sorted(c_cen_dict)])
+    nod_eff = nodal_efficiency(graph)
+    loc_eff = local_efficiency(graph)
+    deg_dict = graph.degree()
+    deg = [deg_dict[n] for n in sorted(deg_dict)]
     return dict(lp=lp, clust=clust, b_cen=b_cen, c_cen=c_cen, nod_eff=nod_eff,
-                loc_eff=loc_eff,deg=deg)
+                loc_eff=loc_eff, deg=deg)