# --- Module-level configuration shared by the worker functions below ---
_exp_power = 2            # exponent applied to shortest-path length in the edge-weight kernel
_proc = cpu_count()       # number of worker processes for multiprocessing
_cache_maxsize = 1000000  # LRU cache size for shortest-path / distribution memoization
_shortest_path = "all_pairs"  # "all_pairs": precompute APSP table; "pairwise": on-demand Dijkstra
_nbr_topk = 1000          # only the top-k heaviest neighbor edges enter the density distribution
_apsp = {}                # cached all-pairs shortest-path distances, filled lazily

# -------------------------------------------------------

@lru_cache(_cache_maxsize)
def _get_single_node_neighbors_distributions(node, direction="successors"):
    """Get the neighbor density distribution of given node `node`.

    Parameters
    ----------
    node : int
        Node index in Networkit graph `_Gk`.
    direction : {"predecessors", "successors"}
        Direction of neighbors in directed graph. (Default value: "successors")

    Returns
    -------
    distributions : list of float
        Density distribution over the top `_nbr_topk` neighbors; the last
        entry is the mass `_alpha` kept at `node` itself.
    nbrs : list of int
        Neighbor indices aligned with `distributions` (last entry is `node`).

    """
    if _Gk.isDirected() and direction == "predecessors":
        neighbors = _Gk.inNeighbors(node)
    else:
        # Undirected graphs and the "successors" direction both use neighbors().
        neighbors = _Gk.neighbors(node)

    # Keep only the `_nbr_topk` heaviest edges via a bounded min-heap.
    # Edge weight kernel: _base ** (-d ** _exp_power), where d is the shortest
    # path oriented nbr->node for predecessors, node->nbr for successors.
    heap_weight_node_pair = []
    for nbr in neighbors:
        if direction == "predecessors":
            weight = _base ** (-_get_pairwise_sp(nbr, node) ** _exp_power)
        else:  # successors
            weight = _base ** (-_get_pairwise_sp(node, nbr) ** _exp_power)
        if len(heap_weight_node_pair) < _nbr_topk:
            heapq.heappush(heap_weight_node_pair, (weight, nbr))
        else:
            heapq.heappushpop(heap_weight_node_pair, (weight, nbr))

    nbr_edge_weight_sum = sum(w for w, _ in heap_weight_node_pair)

    if nbr_edge_weight_sum > EPSILON:
        # Sum needs to be not too small to prevent division by zero.
        distributions = [(1.0 - _alpha) * w / nbr_edge_weight_sum for w, _ in heap_weight_node_pair]
        nbr = [x[1] for x in heap_weight_node_pair]
        return distributions + [_alpha], nbr + [node]
    elif len(neighbors) == 0:
        # No neighbor: all mass stays at node.
        return [1], [node]
    else:
        # BUG FIX: the list was previously passed as a %-format argument to a
        # message with no placeholder, which breaks logging's lazy formatting.
        logger.warning("Neighbor weight sum too small, list: %s", heap_weight_node_pair)
        # Fall back to a uniform distribution over the collected neighbors.
        distributions = [(1.0 - _alpha) / len(heap_weight_node_pair)] * len(heap_weight_node_pair)
        nbr = [x[1] for x in heap_weight_node_pair]
        return distributions + [_alpha], nbr + [node]
51110
52- def _distribute_densities (source , target , nbr_topk = _nbr_topk ):
111+
112+ def _distribute_densities (source , target ):
53113 """Get the density distributions of source and target node, and the cost (all pair shortest paths) between
54- all source's and target's neighbors.
114+ all source's and target's neighbors. Notice that only neighbors with top `_nbr_topk` edge weights.
55115
56116 Parameters
57117 ----------
58118 source : int
59119 Source node index in Networkit graph `_Gk`.
60120 target : int
61121 Target node index in Networkit graph `_Gk`.
62- nbr_topk : int
63- Only take the neighbors with top k edge weights.
64122 Returns
65123 -------
66124 x : (m,) numpy.ndarray
@@ -72,50 +130,14 @@ def _distribute_densities(source, target, nbr_topk=_nbr_topk):
72130
73131 """
74132
75- # Append source and target node into weight distribution matrix x,y
133+ # Distribute densities for source and source's neighbors as x
76134 if _Gk .isDirected ():
77- source_nbr = _Gk . inNeighbors (source )
135+ x , source_topknbr = _get_single_node_neighbors_distributions (source , "predecessors" )
78136 else :
79- source_nbr = _Gk .neighbors (source )
80- target_nbr = _Gk .neighbors (target )
81-
82- def _get_single_node_neighbors_distributions (node , neighbors , direction = "successors" ):
83- # Get sum of distributions from x's all neighbors
84- heap_weight_node_pair = []
85- if direction == "predecessors" :
86- for nbr in neighbors :
87- if len (heap_weight_node_pair ) < nbr_topk :
88- heapq .heappush (heap_weight_node_pair , (_base ** (- _get_pairwise_sp (nbr , node ) ** _exp_power ), nbr ))
89- else :
90- heapq .heappushpop (heap_weight_node_pair , (_base ** (- _get_pairwise_sp (nbr , node ) ** _exp_power ), nbr ))
91- else : # successors
92- for nbr in neighbors :
93- if len (heap_weight_node_pair ) < nbr_topk :
94- heapq .heappush (heap_weight_node_pair , (_base ** (- _get_pairwise_sp (node , nbr ) ** _exp_power ), nbr ))
95- else :
96- heapq .heappushpop (heap_weight_node_pair , (_base ** (- _get_pairwise_sp (node , nbr ) ** _exp_power ), nbr ))
97-
98- nbr_edge_weight_sum = sum ([x [0 ] for x in heap_weight_node_pair ])
99-
100- if nbr_edge_weight_sum > EPSILON :
101- # Sum need to be not too small to prevent divided by zero
102- distributions = [(1.0 - _alpha ) * w / nbr_edge_weight_sum for w , _ in heap_weight_node_pair ]
103- nbr = [x [1 ] for x in heap_weight_node_pair ]
104- return distributions + [_alpha ], nbr + [node ]
105- elif len (neighbors ) == 0 :
106- # No neighbor, all mass stay at node
107- return [1 ], [node ]
108- else :
109- logger .warning ("Neighbor weight sum too small, list:" , heap_weight_node_pair )
110- distributions = [(1.0 - _alpha ) / len (heap_weight_node_pair )] * len (heap_weight_node_pair )
111- nbr = [x [1 ] for x in heap_weight_node_pair ]
112- return distributions + [_alpha ], nbr + [node ]
113-
114- # Distribute densities for source and source's neighbors as x
115- x , source_topknbr = _get_single_node_neighbors_distributions (source , source_nbr , "predecessors" )
137+ x , source_topknbr = _get_single_node_neighbors_distributions (source , "successors" )
116138
117139 # Distribute densities for target and target's neighbors as y
118- y , target_topknbr = _get_single_node_neighbors_distributions (target , target_nbr , "successors" )
140+ y , target_topknbr = _get_single_node_neighbors_distributions (target , "successors" )
119141
120142 # construct the cost dictionary from x to y
121143 d = np .zeros ((len (x ), len (y )))
@@ -131,7 +153,7 @@ def _get_single_node_neighbors_distributions(node, neighbors, direction="success
131153
132154
133155@lru_cache (_cache_maxsize )
134- def _get_pairwise_sp (source , target ):
156+ def _source_target_shortest_path (source , target ):
135157 """Compute pairwise shortest path from `source` to `target` by BidirectionalDijkstra via Networkit.
136158
137159 Parameters
@@ -153,6 +175,42 @@ def _get_pairwise_sp(source, target):
153175 return length
154176
155177
def _get_pairwise_sp(source, target):
    """Compute pairwise shortest path from `source` to `target`.

    Dispatches on the module-level `_shortest_path` setting: an on-demand
    computation for "pairwise", otherwise a lookup in the precomputed
    all-pairs table `_apsp`.

    Parameters
    ----------
    source : int
        Source node index in Networkit graph `_Gk`.
    target : int
        Target node index in Networkit graph `_Gk`.

    Returns
    -------
    length : float
        Pairwise shortest path length.

    """
    if _shortest_path != "pairwise":
        return _apsp[source][target]
    return _source_target_shortest_path(source, target)
199+
200+
def _get_all_pairs_shortest_path():
    """Pre-compute all pairs shortest paths of the assigned graph `_Gk`.

    Returns
    -------
    apsp
        Distance table from NetworKit's APSP, indexed as ``apsp[source][target]``.

    """
    logger.info("Start to compute all pair shortest path.")

    # NOTE(review): the original declared `global _Gk` without ever assigning
    # it; reading a module global needs no declaration, so it was removed.
    t0 = time.time()
    apsp = nk.distance.APSP(_Gk).run().getDistances()
    # Lazy %-style args: the message is only formatted if INFO is enabled.
    logger.info("%8f secs for all pair by NetworKit.", time.time() - t0)

    return apsp
212+
213+
156214def _optimal_transportation_distance (x , y , d ):
157215 """Compute the optimal transportation distance (OTD) of the given density distributions by CVXPY.
158216
@@ -287,12 +345,12 @@ def _compute_ricci_curvature_single_edge(source, target):
287345 assert _method in ["OTD" , "ATD" , "Sinkhorn" ], \
288346 'Method %s not found, support method:["OTD", "ATD", "Sinkhorn"]' % _method
289347 if _method == "OTD" :
290- x , y , d = _distribute_densities (source , target , _nbr_topk )
348+ x , y , d = _distribute_densities (source , target )
291349 m = _optimal_transportation_distance (x , y , d )
292350 elif _method == "ATD" :
293351 m = _average_transportation_distance (source , target )
294352 elif _method == "Sinkhorn" :
295- x , y , d = _distribute_densities (source , target , _nbr_topk )
353+ x , y , d = _distribute_densities (source , target )
296354 m = _sinkhorn_distance (x , y , d )
297355
298356 # compute Ricci curvature: k=1-(m_{x,y})/d(x,y)
@@ -310,7 +368,7 @@ def _wrap_compute_single_edge(stuff):
310368def _compute_ricci_curvature_edges (G : nx .Graph , weight = "weight" , edge_list = [],
311369 alpha = 0.5 , method = "OTD" ,
312370 base = math .e , exp_power = 2 , proc = cpu_count (), chunksize = None , cache_maxsize = 1000000 ,
313- nbr_topk = 1000 ):
371+ shortest_path = "all_pairs" , nbr_topk = 1000 ):
314372 """Compute Ricci curvature for edges in given edge lists.
315373
316374 Parameters
@@ -344,6 +402,8 @@ def _compute_ricci_curvature_edges(G: nx.Graph, weight="weight", edge_list=[],
344402 cache_maxsize : int
345403 Max size for LRU cache for pairwise shortest path computation.
346404 Set this to `None` for unlimited cache. (Default value = 1000000)
405+ shortest_path : {"all_pairs","pairwise"}
406+ Method to compute shortest path. (Default value = `all_pairs`)
347407 nbr_topk : int
348408 Only take the top k edge weight neighbors for density distribution.
349409 Smaller k run faster but the result is less accurate. (Default value = 1000)
@@ -355,6 +415,9 @@ def _compute_ricci_curvature_edges(G: nx.Graph, weight="weight", edge_list=[],
355415
356416 """
357417
418+ logger .info ("Number of nodes: %d" % G .number_of_nodes ())
419+ logger .info ("Number of edges: %d" % G .number_of_edges ())
420+
358421 if not nx .get_edge_attributes (G , weight ):
359422 print ('Edge weight not detected in graph, use "weight" as default edge weight.' )
360423 for (v1 , v2 ) in G .edges ():
@@ -369,7 +432,9 @@ def _compute_ricci_curvature_edges(G: nx.Graph, weight="weight", edge_list=[],
369432 global _exp_power
370433 global _proc
371434 global _cache_maxsize
435+ global _shortest_path
372436 global _nbr_topk
437+ global _apsp
373438 # -------------------------------------------------------
374439
375440 _Gk = nk .nxadapter .nx2nk (G , weightAttr = weight )
@@ -380,6 +445,7 @@ def _compute_ricci_curvature_edges(G: nx.Graph, weight="weight", edge_list=[],
380445 _exp_power = exp_power
381446 _proc = proc
382447 _cache_maxsize = cache_maxsize
448+ _shortest_path = shortest_path
383449 _nbr_topk = nbr_topk
384450
385451 # Construct nx to nk dictionary
@@ -388,6 +454,11 @@ def _compute_ricci_curvature_edges(G: nx.Graph, weight="weight", edge_list=[],
388454 nx2nk_ndict [n ] = idx
389455 nk2nx_ndict [idx ] = n
390456
457+ if _shortest_path == "all_pairs" :
458+ # Construct the all pair shortest path dictionary
459+ if not _apsp :
460+ _apsp = _get_all_pairs_shortest_path ()
461+
391462 if edge_list :
392463 args = [(nx2nk_ndict [source ], nx2nk_ndict [target ]) for source , target in edge_list ]
393464 else :
@@ -500,12 +571,11 @@ def _compute_ricci_flow(G: nx.Graph, weight="weight",
500571 G = nx .Graph (G .subgraph (max (nx .connected_components (G ), key = len )))
501572 G .remove_edges_from (nx .selfloop_edges (G ))
502573
503- logger .info ("Number of nodes: %d" % G .number_of_nodes ())
504- logger .info ("Number of edges: %d" % G .number_of_edges ())
505-
506574 # Set normalized weight to be the number of edges.
507575 normalized_weight = float (G .number_of_edges ())
508576
577+ global _apsp
578+
509579 # Start compute edge Ricci flow
510580 t0 = time .time ()
511581
@@ -517,6 +587,10 @@ def _compute_ricci_flow(G: nx.Graph, weight="weight",
517587 for (v1 , v2 ) in G .edges ():
518588 G [v1 ][v2 ]["original_RC" ] = G [v1 ][v2 ]["ricciCurvature" ]
519589
590+ # clear the APSP since the graph have changed.
591+ _apsp = {}
592+
593+
520594 # Start the Ricci flow process
521595 for i in range (iterations ):
522596 for (v1 , v2 ) in G .edges ():
@@ -550,7 +624,10 @@ def _compute_ricci_flow(G: nx.Graph, weight="weight",
550624 normalized_weight = float (G .number_of_edges ())
551625
552626 for n1 , n2 in G .edges ():
553- logger .debug (n1 , n2 , G [n1 ][n2 ])
627+ logger .debug ("%s %s %s" % (n1 , n2 , G [n1 ][n2 ]))
628+
629+ # clear the APSP since the graph have changed.
630+ _apsp = {}
554631
555632 logger .info ("\n %8f secs for Ricci flow computation." % (time .time () - t0 ))
556633
@@ -564,7 +641,8 @@ class OllivierRicci:
564641 """
565642
566643 def __init__ (self , G : nx .Graph , weight = "weight" , alpha = 0.5 , method = "OTD" ,
567- base = math .e , exp_power = 2 , proc = cpu_count (), chunksize = None , cache_maxsize = 1000000 ,
644+ base = math .e , exp_power = 2 , proc = cpu_count (), chunksize = None , shortest_path = "all_pairs" ,
645+ cache_maxsize = 1000000 ,
568646 nbr_topk = 1000 , verbose = "ERROR" ):
569647 """Initialized a container to compute Ollivier-Ricci curvature/flow.
570648
@@ -596,13 +674,15 @@ def __init__(self, G: nx.Graph, weight="weight", alpha=0.5, method="OTD",
596674 Number of processor used for multiprocessing. (Default value = `cpu_count()`)
597675 chunksize : int
598676 Chunk size for multiprocessing, set None for auto decide. (Default value = `None`)
677+ shortest_path : {"all_pairs","pairwise"}
678+ Method to compute shortest path. (Default value = `all_pairs`)
599679 cache_maxsize : int
600680 Max size for LRU cache for pairwise shortest path computation.
601681 Set this to `None` for unlimited cache. (Default value = 1000000)
602682 nbr_topk : int
603683 Only take the top k edge weight neighbors for density distribution.
604684 Smaller k run faster but the result is less accurate. (Default value = 1000)
605- verbose: {"INFO","DEBUG","ERROR"}
685+ verbose : {"INFO","DEBUG","ERROR"}
606686 Verbose level. (Default value = "ERROR")
607687 - "INFO": show only iteration process log.
608688 - "DEBUG": show all output logs.
@@ -618,6 +698,7 @@ def __init__(self, G: nx.Graph, weight="weight", alpha=0.5, method="OTD",
618698 self .proc = proc
619699 self .chunksize = chunksize
620700 self .cache_maxsize = cache_maxsize
701+ self .shortest_path = shortest_path
621702 self .nbr_topk = nbr_topk
622703
623704 self .set_verbose (verbose )
@@ -658,7 +739,8 @@ def compute_ricci_curvature_edges(self, edge_list=None):
658739 alpha = self .alpha , method = self .method ,
659740 base = self .base , exp_power = self .exp_power ,
660741 proc = self .proc , chunksize = self .chunksize ,
661- cache_maxsize = self .cache_maxsize , nbr_topk = self .nbr_topk )
742+ cache_maxsize = self .cache_maxsize , shortest_path = self .shortest_path ,
743+ nbr_topk = self .nbr_topk )
662744
663745 def compute_ricci_curvature (self ):
664746 """Compute Ricci curvature of edges and nodes.
@@ -684,6 +766,7 @@ def compute_ricci_curvature(self):
684766 alpha = self .alpha , method = self .method ,
685767 base = self .base , exp_power = self .exp_power ,
686768 proc = self .proc , chunksize = self .chunksize , cache_maxsize = self .cache_maxsize ,
769+ shortest_path = self .shortest_path ,
687770 nbr_topk = self .nbr_topk )
688771 return self .G
689772
@@ -724,5 +807,5 @@ def compute_ricci_flow(self, iterations=10, step=1, delta=1e-4, surgery=(lambda
724807 alpha = self .alpha , method = self .method ,
725808 base = self .base , exp_power = self .exp_power ,
726809 proc = self .proc , chunksize = self .chunksize , cache_maxsize = self .cache_maxsize ,
727- nbr_topk = self .nbr_topk )
810+ shortest_path = self . shortest_path , nbr_topk = self .nbr_topk )
728811 return self .G
0 commit comments