2525import time
2626from functools import lru_cache
2727
28- import cvxpy as cvx
2928import networkit as nk
3029import networkx as nx
3130import numpy as np
3938_Gk = nk .graph .Graph ()
4039_alpha = 0.5
4140_weight = "weight"
42- _method = "Sinkhorn "
41+ _method = "OTDSinkhornMix "
4342_base = math .e
4443_exp_power = 2
4544_proc = mp .cpu_count ()
4645_cache_maxsize = 1000000
4746_shortest_path = "all_pairs"
48- _nbr_topk = 1000
47+ _nbr_topk = 3000
48+ _OTDSinkhorn_threshold = 2000
4949_apsp = {}
5050
5151
@@ -157,8 +157,8 @@ def _distribute_densities(source, target):
157157 else : # all_pairs
158158 d = _apsp [np .ix_ (source_topknbr , target_topknbr )] # transportation matrix
159159
160- x = np .array ([ x ]). T # the mass that source neighborhood initially owned
161- y = np .array ([ y ]). T # the mass that target neighborhood needs to received
160+ x = np .array (x ) # the mass that source neighborhood initially owned
161+ y = np .array (y ) # the mass that target neighborhood needs to received
162162
163163 logger .debug ("%8f secs density matrix construction for edge." % (time .time () - t0 ))
164164
@@ -221,20 +221,9 @@ def _optimal_transportation_distance(x, y, d):
221221 """
222222
223223 t0 = time .time ()
224- rho = cvx .Variable ((len (y ), len (x ))) # the transportation plan rho
225-
226- # objective function d(x,y) * rho * x, need to do element-wise multiply here
227- obj = cvx .Minimize (cvx .sum (cvx .multiply (np .multiply (d .T , x .T ), rho )))
228-
229- # \sigma_i rho_{ij}=[1,1,...,1]
230- source_sum = cvx .sum (rho , axis = 0 , keepdims = True )
231- constrains = [rho @ x == y , source_sum == np .ones ((1 , (len (x )))), 0 <= rho , rho <= 1 ]
232- prob = cvx .Problem (obj , constrains )
233-
234- m = prob .solve () # change solver here if you want
235- # solve for optimal transportation cost
236-
237- logger .debug ("%8f secs for cvxpy. \t #source_nbr: %d, #target_nbr: %d" % (time .time () - t0 , len (x ), len (y )))
224+ m = ot .emd2 (x , y , d )
225+ logger .debug (
226+ "%8f secs for Wasserstein dist. \t #source_nbr: %d, #target_nbr: %d" % (time .time () - t0 , len (x ), len (y )))
238227
239228 return m
240229
@@ -260,7 +249,7 @@ def _sinkhorn_distance(x, y, d):
260249 t0 = time .time ()
261250 m = ot .sinkhorn2 (x , y , d , 1e-1 , method = 'sinkhorn' )[0 ]
262251 logger .debug (
263- "%8f secs for Sinkhorn. dist. \t #source_nbr: %d, #target_nbr: %d" % (time .time () - t0 , len (x ), len (y )))
252+ "%8f secs for Sinkhorn dist. \t #source_nbr: %d, #target_nbr: %d" % (time .time () - t0 , len (x ), len (y )))
264253
265254 return m
266255
@@ -326,14 +315,14 @@ def _compute_ricci_curvature_single_edge(source, target):
326315
327316 # If the weight of edge is too small, return 0 instead.
328317 if _Gk .weight (source , target ) < EPSILON :
329- logger .warning ("Zero weight edge detected for edge (%s,%s), return Ricci Curvature as 0 instead." %
318+ logger .trace ("Zero weight edge detected for edge (%s,%s), return Ricci Curvature as 0 instead." %
330319 (source , target ))
331320 return {(source , target ): 0 }
332321
333322 # compute transportation distance
334323 m = 1 # assign an initial cost
335- assert _method in ["OTD" , "ATD" , "Sinkhorn" ], \
336- 'Method %s not found, support method:["OTD", "ATD", "Sinkhorn"]' % _method
324+ assert _method in ["OTD" , "ATD" , "Sinkhorn" , "OTDSinkhornMix" ], \
325+ 'Method %s not found, support method:["OTD", "ATD", "Sinkhorn", "OTDSinkhornMix ]' % _method
337326 if _method == "OTD" :
338327 x , y , d = _distribute_densities (source , target )
339328 m = _optimal_transportation_distance (x , y , d )
@@ -342,6 +331,14 @@ def _compute_ricci_curvature_single_edge(source, target):
342331 elif _method == "Sinkhorn" :
343332 x , y , d = _distribute_densities (source , target )
344333 m = _sinkhorn_distance (x , y , d )
334+ elif _method == "OTDSinkhornMix" :
335+ x , y , d = _distribute_densities (source , target )
336+ # When x and y are small (usually around 2000 to 3000), ot.emd2 is way faster than ot.sinkhorn2
337+ # So we only do sinkhorn when both x and y are too large for ot.emd2
338+ if len (x ) > _OTDSinkhorn_threshold and len (y ) > _OTDSinkhorn_threshold :
339+ m = _sinkhorn_distance (x , y , d )
340+ else :
341+ m = _optimal_transportation_distance (x , y , d )
345342
346343 # compute Ricci curvature: k=1-(m_{x,y})/d(x,y)
347344 result = 1 - (m / _Gk .weight (source , target )) # Divided by the length of d(i, j)
@@ -356,9 +353,9 @@ def _wrap_compute_single_edge(stuff):
356353
357354
358355def _compute_ricci_curvature_edges (G : nx .Graph , weight = "weight" , edge_list = [],
359- alpha = 0.5 , method = "OTD " ,
356+ alpha = 0.5 , method = "OTDSinkhornMix " ,
360357 base = math .e , exp_power = 2 , proc = mp .cpu_count (), chunksize = None , cache_maxsize = 1000000 ,
361- shortest_path = "all_pairs" , nbr_topk = 1000 ):
358+ shortest_path = "all_pairs" , nbr_topk = 3000 ):
362359 """Compute Ricci curvature for edges in given edge lists.
363360
364361 Parameters
@@ -375,12 +372,14 @@ def _compute_ricci_curvature_edges(G: nx.Graph, weight="weight", edge_list=[],
375372 E.g. x -> y, alpha = 0.4 means 0.4 for x, 0.6 to evenly spread to x's nbr.
376373 (Default value = 0.5)
377374 method : {"OTD", "ATD", "Sinkhorn"}
378- The optimal transportation distance computation method. (Default value = "OTD ")
375+ The optimal transportation distance computation method. (Default value = "OTDSinkhornMix ")
379376
380377 Transportation method:
381378 - "OTD" for Optimal Transportation Distance,
382379 - "ATD" for Average Transportation Distance.
383- - "Sinkhorn" for OTD approximated Sinkhorn distance. (faster)
380+ - "Sinkhorn" for OTD approximated Sinkhorn distance.
381+ - "OTDSinkhornMix" use OTD for nodes of edge with less than _OTDSinkhorn_threshold(default 2000) neighbors,
382+ use Sinkhorn for faster computation with nodes of edge more neighbors. (OTD is faster for smaller cases)
384383 base : float
385384 Base variable for weight distribution. (Default value = `math.e`)
386385 exp_power : float
@@ -396,7 +395,7 @@ def _compute_ricci_curvature_edges(G: nx.Graph, weight="weight", edge_list=[],
396395 Method to compute shortest path. (Default value = `all_pairs`)
397396 nbr_topk : int
398397 Only take the top k edge weight neighbors for density distribution.
399- Smaller k run faster but the result is less accurate. (Default value = 1000 )
398+ Smaller k run faster but the result is less accurate. (Default value = 3000 )
400399
401400 Returns
402401 -------
@@ -625,10 +624,10 @@ class OllivierRicci:
625624
626625 """
627626
628- def __init__ (self , G : nx .Graph , weight = "weight" , alpha = 0.5 , method = "Sinkhorn " ,
627+ def __init__ (self , G : nx .Graph , weight = "weight" , alpha = 0.5 , method = "OTDSinkhornMix " ,
629628 base = math .e , exp_power = 2 , proc = mp .cpu_count (), chunksize = None , shortest_path = "all_pairs" ,
630629 cache_maxsize = 1000000 ,
631- nbr_topk = 1000 , verbose = "ERROR" ):
630+ nbr_topk = 3000 , verbose = "ERROR" ):
632631 """Initialized a container to compute Ollivier-Ricci curvature/flow.
633632
634633 Parameters
@@ -643,12 +642,14 @@ def __init__(self, G: nx.Graph, weight="weight", alpha=0.5, method="Sinkhorn",
643642 E.g. x -> y, alpha = 0.4 means 0.4 for x, 0.6 to evenly spread to x's nbr.
644643 (Default value = 0.5)
645644 method : {"OTD", "ATD", "Sinkhorn"}
646- The optimal transportation distance computation method. (Default value = "Sinkhorn ")
645+ The optimal transportation distance computation method. (Default value = "OTDSinkhornMix ")
647646
648647 Transportation method:
649648 - "OTD" for Optimal Transportation Distance,
650649 - "ATD" for Average Transportation Distance.
651- - "Sinkhorn" for OTD approximated Sinkhorn distance. (faster)
650+ - "Sinkhorn" for OTD approximated Sinkhorn distance.
651+ - "OTDSinkhornMix" use OTD for nodes of edge with less than _OTDSinkhorn_threshold(default 2000) neighbors,
652+ use Sinkhorn for faster computation with nodes of edge more neighbors. (OTD is faster for smaller cases)
652653 base : float
653654 Base variable for weight distribution. (Default value = `math.e`)
654655 exp_power : float
@@ -664,7 +665,7 @@ def __init__(self, G: nx.Graph, weight="weight", alpha=0.5, method="Sinkhorn",
664665 Set this to `None` for unlimited cache. (Default value = 1000000)
665666 nbr_topk : int
666667 Only take the top k edge weight neighbors for density distribution.
667- Smaller k run faster but the result is less accurate. (Default value = 1000 )
668+ Smaller k run faster but the result is less accurate. (Default value = 3000 )
668669 verbose : {"INFO", "TRACE","DEBUG","ERROR"}
669670 Verbose level. (Default value = "ERROR")
670671 - "INFO": show only iteration process log.
@@ -765,13 +766,13 @@ def compute_ricci_curvature(self):
765766 nbr_topk = self .nbr_topk )
766767 return self .G
767768
768- def compute_ricci_flow (self , iterations = 20 , step = 1 , delta = 1e-4 , surgery = (lambda G , * args , ** kwargs : G , 100 )):
769+ def compute_ricci_flow (self , iterations = 10 , step = 1 , delta = 1e-4 , surgery = (lambda G , * args , ** kwargs : G , 100 )):
769770 """Compute the given Ricci flow metric of each edge of a given connected NetworkX graph.
770771
771772 Parameters
772773 ----------
773774 iterations : int
774- Iterations to require Ricci flow metric. (Default value = 100 )
775+ Iterations to require Ricci flow metric. (Default value = 10 )
775776 step : float
776777 Step size for gradient decent process. (Default value = 1)
777778 delta : float
0 commit comments