@@ -418,25 +418,128 @@ def __init__(self, absolute, axis=1, impute=False):
418418 self .absolute = absolute
419419
def compute_distances(self, x1, x2):
    """
    Turn the correlations between `x1` and `x2` into distances.

    Both arguments are handed to `self.compute_correlation` unchanged
    (no substitution is made here when `x2` is None). The result is
    `(1 - rho) / 2`, or `(1 - |rho|) / 2` when `self.absolute` is set.
    """
    correlation = self.compute_correlation(x1, x2)
    if self.absolute:
        # The sign of the correlation is irrelevant in absolute mode.
        correlation = np.abs(correlation)
    return (1. - correlation) / 2.
428426
def compute_correlation(self, x1, x2):
    """
    Return the correlations between `x1` and `x2`.

    Abstract hook: this base implementation always raises
    NotImplementedError; the concrete correlation measure has to be
    provided by an overriding implementation.
    """
    raise NotImplementedError
431429
432430
class SpearmanModel(CorrelationDistanceModel):
    """Correlation distance model using Spearman rank correlation."""

    def compute_correlation(self, x1, x2):
        """
        Return a 2d array of Spearman correlations.

        With `x2` given, the work is delegated to `_spearmanr2`.
        With `x2` None, the correlations are computed within `x1`
        itself using `scipy.stats.spearmanr` along `self.axis`.
        """
        if x2 is not None:
            return _spearmanr2(x1, x2, axis=self.axis)

        # Extent of x1 along the axis opposite to self.axis.
        n_vars = x1.shape[1 - self.axis]
        if n_vars != 2:
            # The result is a scalar if n_vars == 1; atleast_2d below
            # turns it into a 1 x 1 matrix.
            rho = stats.spearmanr(x1, axis=self.axis)[0]
        else:
            # Special case to properly fill degenerate self correlations
            # (nan, inf on the diagonals): correlate x1 against itself
            # and keep only the leading 2 x 2 block of the 4 x 4 result.
            rho = stats.spearmanr(x1, x1, axis=self.axis)[0]
            assert rho.shape == (4, 4)
            rho = rho[:2, :2].copy()
        return np.atleast_2d(rho)
447+
448+
def _spearmanr2(a, b, axis=0):
    """
    Compute all pairwise Spearman rank correlations between rows
    or columns of a and b.

    The values are ranked along `axis` with `scipy.stats.rankdata` and
    the ranks are then passed to `_corrcoef2`.

    Parameters
    ----------
    a : (N, M) numpy.ndarray
        The input cases a.
    b : (J, K) numpy.ndarray
        The input cases b. In case of axis == 0: J must equal N;
        otherwise if axis == 1 then K must equal M.
    axis : int
        If 0 the correlations are computed between a and b's columns.
        Otherwise if 1 the correlations are computed between rows.

    Returns
    -------
    cor : (M, K) or (N, J) numpy.ndarray
        If axis == 0 an (M, K) matrix of correlations between the
        columns of a and the columns of b; if axis == 1 an (N, J)
        matrix of correlations between the rows of a and the rows of b.

    Raises
    ------
    ValueError
        If `axis` is neither 0 nor 1, or if a and b differ in length
        along `axis`.

    See Also
    --------
    scipy.stats.spearmanr
    """
    a, b = np.atleast_2d(a, b)
    # Validate explicitly: an `assert` would vanish under `python -O`, and
    # an out-of-range axis would otherwise surface as an IndexError.
    if axis not in (0, 1):
        raise ValueError("Invalid axis {} (only 0 or 1 accepted)".format(axis))
    if a.shape[axis] != b.shape[axis]:
        raise ValueError(
            "a and b must have the same length along axis {} "
            "(got {} and {})".format(axis, a.shape[axis], b.shape[axis])
        )

    # Replace the observations of every variable by their ranks.
    ar = np.apply_along_axis(stats.rankdata, axis, a)
    br = np.apply_along_axis(stats.rankdata, axis, b)

    return _corrcoef2(ar, br, axis=axis)
481+
482+
def _corrcoef2(a, b, axis=0):
    """
    Compute all pairwise Pearson product-moment correlation coefficients
    between rows or columns of a and b.

    Parameters
    ----------
    a : (N, M) numpy.ndarray
        The input cases a.
    b : (J, K) numpy.ndarray
        The input cases b. In case of axis == 0: J must equal N;
        otherwise if axis == 1 then K must equal M.
    axis : int
        If 0 the correlations are computed between a and b's columns.
        Otherwise if 1 the correlations are computed between rows.

    Returns
    -------
    cor : (M, K) or (N, J) numpy.ndarray
        If axis == 0 an (M, K) matrix of correlations between the
        columns of a and the columns of b; if axis == 1 an (N, J)
        matrix of correlations between the rows of a and the rows of b.
        Entries involving a constant row/column are nan (0 / 0), all
        other entries are clipped to the interval [-1, 1].

    Raises
    ------
    ValueError
        If `axis` is neither 0 nor 1, or if a and b differ in length
        along `axis`.

    See Also
    --------
    numpy.corrcoef
    """
    a, b = np.atleast_2d(a, b)
    # Validate explicitly before doing any work; an `assert` would be
    # stripped when running under `python -O`.
    if axis not in (0, 1):
        raise ValueError("Invalid axis {} (only 0 or 1 accepted)".format(axis))
    if a.shape[axis] != b.shape[axis]:
        raise ValueError(
            "a and b must have the same length along axis {} "
            "(got {} and {})".format(axis, a.shape[axis], b.shape[axis])
        )

    # Center every variable (column for axis == 0, row for axis == 1).
    a = a - np.mean(a, axis=axis, keepdims=True)
    b = b - np.mean(b, axis=axis, keepdims=True)

    # Per-variable sums of squares; keepdims keeps them two-dimensional.
    ss_a = np.sum(a ** 2, axis=axis, keepdims=True)
    ss_b = np.sum(b ** 2, axis=axis, keepdims=True)

    if axis == 0:
        C = a.T.dot(b)
        ss_a = ss_a.T  # (1, n) -> (n, 1) so it scales the rows of C
    else:
        C = a.dot(b.T)
        ss_b = ss_b.T  # (m, 1) -> (1, m) so it scales the columns of C

    # C is a freshly allocated product, so scaling it in place is safe.
    C /= np.sqrt(ss_a)
    C /= np.sqrt(ss_b)
    # Rounding can push coefficients marginally outside [-1, 1], which would
    # turn into negative distances downstream; numpy.corrcoef clips as well.
    np.clip(C, -1.0, 1.0, out=C)
    return C
440543
441544
442545class CorrelationDistance (Distance ):
@@ -455,10 +558,11 @@ def fit(self, _):
455558
class PearsonModel(CorrelationDistanceModel):
    """Correlation distance model using Pearson correlation."""

    def compute_correlation(self, x1, x2):
        """
        Return a 2d array of Pearson correlations.

        With `x2` given, the work is delegated to `_corrcoef2`.
        With `x2` None, the correlations are computed within `x1`
        itself using `numpy.corrcoef`.
        """
        if x2 is not None:
            return _corrcoef2(x1, x2, axis=self.axis)

        # numpy.corrcoef treats rows as variables when rowvar is true.
        rows_are_variables = self.axis == 1
        coefficients = np.corrcoef(x1, rowvar=rows_are_variables)
        # atleast_2d promotes a scalar result to a 1 x 1 matrix.
        return np.atleast_2d(coefficients)
462566
463567
464568class PearsonR (CorrelationDistance ):
0 commit comments