|
13 | 13 | 'SpearmanRAbsolute', 'PearsonR', 'PearsonRAbsolute', 'Mahalanobis', |
14 | 14 | 'MahalanobisDistance'] |
15 | 15 |
|
| 16 | +# TODO: When we upgrade to numpy 1.13, use the argument copy=False in |
| 17 | +# nan_to_num instead of assignment |
16 | 18 |
|
17 | 19 | # TODO this *private* function is called from several widgets to prepare |
18 | 20 | # data for calling the below classes. After we (mostly) stopped relying |
@@ -255,11 +257,6 @@ def compute_distances(self, x1, x2): |
255 | 257 | call directly.""" |
256 | 258 | pass |
257 | 259 |
|
258 | | - @staticmethod |
259 | | - def check_no_two_tables(x2): |
260 | | - if x2 is not None: |
261 | | - raise ValueError("columns of two tables cannot be compared") |
262 | | - |
263 | 260 |
|
264 | 261 | class FittedDistanceModel(DistanceModel): |
265 | 262 | """ |
@@ -504,7 +501,16 @@ def __init__(self, attributes, impute, normalize, means, vars): |
504 | 501 | self.vars = vars |
505 | 502 |
|
506 | 503 | def compute_distances(self, x1, x2=None): |
507 | | - self.check_no_two_tables(x2) |
| 504 | + """ |
| 505 | + Compute distances between columns of x1. |
| 506 | +
|
| 507 | + The method |
| 508 | + - extracts normalized continuous attributes and then uses `row_norms` |
| 509 | + and `safe_sparse_dot` to compute the distance as x^2 - 2xy + y^2
| 510 | + (the trick from sklearn); |
| 511 | + - calls a function in Cython that adds the contributions of discrete |
| 512 | + columns |
| 513 | + """ |
508 | 514 | if self.normalize: |
509 | 515 | x1 = x1 - self.means |
510 | 516 | x1 /= np.sqrt(2 * self.vars) |
@@ -620,7 +626,6 @@ def __init__(self, attributes, impute, normalize, medians, mads): |
620 | 626 | self.mads = mads |
621 | 627 |
|
622 | 628 | def compute_distances(self, x1, x2=None): |
623 | | - self.check_no_two_tables(x2) |
624 | 629 | if self.normalize: |
625 | 630 | x1 = x1 - self.medians |
626 | 631 | x1 /= 2 |
@@ -691,7 +696,7 @@ def fit_rows(self, attributes, x, n_vals): |
691 | 696 | discrete = n_vals > 0 |
692 | 697 | x = self.discrete_to_indicators(x, discrete) |
693 | 698 | means = util.nanmean(x, axis=0) |
694 | | - np.nan_to_num(means, copy=False) |
| 699 | + means = np.nan_to_num(means) |
695 | 700 | return self.CosineModel(attributes, self.axis, self.impute, |
696 | 701 | discrete, means) |
697 | 702 |
|
@@ -748,7 +753,6 @@ def compute_distances(self, x1, x2): |
748 | 753 | x2 is not None) |
749 | 754 | else: |
750 | 755 | nans1 = _distance.any_nan_row(x1.T) |
751 | | - self.check_no_two_tables(x2) |
752 | 756 | return _distance.jaccard_cols( |
753 | 757 | nonzeros1, x1, nans1, self.ps) |
754 | 758 |
|
|
0 commit comments