|
1 | 1 | import numpy as np |
2 | 2 |
|
3 | | -from Orange.data import Domain |
| 3 | +from Orange.data import Domain, ContinuousVariable |
4 | 4 | from Orange.statistics import distribution |
5 | 5 | from Orange.util import Reprable |
6 | 6 | from .preprocess import Normalize |
@@ -42,26 +42,30 @@ def normalize(self, dist, var): |
42 | 42 | var = self.normalize_by_sd(dist, var) |
43 | 43 | elif self.norm_type == Normalize.NormalizeBySpan: |
44 | 44 | var = self.normalize_by_span(dist, var) |
45 | | - var.number_of_decimals = None |
46 | 45 | return var |
47 | 46 |
|
def normalize_by_sd(self, dist, var: ContinuousVariable) -> ContinuousVariable:
    """
    Return a copy of `var` rescaled by the standard deviation of `dist`.

    The copy's ``compute_value`` is ``Norm(var, offset, 1 / sd)``, where
    ``offset`` is the mean of `dist` when ``self.center`` is set and 0
    otherwise. An empty distribution is treated as mean 0 / sd 1, and a
    zero standard deviation is replaced by 1 to avoid dividing by zero.

    When centering, the copy is given an explicit number of decimals
    (3, shifted by the order of magnitude of sd). Without centering,
    ``number_of_decimals=None`` is passed through unchanged.
    NOTE(review): None presumably means "let the variable pick its own
    formatting" -- confirm against ContinuousVariable.
    """
    avg, sd = (dist.mean(), dist.standard_deviation()) if dist.size else (0, 1)
    if sd == 0:
        sd = 1
    if self.center:
        compute_val = Norm(var, avg, 1 / sd)
        num_decimals = 3
    else:
        compute_val = Norm(var, 0, 1 / sd)
        num_decimals = None
    # Only adjust a concrete count: `None += int` raises TypeError, which
    # used to break every non-centered normalization. A non-finite sd is
    # skipped as well, because int(nan) cannot be computed.
    if num_decimals is not None and np.isfinite(sd) and sd > 0:
        num_decimals += int(-np.floor(np.log10(sd)))
        # A large sd would push the count below zero; clamp it.
        num_decimals = max(num_decimals, 0)
    return var.copy(compute_value=compute_val, number_of_decimals=num_decimals)
57 | 59 |
|
def normalize_by_span(self, dist, var: ContinuousVariable) -> ContinuousVariable:
    """
    Return a copy of `var` rescaled by the value span (max - min) of `dist`.

    With ``self.zero_based`` set, the copy's ``compute_value`` is
    ``Norm(var, min, 1 / span)`` and it is given 3 decimals; otherwise it
    is ``Norm(var, (max + min) / 2, 2 / span)`` and
    ``number_of_decimals=None`` is passed. A span below 1e-15 is replaced
    by 1 to avoid dividing by (almost) zero. When ``dist.shape[1]`` is 0,
    both bounds are NaN.
    """
    if dist.shape[1]:
        upper = dist.max()
        lower = dist.min()
    else:
        upper = lower = np.nan

    span = upper - lower
    if span < 1e-15:
        span = 1

    if self.zero_based:
        return var.copy(
            compute_value=Norm(var, lower, 1 / span),
            number_of_decimals=3,
        )
    return var.copy(
        compute_value=Norm(var, (upper + lower) / 2, 2 / span),
        number_of_decimals=None,
    )
0 commit comments