|
1 | 1 | import time |
2 | 2 | from enum import IntEnum |
| 3 | +from collections import OrderedDict |
3 | 4 |
|
4 | 5 | import numpy as np |
5 | 6 |
|
@@ -866,11 +867,13 @@ def calculate_log_reg_coefficients(self): |
866 | 867 | self.log_reg_coeffs = [coeffs[:, ranges[i]] for i in range(len(attrs))] |
867 | 868 | self.log_reg_coeffs_orig = self.log_reg_coeffs.copy() |
868 | 869 |
|
869 | | - for i in range(len(self.log_reg_coeffs)): |
| 870 | + min_values = nanmin(self.data.X, axis=0) |
| 871 | + max_values = nanmax(self.data.X, axis=0) |
| 872 | + |
| 873 | + for i, min_t, max_t in zip(range(len(self.log_reg_coeffs)), |
| 874 | + min_values, max_values): |
870 | 875 | if self.log_reg_coeffs[i].shape[1] == 1: |
871 | 876 | coef = self.log_reg_coeffs[i] |
872 | | - min_t = nanmin(self.data.X, axis=0)[i] |
873 | | - max_t = nanmax(self.data.X, axis=0)[i] |
874 | 877 | self.log_reg_coeffs[i] = np.hstack((coef * min_t, coef * max_t)) |
875 | 878 | self.log_reg_cont_data_extremes.append( |
876 | 879 | [sorted([min_t, max_t], reverse=(c < 0)) for c in coef]) |
@@ -1105,13 +1108,15 @@ def send_report(self): |
1105 | 1108 |
|
1106 | 1109 | @staticmethod |
1107 | 1110 | def reconstruct_domain(original, preprocessed): |
1108 | | - attrs = [] |
| 1111 | + # OrderedDict is used as an insertion-ordered set, so "in" checks are hash lookups instead of list scans |
| 1112 | + attrs = OrderedDict() |
1109 | 1113 | for attr in preprocessed.attributes: |
1110 | 1114 | cv = attr._compute_value.variable._compute_value |
1111 | 1115 | var = cv.variable if cv else original[attr.name] |
1112 | | - if var in attrs: |
| 1116 | + if var in attrs: # hash-based membership test; this is why attrs is an OrderedDict |
1113 | 1117 | continue |
1114 | | - attrs.append(var) |
| 1118 | + attrs[var] = None # the value is a placeholder; only the keys are used |
| 1119 | + attrs = list(attrs.keys()) |
1115 | 1120 | return Domain(attrs, original.class_var, original.metas) |
1116 | 1121 |
|
1117 | 1122 | @staticmethod |
|
0 commit comments