@@ -15,15 +15,13 @@ def __init__(
1515 ensemble_size : int ,
1616 task_type : int ,
1717 metric : Scorer ,
18- sorted_initialization : bool = False ,
1918 bagging : bool = False ,
2019 mode : str = 'fast' ,
2120 random_state : np .random .RandomState = None ,
2221 ):
2322 self .ensemble_size = ensemble_size
2423 self .task_type = task_type
2524 self .metric = metric
26- self .sorted_initialization = sorted_initialization
2725 self .bagging = bagging
2826 self .mode = mode
2927 self .random_state = random_state
@@ -64,38 +62,47 @@ def _fast(self, predictions, labels):
6462
6563 ensemble_size = self .ensemble_size
6664
67- if self .sorted_initialization :
68- n_best = 20
69- indices = self ._sorted_initialization (predictions , labels , n_best )
70- for idx in indices :
71- ensemble .append (predictions [idx ])
72- order .append (idx )
73- ensemble_ = np .array (ensemble ).mean (axis = 0 )
74- ensemble_performance = calculate_score (
75- labels , ensemble_ , self .task_type , self .metric ,
76- ensemble_ .shape [1 ])
77- trajectory .append (ensemble_performance )
78- ensemble_size -= n_best
79-
65+ weighted_ensemble_prediction = np .zeros (predictions [0 ].shape )
66+ fant_ensemble_prediction = np .zeros (weighted_ensemble_prediction .shape )
8067 for i in range (ensemble_size ):
8168 scores = np .zeros ((len (predictions )))
8269 s = len (ensemble )
8370 if s == 0 :
84- weighted_ensemble_prediction = np . zeros ( predictions [ 0 ]. shape )
71+ weighted_ensemble_prediction . fill ( 0.0 )
8572 else :
86- # Memory-efficient averaging!
87- ensemble_prediction = np .zeros (ensemble [0 ].shape )
73+ weighted_ensemble_prediction .fill (0.0 )
8874 for pred in ensemble :
89- ensemble_prediction += pred
90- ensemble_prediction /= s
91-
92- weighted_ensemble_prediction = (s / float (s + 1 )) * ensemble_prediction
93- fant_ensemble_prediction = np .zeros (weighted_ensemble_prediction .shape )
75+ np .add (
76+ weighted_ensemble_prediction ,
77+ pred ,
78+ out = weighted_ensemble_prediction ,
79+ )
80+ np .multiply (
81+ weighted_ensemble_prediction ,
82+ 1 / s ,
83+ out = weighted_ensemble_prediction ,
84+ )
85+ np .multiply (
86+ weighted_ensemble_prediction ,
87+ (s / float (s + 1 )),
88+ out = weighted_ensemble_prediction ,
89+ )
90+
91+ # Memory-efficient averaging!
9492 for j , pred in enumerate (predictions ):
9593 # TODO: this could potentially be vectorized! - let's profile
9694 # the script first!
97- fant_ensemble_prediction [:, :] = \
98- weighted_ensemble_prediction + (1. / float (s + 1 )) * pred
95+ fant_ensemble_prediction .fill (0.0 )
96+ np .add (
97+ fant_ensemble_prediction ,
98+ weighted_ensemble_prediction ,
99+ out = fant_ensemble_prediction
100+ )
101+ np .add (
102+ fant_ensemble_prediction ,
103+ (1. / float (s + 1 )) * pred ,
104+ out = fant_ensemble_prediction
105+ )
99106 scores [j ] = self .metric ._optimum - calculate_score (
100107 solution = labels ,
101108 prediction = fant_ensemble_prediction ,
@@ -127,22 +134,6 @@ def _slow(self, predictions, labels):
127134
128135 ensemble_size = self .ensemble_size
129136
130- if self .sorted_initialization :
131- n_best = 20
132- indices = self ._sorted_initialization (predictions , labels , n_best )
133- for idx in indices :
134- ensemble .append (predictions [idx ])
135- order .append (idx )
136- ensemble_ = np .array (ensemble ).mean (axis = 0 )
137- ensemble_performance = calculate_score (
138- solution = labels ,
139- prediction = ensemble_ ,
140- task_type = self .task_type ,
141- metric = self .metric ,
142- all_scoring_functions = False )
143- trajectory .append (ensemble_performance )
144- ensemble_size -= n_best
145-
146137 for i in range (ensemble_size ):
147138 scores = np .zeros ([predictions .shape [0 ]])
148139 for j , pred in enumerate (predictions ):
@@ -180,16 +171,6 @@ def _calculate_weights(self):
180171
181172 self .weights_ = weights
182173
183- def _sorted_initialization (self , predictions , labels , n_best ):
184- perf = np .zeros ([predictions .shape [0 ]])
185-
186- for idx , prediction in enumerate (predictions ):
187- perf [idx ] = calculate_score (labels , prediction , self .task_type ,
188- self .metric , predictions .shape [1 ])
189-
190- indices = np .argsort (perf )[perf .shape [0 ] - n_best :]
191- return indices
192-
193174 def _bagging (self , predictions , labels , fraction = 0.5 , n_bags = 20 ):
194175 """Rich Caruana's ensemble selection method with bagging."""
195176 raise ValueError ('Bagging might not work with class-based interface!' )
0 commit comments