     array([0, 1])
 """
 
-import numpy as np
 from typing import Any
 
+import numpy as np
+
 
 class AdaBoost:
     def __init__(self, n_estimators: int = 50) -> None:
-        """Initialize AdaBoost classifier.
+        """
+        Initialize AdaBoost classifier.
+
         Args:
-            n_estimators: Number of boosting rounds.
+            n_estimators: Number of boosting rounds (weak learners).
         """
         self.n_estimators: int = n_estimators
-        self.alphas: list[float] = []  # Weights for each weak learner
-        self.models: list[dict[str, Any]] = []  # List of weak learners (stumps)
+        self.alphas: list[float] = []  # Weights assigned to each weak learner
+        self.models: list[dict[str, Any]] = []  # Stores each decision stump
 
     def fit(self, feature_matrix: np.ndarray, target: np.ndarray) -> None:
-        """Fit AdaBoost model.
+        """
+        Train AdaBoost model using decision stumps.
+
         Args:
-            feature_matrix: (n_samples, n_features) feature matrix
-            target: (n_samples,) labels (0 or 1)
+            feature_matrix: 2D array of shape (n_samples, n_features)
+            target: 1D array of binary labels (0 or 1)
         """
         n_samples, _ = feature_matrix.shape
+
+        # Initialize uniform sample weights
         sample_weights = np.ones(n_samples) / n_samples
+
+        # Reset model state
         self.models = []
         self.alphas = []
+
+        # Convert labels to {-1, 1} for boosting
         y_signed = np.where(target == 0, -1, 1)
+
         for _ in range(self.n_estimators):
+            # Train a weighted decision stump
             stump = self._build_stump(feature_matrix, y_signed, sample_weights)
             pred = stump["pred"]
             err = stump["error"]
+
+            # Compute alpha (learner weight) with numerical stability
             alpha = 0.5 * np.log((1 - err) / (err + 1e-10))
+
+            # Update sample weights to focus on misclassified points
             sample_weights *= np.exp(-alpha * y_signed * pred)
             sample_weights /= np.sum(sample_weights)
+
+            # Store the stump and its weight
             self.models.append(stump)
             self.alphas.append(alpha)
 
     def predict(self, feature_matrix: np.ndarray) -> np.ndarray:
-        """Predict class labels for samples in feature_matrix.
+        """
+        Predict binary class labels for input samples.
+
         Args:
-            feature_matrix: (n_samples, n_features) feature matrix
+            feature_matrix: 2D array of shape (n_samples, n_features)
+
         Returns:
-            (n_samples,) predicted labels (0 or 1)
+            1D array of predicted labels (0 or 1)
         """
         clf_preds = np.zeros(feature_matrix.shape[0])
+
+        # Aggregate predictions from all stumps
         for alpha, stump in zip(self.alphas, self.models):
             pred = self._stump_predict(
-                feature_matrix, stump["feature"], stump["threshold"], stump["polarity"]
+                feature_matrix,
+                stump["feature"],
+                stump["threshold"],
+                stump["polarity"],
             )
             clf_preds += alpha * pred
+
+        # Final prediction: sign of weighted sum
         return np.where(clf_preds >= 0, 1, 0)
 
     def _build_stump(
@@ -68,16 +97,30 @@ def _build_stump(
         target_signed: np.ndarray,
         sample_weights: np.ndarray,
     ) -> dict[str, Any]:
-        """Find the best decision stump for current weights."""
+        """
+        Build the best decision stump for current sample weights.
+
+        Returns:
+            Dictionary containing stump parameters and predictions.
+        """
         _, n_features = feature_matrix.shape
         min_error = float("inf")
         best_stump: dict[str, Any] = {}
+
+        # Iterate over all features and thresholds
         for feature in range(n_features):
             thresholds = np.unique(feature_matrix[:, feature])
             for threshold in thresholds:
                 for polarity in [1, -1]:
-                    pred = self._stump_predict(feature_matrix, feature, threshold, polarity)
+                    pred = self._stump_predict(
+                        feature_matrix,
+                        feature,
+                        threshold,
+                        polarity,
+                    )
                     error = np.sum(sample_weights * (pred != target_signed))
+
+                    # Keep stump with lowest weighted error
                     if error < min_error:
                         min_error = error
                         best_stump = {
@@ -87,15 +130,28 @@ def _build_stump(
                             "error": error,
                             "pred": pred.copy(),
                         }
+
         return best_stump
 
     def _stump_predict(
-        self, feature_matrix: np.ndarray, feature: int, threshold: float, polarity: int
+        self,
+        feature_matrix: np.ndarray,
+        feature: int,
+        threshold: float,
+        polarity: int,
     ) -> np.ndarray:
-        """Predict using a single decision stump."""
+        """
+        Predict using a single decision stump.
+
+        Returns:
+            1D array of predictions in {-1, 1}
+        """
         pred = np.ones(feature_matrix.shape[0])
+
+        # Apply polarity to threshold comparison
         if polarity == 1:
             pred[feature_matrix[:, feature] < threshold] = -1
         else:
             pred[feature_matrix[:, feature] > threshold] = -1
-        return pred
+
+        return pred
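A minimal usage sketch (not part of this commit), assuming the AdaBoost class above is importable from the changed module and using a small hypothetical toy dataset:

    import numpy as np

    # Hypothetical 1D toy data: one stump at a threshold between 3 and 8 separates it
    X = np.array([[1.0], [2.0], [3.0], [8.0], [9.0], [10.0]])
    y = np.array([0, 0, 0, 1, 1, 1])

    clf = AdaBoost(n_estimators=10)
    clf.fit(X, y)
    print(clf.predict(X))  # expected: [0 0 0 1 1 1]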