Skip to content

Commit af6bb32

Browse files
committed
add: docstrings for multilabel strategies added
1 parent fe6bba0 commit af6bb32

File tree

2 files changed

+79
-19
lines changed

2 files changed

+79
-19
lines changed

examples/multilabel_svm.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828

2929
learner = ActiveLearner(
3030
estimator=OneVsRestClassifier(SVC(probability=True)),
31-
query_strategy=mean_score,
31+
query_strategy=avg_score,
3232
X_training=X_initial, y_training=y_initial
3333
)
3434

modAL/multilabel.py

Lines changed: 78 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -119,21 +119,51 @@ def mean_max_loss(classifier: BaseEstimator,
119119
return query_idx, X_pool[query_idx]
120120

121121

122-
def max_uncertainty(classifier: BaseEstimator,
123-
X_pool: modALinput,
124-
n_instances: int = 1) -> Tuple[np.ndarray, modALinput]:
125-
classwise_uncertainty = classifier.predict_proba(X_pool)
126-
classwise_max = np.max(classwise_uncertainty, axis=1)
127-
query_idx = multi_argmax(classwise_max, n_instances)
122+
def min_confidence(classifier: BaseEstimator,
123+
X_pool: modALinput,
124+
n_instances: int = 1) -> Tuple[np.ndarray, modALinput]:
125+
"""
126+
MinConfidence query strategy for multilabel classification.
127+
128+
For more details on this query strategy, see
129+
Esuli and Sebastiani, Active Learning Strategies for Multi-Label Text Classification
130+
(http://dx.doi.org/10.1007/978-3-642-00958-7_12)
131+
132+
Args:
133+
classifier: The multilabel classifier for which the labels are to be queried.
134+
X_pool: The pool of samples to query from.
135+
136+
Returns:
137+
The indices of the instances from X_pool chosen to be labelled; the instances from X_pool chosen to be labelled.
138+
"""
139+
140+
classwise_confidence = classifier.predict_proba(X_pool)
141+
classwise_min = np.min(classwise_confidence, axis=1)
142+
query_idx = multi_argmax((-1)*classwise_min, n_instances)
128143

129144
return query_idx, X_pool[query_idx]
130145

131146

132-
def mean_uncertainty(classifier: BaseEstimator,
133-
X_pool: modALinput,
134-
n_instances: int = 1) -> Tuple[np.ndarray, modALinput]:
135-
classwise_uncertainty = classifier.predict_proba(X_pool)
136-
classwise_mean = np.mean(classwise_uncertainty, axis=1)
147+
def avg_confidence(classifier: BaseEstimator,
148+
X_pool: modALinput,
149+
n_instances: int = 1) -> Tuple[np.ndarray, modALinput]:
150+
"""
151+
AvgConfidence query strategy for multilabel classification.
152+
153+
For more details on this query strategy, see
154+
Esuli and Sebastiani, Active Learning Strategies for Multi-Label Text Classification
155+
(http://dx.doi.org/10.1007/978-3-642-00958-7_12)
156+
157+
Args:
158+
classifier: The multilabel classifier for which the labels are to be queried.
159+
X_pool: The pool of samples to query from.
160+
161+
Returns:
162+
The indices of the instances from X_pool chosen to be labelled; the instances from X_pool chosen to be labelled.
163+
"""
164+
165+
classwise_confidence = classifier.predict_proba(X_pool)
166+
classwise_mean = np.mean(classwise_confidence, axis=1)
137167
query_idx = multi_argmax(classwise_mean, n_instances)
138168

139169
return query_idx, X_pool[query_idx]
@@ -142,21 +172,51 @@ def mean_uncertainty(classifier: BaseEstimator,
142172
def max_score(classifier: BaseEstimator,
143173
X_pool: modALinput,
144174
n_instances: int = 1) -> Tuple[np.ndarray, modALinput]:
145-
classwise_uncertainty = classifier.predict_proba(X_pool)
175+
"""
176+
MaxScore query strategy for multilabel classification.
177+
178+
For more details on this query strategy, see
179+
Esuli and Sebastiani, Active Learning Strategies for Multi-Label Text Classification
180+
(http://dx.doi.org/10.1007/978-3-642-00958-7_12)
181+
182+
Args:
183+
classifier: The multilabel classifier for which the labels are to be queried.
184+
X_pool: The pool of samples to query from.
185+
186+
Returns:
187+
The indices of the instances from X_pool chosen to be labelled; the instances from X_pool chosen to be labelled.
188+
"""
189+
190+
classwise_confidence = classifier.predict_proba(X_pool)
146191
classwise_predictions = classifier.predict(X_pool)
147-
classwise_scores = classwise_uncertainty*(classwise_predictions - 1/2)
192+
classwise_scores = classwise_confidence*(classwise_predictions - 1/2)
148193
classwise_max = np.max(classwise_scores, axis=1)
149194
query_idx = multi_argmax(classwise_max, n_instances)
150195

151196
return query_idx, X_pool[query_idx]
152197

153198

154-
def mean_score(classifier: BaseEstimator,
155-
X_pool: modALinput,
156-
n_instances: int = 1) -> Tuple[np.ndarray, modALinput]:
157-
classwise_uncertainty = classifier.predict_proba(X_pool)
199+
def avg_score(classifier: BaseEstimator,
200+
X_pool: modALinput,
201+
n_instances: int = 1) -> Tuple[np.ndarray, modALinput]:
202+
"""
203+
AvgScore query strategy for multilabel classification.
204+
205+
For more details on this query strategy, see
206+
Esuli and Sebastiani, Active Learning Strategies for Multi-Label Text Classification
207+
(http://dx.doi.org/10.1007/978-3-642-00958-7_12)
208+
209+
Args:
210+
classifier: The multilabel classifier for which the labels are to be queried.
211+
X_pool: The pool of samples to query from.
212+
213+
Returns:
214+
The indices of the instances from X_pool chosen to be labelled; the instances from X_pool chosen to be labelled.
215+
"""
216+
217+
classwise_confidence = classifier.predict_proba(X_pool)
158218
classwise_predictions = classifier.predict(X_pool)
159-
classwise_scores = classwise_uncertainty*(classwise_predictions-1/2)
219+
classwise_scores = classwise_confidence*(classwise_predictions-1/2)
160220
classwise_mean = np.mean(classwise_scores, axis=1)
161221
query_idx = multi_argmax(classwise_mean, n_instances)
162222

0 commit comments

Comments
 (0)