@@ -12,7 +12,6 @@ class QueryStrategy(metaclass=ABCMeta):
1212 A QueryStrategy advices on which unlabeled data to be queried next given
1313 a pool of labeled and unlabeled data.
1414 """
15-
1615 def __init__ (self , dataset , ** kwargs ):
1716 self ._dataset = dataset
1817 dataset .on_update (self .update )
@@ -22,53 +21,136 @@ def dataset(self):
2221 return self ._dataset
2322
def update(self, entry_id, label):
    """Hook for refreshing internal state once a queried sample is labeled.

    The constructor registers this method with the dataset through
    ``dataset.on_update``. The base implementation does nothing;
    strategies that keep internal state override it.

    Parameters
    ----------
    entry_id : int
        Index of the sample that has just been labeled.

    label : float
        Label assigned to the queried sample.
    """
    return None
2636
@abstractmethod
def make_query(self):
    """Pick the sample that should be queried and labeled next.

    Implementations are expected to be read-only: making a query must
    not modify the internal state of the strategy.

    Returns
    -------
    ask_id : int
        Index of the next unlabeled sample to be queried and labeled.
    """
3049
3150
class Labeler(metaclass=ABCMeta):
    """Label the queries made by QueryStrategies.

    A Labeler assigns labels to the samples queried by QueryStrategies.
    """

    @abstractmethod
    def label(self, feature):
        """Return the class label for the given feature vector.

        Parameters
        ----------
        feature : array-like, shape (n_features,)
            The feature vector whose label is being queried.

        Returns
        -------
        label : int
            The class label of the queried feature.
        """
4171
4272
class Model(metaclass=ABCMeta):
    """Classification Model

    After being trained on a training dataset, a Model provides a
    class-predicting function for future samples.
    """

    @abstractmethod
    def train(self, dataset, *args, **kwargs):
        """Fit the model to the given training dataset.

        Parameters
        ----------
        dataset : Dataset object
            The training dataset the model is to be trained on.

        Returns
        -------
        self : object
            Returns self.
        """

    @abstractmethod
    def predict(self, feature, *args, **kwargs):
        """Predict the class labels of the input samples.

        Parameters
        ----------
        feature : array-like, shape (n_samples, n_features)
            The unlabeled samples whose labels are to be predicted.

        Returns
        -------
        y_pred : array-like, shape (n_samples,)
            The class labels for the samples in the feature array.
        """

    @abstractmethod
    def score(self, testing_dataset, *args, **kwargs):
        """Return the mean accuracy on the test dataset.

        Parameters
        ----------
        testing_dataset : Dataset object
            The testing dataset used to measure the performance of the
            trained model.

        Returns
        -------
        score : float
            Mean accuracy of the model's predictions on
            ``testing_dataset``.
        """
61126
62127
class ContinuousModel(Model):
    """Classification Model with intermediate continuous output

    A continuous classification model is able to output a real-valued
    vector for each feature provided.
    """

    @abstractmethod
    def predict_real(self, feature, *args, **kwargs):
        """Predict confidence scores for samples.

        One confidence score is returned per (sample, class)
        combination. The larger the value of the entry
        (sample=x, class=k), the more confident the model is that
        sample x belongs to class k.

        Taking Logistic Regression as an example, the returned value is
        the signed distance of the sample to the hyperplane.

        Parameters
        ----------
        feature : array-like, shape (n_samples, n_features)
            The samples whose confidence scores are to be predicted.

        Returns
        -------
        X : array-like, shape (n_samples, n_classes)
            The confidence score of each (sample, class) combination.
        """
0 commit comments