|
| 1 | +import numpy as np |
| 2 | + |
| 3 | +from time import time |
| 4 | + |
| 5 | +from sklearn.datasets import load_iris |
| 6 | + |
| 7 | +from acton.acton import main as acton_main |
| 8 | + |
| 9 | +from alp.active_learning.active_learning import ActiveLearner as ActiveLearnerALP |
| 10 | + |
| 11 | +from libact.base.dataset import Dataset |
| 12 | +from libact.labelers import IdealLabeler |
| 13 | +from libact.query_strategies import UncertaintySampling, QueryByCommittee |
| 14 | +from libact.query_strategies.multiclass.expected_error_reduction import EER |
| 15 | +from libact.models.logistic_regression import LogisticRegression as LogisticRegressionLibact |
| 16 | + |
| 17 | +from modAL.models import ActiveLearner, Committee |
| 18 | +from modAL.expected_error import expected_error_reduction |
| 19 | + |
| 20 | +from sklearn.linear_model import LogisticRegression |
| 21 | + |
| 22 | + |
# Average wall-clock runtimes (seconds per call), keyed by benchmark name.
runtime = {}


def timeit(n_reps=10):
    """Decorator factory that times a function averaged over ``n_reps`` runs.

    The decorated function is executed ``n_reps`` times per call; the mean
    wall-clock duration is printed and recorded in the module-level
    ``runtime`` dict under the function's name.  The result of the last
    repetition is returned to the caller.

    :param n_reps: number of repetitions to average over (default 10).
    """
    from functools import wraps  # local import: keeps the file's import block untouched

    def timer(func):

        @wraps(func)  # preserve __name__/__doc__ on the returned wrapper
        def timed_func(*args, **kwargs):
            start = time()
            for _ in range(n_reps):
                result = func(*args, **kwargs)
            end = time()
            # Compute the average once instead of twice, as before.
            avg = (end - start) / n_reps
            print("%s has been executed in %f s avg for %d reps" % (func.__name__, avg, n_reps))
            runtime[func.__name__] = avg
            return result

        return timed_func

    return timer
| 42 | + |
| 43 | + |
@timeit()
def libact_uncertainty(X, y, n_queries):
    """Benchmark libact's least-confident uncertainty sampling on (X, y)."""
    # Seed with one known label per iris class; everything else is unlabeled.
    labels = np.array([None] * len(y))
    labels[0], labels[50], labels[100] = 0, 1, 2
    train_ds = Dataset(X, labels)
    oracle = IdealLabeler(Dataset(X, y))
    model = LogisticRegressionLibact(solver='liblinear', n_jobs=1, multi_class='ovr')  # SVM(gamma='auto', probability=True)
    strategy = UncertaintySampling(train_ds, model=model, method='lc')
    model.train(train_ds)

    for _ in range(n_queries):
        idx = strategy.make_query()
        train_ds.update(idx, oracle.label(X[idx]))
        model.train(train_ds)
| 60 | + |
| 61 | + |
@timeit()
def libact_EER(X, y, n_queries):
    """Benchmark libact's expected error reduction (0/1 loss) on (X, y)."""
    # Seed with one known label per iris class; everything else is unlabeled.
    labels = np.array([None] * len(y))
    labels[0], labels[50], labels[100] = 0, 1, 2
    train_ds = Dataset(X, labels)
    oracle = IdealLabeler(Dataset(X, y))
    model = LogisticRegressionLibact(solver='liblinear', n_jobs=1, multi_class='ovr')  # SVM(gamma='auto', probability=True)
    strategy = EER(train_ds, model=model, loss='01')
    model.train(train_ds)

    for _ in range(n_queries):
        idx = strategy.make_query()
        train_ds.update(idx, oracle.label(X[idx]))
        model.train(train_ds)
| 78 | + |
| 79 | + |
@timeit()
def libact_QBC(X, y, n_queries):
    """Benchmark libact's query-by-committee with two logistic regressions."""
    # Seed with one known label per iris class; everything else is unlabeled.
    labels = np.array([None] * len(y))
    labels[0], labels[50], labels[100] = 0, 1, 2
    train_ds = Dataset(X, labels)
    oracle = IdealLabeler(Dataset(X, y))
    committee = [
        LogisticRegressionLibact(solver='liblinear', n_jobs=1, multi_class='ovr')
        for _ in range(2)
    ]
    strategy = QueryByCommittee(train_ds, models=committee, method='lc')
    for member in committee:
        member.train(train_ds)

    for _ in range(n_queries):
        idx = strategy.make_query()
        train_ds.update(idx, oracle.label(X[idx]))
        # Every committee member is retrained on the grown labeled pool.
        for member in committee:
            member.train(train_ds)
| 100 | + |
| 101 | + |
@timeit()
def modAL_uncertainty(X, y, n_queries):
    """Benchmark modAL's default (uncertainty) query strategy on (X, y)."""
    seed = [0, 50, 100]  # one labeled instance per iris class
    learner = ActiveLearner(
        LogisticRegression(solver='liblinear', n_jobs=1, multi_class='ovr'),
        X_training=X[seed], y_training=y[seed],
    )

    for _ in range(n_queries):
        idx, _instance = learner.query(X)
        learner.teach(X[idx], y[idx])
| 110 | + |
| 111 | + |
@timeit()
def modAL_QBC(X, y, n_queries):
    """Benchmark modAL's Committee (two logistic regressions) on (X, y)."""
    seed = [0, 50, 100]  # one labeled instance per iris class
    members = [
        ActiveLearner(
            LogisticRegression(solver='liblinear', n_jobs=1, multi_class='ovr'),
            X_training=X[seed], y_training=y[seed],
        )
        for _ in range(2)
    ]
    committee = Committee(members)

    for _ in range(n_queries):
        idx, _instance = committee.query(X)
        committee.teach(X[idx], y[idx])
| 124 | + |
| 125 | + |
@timeit()
def modAL_EER(X, y, n_queries):
    """Benchmark modAL's expected error reduction strategy on (X, y)."""
    seed = [0, 50, 100]  # one labeled instance per iris class
    learner = ActiveLearner(
        LogisticRegression(solver='liblinear', n_jobs=1, multi_class='ovr'),
        query_strategy=expected_error_reduction,
        X_training=X[seed], y_training=y[seed],
    )

    for _ in range(n_queries):
        idx, _instance = learner.query(X)
        learner.teach(X[idx], y[idx])
| 135 | + |
| 136 | + |
# acton requires a txt format for data
@timeit()
def acton_uncertainty(data_path, n_queries):
    """Benchmark acton's uncertainty recommender, reading data from a file."""
    # acton has no SVM support, so the LogisticRegression model is used
    acton_main(
        data_path=data_path,
        feature_cols=['feat01', 'feat02', 'feat03', 'feat04'],
        label_col='label',
        output_path='out.csv',
        n_epochs=n_queries,
        initial_count=3,
        recommender='UncertaintyRecommender',
        predictor='LogisticRegression',
    )
| 150 | + |
| 151 | + |
# acton requires a txt format for data
@timeit()
def acton_QBC(data_path, n_queries):
    """Benchmark acton's query-by-committee recommender, reading from a file."""
    # acton has no SVM support, so the LogisticRegression model is used
    acton_main(
        data_path=data_path,
        feature_cols=['feat01', 'feat02', 'feat03', 'feat04'],
        label_col='label',
        output_path='out.csv',
        n_epochs=n_queries,
        initial_count=3,
        recommender='QBCRecommender',
        predictor='LogisticRegressionCommittee',
    )
| 165 | + |
| 166 | + |
@timeit()
def alp_uncertainty(X, y, n_queries):
    """Benchmark alp's least-confident strategy with a logistic regression."""
    seed = [0, 50, 100]  # one labeled instance per iris class
    X_pool, y_pool = X[seed], y[seed]
    clf = LogisticRegression(solver='liblinear', n_jobs=1, multi_class='ovr')
    clf.fit(X_pool, y_pool)
    learner = ActiveLearnerALP(strategy='least_confident')

    for _ in range(n_queries):
        idx = learner.rank(clf, X, num_queries=1)
        # Grow the labeled pool with the queried instance, then refit.
        X_pool = np.concatenate((X_pool, X[idx]), axis=0)
        y_pool = np.concatenate((y_pool, y[idx]), axis=0)
        clf.fit(X_pool, y_pool)
| 179 | + |
| 180 | + |
@timeit()
def alp_QBC(X, y, n_queries):
    """Benchmark alp's vote-entropy strategy with a two-model committee."""
    seed = [0, 50, 100]  # one labeled instance per iris class
    X_pool, y_pool = X[seed], y[seed]
    committee = [
        LogisticRegression(solver='liblinear', n_jobs=1, multi_class='ovr')
        for _ in range(2)
    ]
    for clf in committee:
        clf.fit(X_pool, y_pool)

    learner = ActiveLearnerALP(strategy='vote_entropy')

    for _ in range(n_queries):
        idx = learner.rank(committee, X, num_queries=1)
        # Grow the labeled pool with the queried instance, then refit all members.
        X_pool = np.concatenate((X_pool, X[idx]), axis=0)
        y_pool = np.concatenate((y_pool, y[idx]), axis=0)
        for clf in committee:
            clf.fit(X_pool, y_pool)
| 198 | + |
| 199 | + |
def comparisons(n_queries=10):
    """Run every benchmark on the iris dataset with the same query budget.

    :param n_queries: number of active-learning queries per benchmark.
    """
    # loading the data
    X, y = load_iris(return_X_y=True)

    # Same execution order as before: libact, acton, alp, then modAL.
    for bench in (libact_uncertainty, libact_QBC, libact_EER):
        bench(X, y, n_queries)

    # acton reads its data from disk rather than in-memory arrays.
    acton_uncertainty('iris.csv', n_queries)
    acton_QBC('iris.csv', n_queries)

    for bench in (alp_uncertainty, alp_QBC,
                  modAL_uncertainty, modAL_QBC, modAL_EER):
        bench(X, y, n_queries)
| 214 | + |
| 215 | + |
# Script entry point: run every benchmark, then dump the collected timings.
if __name__ == '__main__':
    comparisons()
    print(runtime)
0 commit comments