# To apply a classifier on this data, we need to flatten the images, to
# turn the data into a (samples, features) matrix:
n_samples = len(digits.data)
-data = digits.data / 16.
+data = digits.data / 16.0
data -= data.mean(axis=0)

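The division by 16.0 and the mean subtraction normalize the data: the load_digits pixel values span 0..16, so each feature lands in [0, 1] before centering. A minimal self-contained check (a sketch, not part of the diff):

import numpy as np
from sklearn.datasets import load_digits

digits = load_digits()
data = digits.data / 16.0  # pixel values are 0..16, so this maps to [0, 1]
data -= data.mean(axis=0)  # center each feature at zero
assert np.allclose(data.mean(axis=0), 0)
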
# We learn the digits on the first half of the data
-data_train, targets_train = data[:n_samples // 2], digits.target[:n_samples // 2]
+data_train, targets_train = (
+    data[: n_samples // 2],
+    digits.target[: n_samples // 2],
+)


# Now predict the value of the digit on the second half:
-data_test, targets_test = data[n_samples // 2:], digits.target[n_samples // 2:]
-#data_test = scaler.transform(data_test)
+data_test, targets_test = (
+    data[n_samples // 2 :],
+    digits.target[n_samples // 2 :],
+)
+# data_test = scaler.transform(data_test)

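The manual slicing trains on the first half of the samples and tests on the second half. An equivalent split using scikit-learn's helper, shown only as a sketch (the example itself slices by hand):

from sklearn.model_selection import train_test_split

data_train, data_test, targets_train, targets_test = train_test_split(
    data, digits.target, test_size=0.5, shuffle=False  # keep the original order
)
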
# fix model parameters:
-GAMMA = .2
+GAMMA = 0.2
SIGMA = np.sqrt(1 / (2 * GAMMA))

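GAMMA and SIGMA are two parametrizations of the same RBF kernel, k(x, y) = exp(-gamma * ||x - y||^2) = exp(-||x - y||^2 / (2 * sigma^2)), hence sigma = sqrt(1 / (2 * gamma)). A quick numeric check (sketch):

import numpy as np

GAMMA = 0.2
SIGMA = np.sqrt(1 / (2 * GAMMA))  # about 1.5811
assert np.isclose(1 / (2 * SIGMA**2), GAMMA)
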
# Create a classifier: a support vector classifier
⋯

# create pipeline from kernel approximation
# and linear svm
-feature_map_fastfood = Fastfood(sigma=SIGMA, tradeoff_mem_accuracy='mem', random_state=1)
+feature_map_fastfood = Fastfood(
+    sigma=SIGMA, tradeoff_mem_accuracy="mem", random_state=1
+)
feature_map_fourier = RBFSampler(gamma=GAMMA, random_state=1)
feature_map_nystroem = Nystroem(gamma=GAMMA, random_state=1)
-fastfood_approx_svm = pipeline.Pipeline([("feature_map", feature_map_fastfood),
-                                         ("svm", svm.LinearSVC())])
+fastfood_approx_svm = pipeline.Pipeline(
+    [("feature_map", feature_map_fastfood), ("svm", svm.LinearSVC())]
+)

-fourier_approx_svm = pipeline.Pipeline([("feature_map", feature_map_fourier),
-                                        ("svm", svm.LinearSVC())])
+fourier_approx_svm = pipeline.Pipeline(
+    [("feature_map", feature_map_fourier), ("svm", svm.LinearSVC())]
+)

-nystroem_approx_svm = pipeline.Pipeline([("feature_map", feature_map_nystroem),
-                                         ("svm", svm.LinearSVC())])
+nystroem_approx_svm = pipeline.Pipeline(
+    [("feature_map", feature_map_nystroem), ("svm", svm.LinearSVC())]
+)

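Each pipeline chains an approximate RBF feature map with a linear SVM trained in the transformed space. The fit/score loop lives in the elided part of the file; a minimal usage sketch (n_components=100 is an assumption here, matching the plot titles further down):

fourier_approx_svm.set_params(feature_map__n_components=100)
fourier_approx_svm.fit(data_train, targets_train)
print(fourier_approx_svm.score(data_test, targets_test))
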
# fit and predict using linear and kernel svm:

⋯
timescale = plt.subplot(212)

accuracy.plot(sample_sizes, nystroem_scores, label="Nystroem approx. kernel")
-timescale.plot(sample_sizes, nystroem_times, '--',
-               label='Nystroem approx. kernel')
+timescale.plot(
+    sample_sizes, nystroem_times, "--", label="Nystroem approx. kernel"
+)

accuracy.plot(sample_sizes, fourier_scores, label="Fourier approx. kernel")
-timescale.plot(sample_sizes, fourier_times, '--',
-               label='Fourier approx. kernel')
+timescale.plot(
+    sample_sizes, fourier_times, "--", label="Fourier approx. kernel"
+)

accuracy.plot(sample_sizes, fastfood_scores, label="Fastfood approx. kernel")
-timescale.plot(sample_sizes, fastfood_times, '--',
-               label='Fastfood approx. kernel')
+timescale.plot(
+    sample_sizes, fastfood_times, "--", label="Fastfood approx. kernel"
+)

# horizontal lines for exact rbf and linear kernels:
-accuracy.plot([sample_sizes[0], sample_sizes[-1]],
-              [linear_svm_score, linear_svm_score], label="linear svm")
-timescale.plot([sample_sizes[0], sample_sizes[-1]],
-               [linear_svm_time, linear_svm_time], '--', label='linear svm')
-
-accuracy.plot([sample_sizes[0], sample_sizes[-1]],
-              [kernel_svm_score, kernel_svm_score], label="rbf svm")
-timescale.plot([sample_sizes[0], sample_sizes[-1]],
-               [kernel_svm_time, kernel_svm_time], '--', label='rbf svm')
+accuracy.plot(
+    [sample_sizes[0], sample_sizes[-1]],
+    [linear_svm_score, linear_svm_score],
+    label="linear svm",
+)
+timescale.plot(
+    [sample_sizes[0], sample_sizes[-1]],
+    [linear_svm_time, linear_svm_time],
+    "--",
+    label="linear svm",
+)
+
+accuracy.plot(
+    [sample_sizes[0], sample_sizes[-1]],
+    [kernel_svm_score, kernel_svm_score],
+    label="rbf svm",
+)
+timescale.plot(
+    [sample_sizes[0], sample_sizes[-1]],
+    [kernel_svm_time, kernel_svm_time],
+    "--",
+    label="rbf svm",
+)

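Plotting a two-point series between sample_sizes[0] and sample_sizes[-1] draws a constant baseline for each exact kernel. Axes.axhline is an equivalent, slightly more direct alternative (a sketch, not what the file does):

accuracy.axhline(kernel_svm_score, label="rbf svm")
timescale.axhline(kernel_svm_time, linestyle="--", label="rbf svm")
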
# vertical line for dataset dimensionality = 64
accuracy.plot([64, 64], [0.7, 1], label="n_features")

# legends and labels
accuracy.set_title("Classification accuracy")
-timescale.set_title("Training times for dataset size of " + str(n_samples) + " with dimensionality of "
-                    + str(np.size(data, 1)))
+timescale.set_title(
+    "Training times for dataset size of "
+    + str(n_samples)
+    + " with dimensionality of "
+    + str(np.size(data, 1))
+)
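The same title reads more directly as an f-string (a sketch producing identical output):

timescale.set_title(
    f"Training times for dataset size of {n_samples} "
    f"with dimensionality of {np.size(data, 1)}"
)
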
accuracy.set_xlim(sample_sizes[0], sample_sizes[-1])
accuracy.set_xticks(())
accuracy.set_ylim(np.min(fourier_scores), 1)
timescale.set_xlabel("Sampling steps = transformed feature dimension")
accuracy.set_ylabel("Classification accuracy")
timescale.set_ylabel("Training time in seconds")
-accuracy.legend(loc='best')
-timescale.legend(loc='best')
+accuracy.legend(loc="best")
+timescale.legend(loc="best")

# visualize the decision surface, projected down to the first
# two principal components of the dataset
⋯
flat_grid = grid.reshape(-1, data.shape[1])

# title for the plots
-titles = ['SVC with rbf kernel',
-          'SVC (linear kernel)\n with Fastfood rbf feature map\n'
-          'n_components=100',
-          'SVC (linear kernel)\n with Fourier rbf feature map\n'
-          'n_components=100',
-          'SVC (linear kernel)\n with Nystroem rbf feature map\n'
-          'n_components=100']
+titles = [
+    "SVC with rbf kernel",
+    "SVC (linear kernel)\n with Fastfood rbf feature map\n" "n_components=100",
+    "SVC (linear kernel)\n with Fourier rbf feature map\n" "n_components=100",
+    "SVC (linear kernel)\n with Nystroem rbf feature map\n" "n_components=100",
+]

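Note the adjacent string literals kept by the reformatting: Python concatenates them at compile time, so each entry is still one three-line title. For example:

title = "SVC (linear kernel)\n with Fourier rbf feature map\n" "n_components=100"
assert title == "SVC (linear kernel)\n with Fourier rbf feature map\nn_components=100"
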
plt.tight_layout()
plt.figure(figsize=(12, 5))

# predict and plot
-for i, clf in enumerate((kernel_svm, fastfood_approx_svm, nystroem_approx_svm,
-                         fourier_approx_svm)):
+for i, clf in enumerate(
+    (kernel_svm, fastfood_approx_svm, nystroem_approx_svm, fourier_approx_svm)
+):
    # Plot the decision boundary. For that, we will assign a color to each
    # point in the mesh [x_min, x_max]x[y_min, y_max].
    plt.subplot(1, 4, i + 1)
⋯
    # Put the result into a color plot
    Z = Z.reshape(grid.shape[:-1])
    plt.contourf(multiples, multiples, Z, cmap=plt.cm.Paired)
-    plt.axis('off')
+    plt.axis("off")

    # Plot also the training points
    plt.scatter(X[:, 0], X[:, 1], c=targets_train, cmap=plt.cm.Paired)

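The loop follows the standard decision-surface recipe: predict over a flattened 2-D grid, reshape to the grid, and contour-plot. A self-contained toy version of the same pattern (all data and names below are illustrative, not taken from this file):

import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm

rng = np.random.RandomState(0)
X2 = rng.randn(200, 2)
y2 = (X2[:, 0] * X2[:, 1] > 0).astype(int)  # XOR-like toy labels
clf = svm.SVC(kernel="rbf", gamma=0.2).fit(X2, y2)

multiples = np.linspace(-3, 3, 200)  # grid steps along both axes
xx, yy = np.meshgrid(multiples, multiples)
Z = clf.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

plt.contourf(multiples, multiples, Z, cmap=plt.cm.Paired)
plt.scatter(X2[:, 0], X2[:, 1], c=y2, cmap=plt.cm.Paired)
plt.axis("off")
plt.show()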