Skip to content

Commit 1b8de7a

Browse files
authored
Fix SVM usage for sklearn and cuml (#28)
1 parent 04ddc78 commit 1b8de7a

File tree

3 files changed: 21 additions and 21 deletions

3 files changed: 21 additions and 21 deletions

cuml/svm.py

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -36,26 +36,26 @@ def get_optimal_cache_size(n_rows, dtype=np.double, max_cache=64):
3636

3737
parser = argparse.ArgumentParser(description='cuML SVM benchmark')
3838

39-
parser.add_argument('-C', dest='C', type=float, default=0.01,
40-
help='SVM slack parameter')
39+
parser.add_argument('-C', dest='C', type=float, default=1.0,
40+
help='SVM regularization parameter')
4141
parser.add_argument('--kernel', choices=('linear', 'rbf'),
4242
default='linear', help='SVM kernel function')
43-
parser.add_argument('--gamma', type=float, default=None,
44-
help='Parameter for kernel="rbf"')
45-
parser.add_argument('--maxiter', type=int, default=2000,
43+
parser.add_argument('--maxiter', type=int, default=-1,
4644
help='Maximum iterations for the iterative solver. '
4745
'-1 means no limit.')
48-
parser.add_argument('--max-cache-size', type=int, default=64,
46+
parser.add_argument('--gamma', type=float, default=None,
47+
help='Parameter for kernel="rbf"')
48+
parser.add_argument('--max-cache-size', type=int, default=8,
4949
help='Maximum cache size, in gigabytes, for SVM.')
50-
parser.add_argument('--tol', type=float, default=1e-16,
50+
parser.add_argument('--tol', type=float, default=1e-3,
5151
help='Tolerance passed to sklearn.svm.SVC')
5252
params = parse_args(parser)
5353

5454
# Load data
5555
X_train, X_test, y_train, y_test = load_data(params)
5656

5757
if params.gamma is None:
58-
params.gamma = 'auto'
58+
params.gamma = 1.0 / X_train.shape[1]
5959

6060
cache_size_bytes = get_optimal_cache_size(X_train.shape[0],
6161
max_cache=params.max_cache_size)

daal4py/svm.py

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -269,6 +269,7 @@ def test_predict(X, training_result, params):
269269
else:
270270
prdct = multi_class_classifier_prediction(
271271
nClasses=params.n_classes,
272+
method='thunder',
272273
fptype=fptype,
273274
maxIterations=params.maxiter,
274275
accuracyThreshold=params.tol,
@@ -290,20 +291,19 @@ def test_predict(X, training_result, params):
290291
def main():
291292
parser = argparse.ArgumentParser(description='daal4py SVC benchmark with '
292293
'linear kernel')
293-
parser.add_argument('-C', dest='C', type=float, default=0.01,
294-
help='SVM slack parameter')
294+
parser.add_argument('-C', dest='C', type=float, default=1.0,
295+
help='SVM regularization parameter')
295296
parser.add_argument('--kernel', choices=('linear', 'rbf'),
296297
default='linear', help='SVM kernel function')
297298
parser.add_argument('--gamma', type=float, default=None,
298299
help='Parameter for kernel="rbf"')
299-
parser.add_argument('--maxiter', type=int, default=2000,
300-
help='Maximum iterations for the iterative solver. '
301-
'-1 means no limit.')
302-
parser.add_argument('--max-cache-size', type=int, default=64,
300+
parser.add_argument('--maxiter', type=int, default=100000,
301+
help='Maximum iterations for the iterative solver. ')
302+
parser.add_argument('--max-cache-size', type=int, default=8,
303303
help='Maximum cache size, in gigabytes, for SVM.')
304304
parser.add_argument('--tau', type=float, default=1e-12,
305305
help='Tau parameter for working set selection scheme')
306-
parser.add_argument('--tol', type=float, default=1e-16,
306+
parser.add_argument('--tol', type=float, default=1e-3,
307307
help='Tolerance')
308308
parser.add_argument('--no-shrinking', action='store_false', default=True,
309309
dest='shrinking',

sklearn/svm.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -37,18 +37,18 @@ def get_optimal_cache_size(n_rows, dtype=np.double, max_cache=64):
3737

3838
parser = argparse.ArgumentParser(description='scikit-learn SVM benchmark')
3939

40-
parser.add_argument('-C', dest='C', type=float, default=0.01,
41-
help='SVM slack parameter')
40+
parser.add_argument('-C', dest='C', type=float, default=1.0,
41+
help='SVM regularization parameter')
4242
parser.add_argument('--kernel', choices=('linear', 'rbf'),
4343
default='linear', help='SVM kernel function')
4444
parser.add_argument('--gamma', type=float, default=None,
4545
help='Parameter for kernel="rbf"')
46-
parser.add_argument('--maxiter', type=int, default=2000,
46+
parser.add_argument('--maxiter', type=int, default=-1,
4747
help='Maximum iterations for the iterative solver. '
4848
'-1 means no limit.')
49-
parser.add_argument('--max-cache-size', type=int, default=64,
49+
parser.add_argument('--max-cache-size', type=int, default=8,
5050
help='Maximum cache size, in gigabytes, for SVM.')
51-
parser.add_argument('--tol', type=float, default=1e-16,
51+
parser.add_argument('--tol', type=float, default=1e-3,
5252
help='Tolerance passed to sklearn.svm.SVC')
5353
parser.add_argument('--no-shrinking', action='store_false', default=True,
5454
dest='shrinking', help="Don't use shrinking heuristic")
@@ -58,7 +58,7 @@ def get_optimal_cache_size(n_rows, dtype=np.double, max_cache=64):
5858
X_train, X_test, y_train, y_test = load_data(params)
5959

6060
if params.gamma is None:
61-
params.gamma = 'auto'
61+
params.gamma = 1.0 / X_train.shape[1]
6262

6363
cache_size_bytes = get_optimal_cache_size(X_train.shape[0],
6464
max_cache=params.max_cache_size)

0 commit comments

Comments
 (0)