Skip to content

Commit b3500dd

Browse files
committed
Creating branch for large scale measurements
1 parent eddb9e8 commit b3500dd

File tree

12 files changed

+298
-6
lines changed

12 files changed

+298
-6
lines changed
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
{
2+
"INCLUDE": ["../../common/sklearn.json", "../../spmd/stats_covariance.json", "large_scale.json"],
3+
"PARAMETERS_SETS": {
4+
"spmd basicstats parameters": {
5+
"algorithm": {
6+
"estimator": "BasicStatistics",
7+
"estimator_methods": { "training": "compute" }
8+
},
9+
"data": {
10+
"split_kwargs": { "test_size": 0.0001 }
11+
}
12+
},
13+
"synthetic data": {
14+
"data": [
15+
{ "source": "make_blobs", "generation_kwargs": { "n_samples": 10000000, "n_features": 10, "centers": 1 } },
16+
{ "source": "make_blobs", "generation_kwargs": { "n_samples": 100000, "n_features": 1000, "centers": 1 } }
17+
]
18+
}
19+
},
20+
"TEMPLATES": {
21+
"basicstats": {
22+
"SETS": [
23+
"sklearnex spmd implementation",
24+
"large scale default parameters",
25+
"synthetic data",
26+
"spmd basicstats parameters"
27+
]
28+
}
29+
}
30+
}
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
{
2+
"INCLUDE": ["../../common/sklearn.json", "../../spmd/stats_covariance.json", "large_scale.json"],
3+
"PARAMETERS_SETS": {
4+
"spmd basicstats parameters": {
5+
"algorithm": {
6+
"estimator": "EmpiricalCovariance",
7+
"estimator_methods": { "training": "fit" }
8+
},
9+
"data": {
10+
"split_kwargs": { "test_size": 0.0001 }
11+
}
12+
},
13+
"synthetic data": {
14+
"data": [
15+
{ "source": "make_blobs", "generation_kwargs": { "n_samples": 10000000, "n_features": 10, "centers": 1 } },
16+
{ "source": "make_blobs", "generation_kwargs": { "n_samples": 100000, "n_features": 1000, "centers": 1 } }
17+
]
18+
}
19+
},
20+
"TEMPLATES": {
21+
"covariance": {
22+
"SETS": [
23+
"sklearnex spmd implementation",
24+
"large scale default parameters",
25+
"synthetic data",
26+
"spmd basicstats parameters"
27+
]
28+
}
29+
}
30+
}

configs/spmd/large_scale/dbscan.json

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
{
2+
"INCLUDE": ["../../common/sklearn.json", "../../regular/dbscan.json", "large_scale.json"],
3+
"PARAMETERS_SETS": {
4+
"spmd dbscan parameters": {
5+
"algorithm": {
6+
"estimator": "DBSCAN",
7+
"estimator_methods": {
8+
"training": "fit"
9+
}
10+
},
11+
"data": {
12+
"dtype": "float64"
13+
}
14+
},
15+
"synthetic dataset": {
16+
"data": [
17+
{ "source": "make_blobs", "generation_kwargs": { "n_samples": 100000, "n_features": 10, "centers": 10 }, "algorithm": { "eps": 5, "min_samples": 5 } }
18+
]
19+
}
20+
},
21+
"TEMPLATES": {
22+
"dbscan": {
23+
"SETS": [
24+
"common dbscan parameters",
25+
"synthetic dataset",
26+
"sklearnex spmd implementation",
27+
"large scale default parameters",
28+
"spmd dbscan parameters"
29+
]
30+
}
31+
}
32+
}

configs/spmd/large_scale/kmeans.json

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
{
2+
"INCLUDE": ["../../common/sklearn.json", "../../regular/kmeans.json", "large_scale.json"],
3+
"PARAMETERS_SETS": {
4+
"spmd kmeans parameters": {
5+
"algorithm": {
6+
"estimator": "KMeans",
7+
"estimator_params": {
8+
"algorithm": "lloyd"
9+
},
10+
"estimator_methods": { "training": "fit" }
11+
},
12+
"bench": {
13+
"mpi_params": {"n": 48}
14+
}
15+
},
16+
"synthetic data": {
17+
"data": [
18+
{ "source": "make_blobs", "generation_kwargs": { "n_samples": 5000000, "n_features": 10, "centers": 10 }, "algorithm": { "n_clusters": 10, "max_iter": 10 } }
19+
]
20+
}
21+
},
22+
"TEMPLATES": {
23+
"kmeans": {
24+
"SETS": [
25+
"synthetic data",
26+
"sklearnex spmd implementation",
27+
"large scale default parameters",
28+
"spmd kmeans parameters"
29+
]
30+
}
31+
}
32+
}

configs/spmd/large_scale/knn.json

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
{
2+
"INCLUDE": ["../../common/sklearn.json", "../../regular/knn.json", "large_scale.json"],
3+
"PARAMETERS_SETS": {
4+
"spmd knn cls parameters": {
5+
"algorithm": {
6+
"estimator": "KNeighborsClassifier",
7+
"estimator_params": {
8+
"algorithm": "brute",
9+
"metric": "minkowski",
10+
"p": 2,
11+
"weights": "uniform",
12+
"n_neighbors": 5
13+
},
14+
"estimator_methods": {
15+
"training": "fit",
16+
"inference": "predict"
17+
}
18+
},
19+
"bench": {
20+
"mpi_params": {}
21+
}
22+
},
23+
"synthetic classification data": {
24+
"data": [
25+
{ "source": "make_classification", "split_kwargs": { "train_size": 500000, "test_size": 50000 }, "generation_kwargs": { "n_samples": 550000, "n_features": 100, "n_classes": 2, "n_informative": "[SPECIAL_VALUE]0.5" } },
26+
{ "source": "make_classification", "split_kwargs": { "train_size": 50000, "test_size": 500000 }, "generation_kwargs": { "n_samples": 550000, "n_features": 100, "n_classes": 2, "n_informative": "[SPECIAL_VALUE]0.5" } },
27+
{ "source": "make_classification", "split_kwargs": { "train_size": 50000, "test_size": 50000 }, "generation_kwargs": { "n_samples": 100000, "n_features": 1000, "n_classes": 2, "n_informative": "[SPECIAL_VALUE]0.5" } },
28+
{ "source": "make_classification", "split_kwargs": { "train_size": 500000, "test_size": 500000 }, "generation_kwargs": { "n_samples": 1000000, "n_features": 10, "n_classes": 2, "n_informative": "[SPECIAL_VALUE]0.5" } }
29+
]
30+
}
31+
},
32+
"TEMPLATES": {
33+
"knn classifier": {
34+
"SETS": [
35+
"common knn parameters",
36+
"synthetic classification data",
37+
"sklearnex spmd implementation",
38+
"large scale 2k parameters",
39+
"spmd knn cls parameters"
40+
]
41+
}
42+
}
43+
}
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
{
2+
"PARAMETERS_SETS": {
3+
"large scale default parameters": {
4+
"data": {
5+
"dtype": "float64",
6+
"distributed_split": "None"
7+
},
8+
"bench": {
9+
"mpi_params": {"n": [1,2,3,4,5,6,7,8,9,10,11,12], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" }
10+
}
11+
},
12+
"large scale 2k parameters": {
13+
"data": {
14+
"dtype": "float64",
15+
"distributed_split": "None"
16+
},
17+
"bench": {
18+
"mpi_params": {"n": [192,384,768,1536,3072,6144,12288,24576], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" }
19+
}
20+
},
21+
"large scale impi parameters": {
22+
"data": {
23+
"dtype": "float64",
24+
"distributed_split": "None"
25+
},
26+
"bench": {
27+
"mpi_params": {"n": [1,2,4,6,12,24], "ppn": 12}
28+
}
29+
}
30+
}
31+
}
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
{
2+
"INCLUDE": ["../../common/sklearn.json", "../../regular/linear_model.json", "large_scale.json"],
3+
"PARAMETERS_SETS": {
4+
"spmd linear parameters": {
5+
"algorithm": {
6+
"estimator": "LinearRegression",
7+
"estimator_methods": { "training": "fit" }
8+
}
9+
},
10+
"synthetic data": {
11+
"data": [
12+
{ "source": "make_regression", "generation_kwargs": { "n_samples": 30005000, "n_features": 10, "noise": 1.25 }, "split_kwargs": { "train_size": 30000000, "test_size": 5000 } },
13+
{ "source": "make_regression", "generation_kwargs": { "n_samples": 305000, "n_features": 1000, "noise": 1.25 }, "split_kwargs": { "train_size": 300000, "test_size": 5000 } }
14+
]
15+
}
16+
},
17+
"TEMPLATES": {
18+
"linreg": {
19+
"SETS": [
20+
"sklearnex spmd implementation",
21+
"large scale default parameters",
22+
"synthetic data",
23+
"spmd linear parameters"
24+
]
25+
}
26+
}
27+
}

configs/spmd/large_scale/logreg.json

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
{
2+
"INCLUDE": ["../../common/sklearn.json", "../../regular/logreg.json", "../logreg.json", "large_scale.json"],
3+
"PARAMETERS_SETS": {
4+
"spmd logreg2 parameters": {
5+
"algorithm":{
6+
"estimator": "LogisticRegression",
7+
"estimator_methods": { "inference": "predict" },
8+
"estimator_params": { "max_iter": 20 }
9+
}
10+
},
11+
"synthetic data": {
12+
"data": [
13+
{ "source": "make_classification", "split_kwargs": { "train_size": 5000000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 5001000, "n_features": 10, "n_classes": 2 } },
14+
{ "source": "make_classification", "split_kwargs": { "train_size": 100000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 101000, "n_features": 1000, "n_classes": 2 } }
15+
]
16+
}
17+
},
18+
"TEMPLATES": {
19+
"logreg": {
20+
"SETS": [
21+
"sklearnex spmd implementation",
22+
"large scale 2k parameters",
23+
"spmd logreg parameters",
24+
"synthetic data",
25+
"spmd logreg2 parameters"
26+
]
27+
}
28+
}
29+
}

configs/spmd/large_scale/pca.json

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
{
2+
"INCLUDE": ["../../common/sklearn.json", "../../regular/pca.json", "large_scale.json"],
3+
"PARAMETERS_SETS": {
4+
"spmd pca parameters": {
5+
"algorithm": {
6+
"estimator": "PCA",
7+
"estimator_methods": { "training": "fit", "inference": "" }
8+
},
9+
"data": {
10+
"split_kwargs": { "test_size": 0.0001 }
11+
}
12+
},
13+
"synthetic data": {
14+
"data": [
15+
{ "source": "make_blobs", "generation_kwargs": { "n_samples": 10000000, "n_features": 10, "centers": 1 } },
16+
{ "source": "make_blobs", "generation_kwargs": { "n_samples": 100000, "n_features": 1000, "centers": 1 } }
17+
]
18+
}
19+
},
20+
"TEMPLATES": {
21+
"pca": {
22+
"SETS": [
23+
"sklearnex spmd implementation",
24+
"large scale default parameters",
25+
"synthetic data",
26+
"spmd pca parameters"
27+
]
28+
}
29+
}
30+
}

sklbench/benchmarks/sklearn_estimator.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,9 @@ def get_subset_metrics_of_estimator(
134134
and isinstance(iterations[0], Union[Numeric, NumpyNumeric].__args__)
135135
):
136136
metrics.update({"iterations": int(iterations[0])})
137+
if hasattr(estimator_instance, "_n_inner_iter"):
138+
inner_iters = estimator_instance._n_inner_iter
139+
metrics.update({"inner_iters": int(inner_iters)})
137140
if task == "classification":
138141
y_pred = convert_to_numpy(estimator_instance.predict(x))
139142
metrics.update(
@@ -142,7 +145,7 @@ def get_subset_metrics_of_estimator(
142145
"balanced accuracy": float(balanced_accuracy_score(y_compat, y_pred)),
143146
}
144147
)
145-
if hasattr(estimator_instance, "predict_proba") and not (
148+
'''if hasattr(estimator_instance, "predict_proba") and not (
146149
hasattr(estimator_instance, "probability")
147150
and getattr(estimator_instance, "probability") == False
148151
):
@@ -162,7 +165,7 @@ def get_subset_metrics_of_estimator(
162165
),
163166
"logloss": float(log_loss(y_compat, y_pred_proba)),
164167
}
165-
)
168+
)'''
166169
elif task == "regression":
167170
y_pred = convert_to_numpy(estimator_instance.predict(x))
168171
metrics.update(
@@ -429,7 +432,6 @@ def measure_sklearn_estimator(
429432
estimator_instance.get_booster()
430433
)
431434
method_instance = getattr(daal_model, method)
432-
433435
metrics[method] = dict()
434436
(
435437
metrics[method]["time[ms]"],

0 commit comments

Comments
 (0)