Skip to content

Commit f7d62c7

Browse files
authored
Merge pull request #19 from Alexsandruss/benchs_ext
Extension of benchmark parameters and output
2 parents 7d0f9ba + 3aef98e commit f7d62c7

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+3972
-809
lines changed

Makefile

Lines changed: 31 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -119,23 +119,22 @@ ARGS_SKLEARN_ridge = --size "$(REGRESSION_SIZE)"
119119
ARGS_SKLEARN_linear = --size "$(REGRESSION_SIZE)"
120120
ARGS_SKLEARN_pca_daal = --size "$(REGRESSION_SIZE)" --svd-solver daal
121121
ARGS_SKLEARN_pca_full = --size "$(REGRESSION_SIZE)" --svd-solver full
122-
ARGS_SKLEARN_kmeans = --data-multiplier "$(MULTIPLIER)" \
123-
--filex data/clustering/kmeans_$(KMEANS_SIZE).npy \
124-
--filei data/clustering/kmeans_$(KMEANS_SIZE).init.npy
125-
ARGS_SKLEARN_svm2 = --fileX data/two/X-$(SVM_SIZE).npy \
126-
--fileY data/two/y-$(SVM_SIZE).npy
127-
ARGS_SKLEARN_svm5 = --fileX data/multi/X-$(SVM_SIZE).npy \
128-
--fileY data/multi/y-$(SVM_SIZE).npy
129-
ARGS_SKLEARN_logreg2 = --fileX data/two/X-$(LOGREG_SIZE).npy \
130-
--fileY data/two/y-$(LOGREG_SIZE).npy
131-
ARGS_SKLEARN_logreg5 = --fileX data/multi/X-$(LOGREG_SIZE).npy \
132-
--fileY data/multi/y-$(LOGREG_SIZE).npy
133-
ARGS_SKLEARN_dfclf2 = --fileX data/two/X-$(DFCLF_SIZE).npy \
134-
--fileY data/two/y-$(DFCLF_SIZE).npy
135-
ARGS_SKLEARN_dfclf5 = --fileX data/multi/X-$(DFCLF_SIZE).npy \
136-
--fileY data/multi/y-$(DFCLF_SIZE).npy
137-
ARGS_SKLEARN_dfreg = --fileX data/reg/X-$(DFREG_SIZE).npy \
138-
--fileY data/reg/y-$(DFREG_SIZE).npy
122+
ARGS_SKLEARN_kmeans = --file-X-train data/clustering/kmeans_$(KMEANS_SIZE).npy \
123+
--filei data/clustering/kmeans_$(KMEANS_SIZE).init.npy
124+
ARGS_SKLEARN_svm2 = --file-X-train data/two/X-$(SVM_SIZE).npy \
125+
--file-y-train data/two/y-$(SVM_SIZE).npy
126+
ARGS_SKLEARN_svm5 = --file-X-train data/multi/X-$(SVM_SIZE).npy \
127+
--file-y-train data/multi/y-$(SVM_SIZE).npy
128+
ARGS_SKLEARN_logreg2 = --file-X-train data/two/X-$(LOGREG_SIZE).npy \
129+
--file-y-train data/two/y-$(LOGREG_SIZE).npy
130+
ARGS_SKLEARN_logreg5 = --file-X-train data/multi/X-$(LOGREG_SIZE).npy \
131+
--file-y-train data/multi/y-$(LOGREG_SIZE).npy
132+
ARGS_SKLEARN_dfclf2 = --file-X-train data/two/X-$(DFCLF_SIZE).npy \
133+
--file-y-train data/two/y-$(DFCLF_SIZE).npy
134+
ARGS_SKLEARN_dfclf5 = --file-X-train data/multi/X-$(DFCLF_SIZE).npy \
135+
--file-y-train data/multi/y-$(DFCLF_SIZE).npy
136+
ARGS_SKLEARN_dfreg = --file-X-train data/reg/X-$(DFREG_SIZE).npy \
137+
--file-y-train data/reg/y-$(DFREG_SIZE).npy
139138

140139
DAAL4PY_distances = distances
141140
DAAL4PY_ridge = ridge
@@ -156,23 +155,22 @@ ARGS_DAAL4PY_ridge = --size "$(REGRESSION_SIZE)"
156155
ARGS_DAAL4PY_linear = --size "$(REGRESSION_SIZE)"
157156
ARGS_DAAL4PY_pca_daal = --size "$(REGRESSION_SIZE)" --svd-solver daal
158157
ARGS_DAAL4PY_pca_full = --size "$(REGRESSION_SIZE)" --svd-solver full
159-
ARGS_DAAL4PY_kmeans = --data-multiplier "$(MULTIPLIER)" \
160-
--filex data/clustering/kmeans_$(KMEANS_SIZE).npy \
158+
ARGS_DAAL4PY_kmeans = --file-X-train data/clustering/kmeans_$(KMEANS_SIZE).npy \
161159
--filei data/clustering/kmeans_$(KMEANS_SIZE).init.npy
162-
ARGS_DAAL4PY_svm2 = --fileX data/two/X-$(SVM_SIZE).npy \
163-
--fileY data/two/y-$(SVM_SIZE).npy
164-
ARGS_DAAL4PY_svm5 = --fileX data/multi/X-$(SVM_SIZE).npy \
165-
--fileY data/multi/y-$(SVM_SIZE).npy
166-
ARGS_DAAL4PY_logreg2 = --fileX data/two/X-$(LOGREG_SIZE).npy \
167-
--fileY data/two/y-$(LOGREG_SIZE).npy
168-
ARGS_DAAL4PY_logreg5 = --fileX data/multi/X-$(LOGREG_SIZE).npy \
169-
--fileY data/multi/y-$(LOGREG_SIZE).npy
170-
ARGS_DAAL4PY_dfclf2 = --fileX data/two/X-$(DFCLF_SIZE).npy \
171-
--fileY data/two/y-$(DFCLF_SIZE).npy
172-
ARGS_DAAL4PY_dfclf5 = --fileX data/multi/X-$(DFCLF_SIZE).npy \
173-
--fileY data/multi/y-$(DFCLF_SIZE).npy
174-
ARGS_DAAL4PY_dfreg = --fileX data/reg/X-$(DFREG_SIZE).npy \
175-
--fileY data/reg/y-$(DFREG_SIZE).npy
160+
ARGS_DAAL4PY_svm2 = --file-X-train data/two/X-$(SVM_SIZE).npy \
161+
--file-y-train data/two/y-$(SVM_SIZE).npy
162+
ARGS_DAAL4PY_svm5 = --file-X-train data/multi/X-$(SVM_SIZE).npy \
163+
--file-y-train data/multi/y-$(SVM_SIZE).npy
164+
ARGS_DAAL4PY_logreg2 = --file-X-train data/two/X-$(LOGREG_SIZE).npy \
165+
--file-y-train data/two/y-$(LOGREG_SIZE).npy
166+
ARGS_DAAL4PY_logreg5 = --file-X-train data/multi/X-$(LOGREG_SIZE).npy \
167+
--file-y-train data/multi/y-$(LOGREG_SIZE).npy
168+
ARGS_DAAL4PY_dfclf2 = --file-X-train data/two/X-$(DFCLF_SIZE).npy \
169+
--file-y-train data/two/y-$(DFCLF_SIZE).npy
170+
ARGS_DAAL4PY_dfclf5 = --file-X-train data/multi/X-$(DFCLF_SIZE).npy \
171+
--file-y-train data/multi/y-$(DFCLF_SIZE).npy
172+
ARGS_DAAL4PY_dfreg = --file-X-train data/reg/X-$(DFREG_SIZE).npy \
173+
--file-y-train data/reg/y-$(DFREG_SIZE).npy
176174

177175
comma = ,
178176

README.md

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,16 @@ Python*. See benchmark results [here](https://intelpython.github.io/scikit-learn
55

66
## Prerequisites
77
- Python and scikit-learn to run the Python versions of the benchmarks
8+
- pandas, when using its DataFrame as the input data format
89
- `icc`, `ifort`, `mkl`, `daal` to compile and run native benchmarks
910

10-
## Automatically build and run
11+
## How to create a conda environment for benchmarking
12+
`conda create -n skl_bench -c intel python=3.7 scikit-learn pandas`
13+
14+
## Running Python benchmarks with runner script
15+
`python runner.py --config config_example.json [--output-format json --verbose]`
16+
17+
## Legacy automatic building and running
1118
- Run `make`. This will generate data, compile benchmarks, and run them.
1219
- To run only scikit-learn benchmarks, use `make sklearn`.
1320
- To run only native benchmarks, use `make native`.

config_example.json

Lines changed: 213 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,213 @@
1+
{
2+
"common": {
3+
"lib": ["sklearn", "daal4py"],
4+
"data-format": ["pandas"],
5+
"data-order": ["F"],
6+
"dtype": ["float64"]
7+
},
8+
"cases": [
9+
{
10+
"algorithm": "distances",
11+
"dataset": [
12+
{
13+
"source": "synthetic",
14+
"type": "classification",
15+
"n_classes": 2,
16+
"n_features": 15000,
17+
"training": {
18+
"n_samples": 1000
19+
}
20+
}
21+
]
22+
},
23+
{
24+
"algorithm": "kmeans",
25+
"dataset": [
26+
{
27+
"source": "synthetic",
28+
"type": "kmeans",
29+
"n_clusters": 10,
30+
"n_features": 50,
31+
"training": {
32+
"n_samples": 1000000
33+
}
34+
}
35+
],
36+
"n-clusters": [10]
37+
},
38+
{
39+
"algorithm": "dbscan",
40+
"dataset": [
41+
{
42+
"source": "synthetic",
43+
"type": "blobs",
44+
"n_clusters": 10,
45+
"n_features": 50,
46+
"training": {
47+
"n_samples": 100000
48+
}
49+
}
50+
],
51+
"min-samples": [5000],
52+
"eps": [1]
53+
},
54+
{
55+
"algorithm": "linear",
56+
"dataset": [
57+
{
58+
"source": "synthetic",
59+
"type": "regression",
60+
"n_features": 50,
61+
"training": {
62+
"n_samples": 1000000
63+
}
64+
}
65+
]
66+
},
67+
{
68+
"algorithm": "ridge",
69+
"dataset": [
70+
{
71+
"source": "synthetic",
72+
"type": "regression",
73+
"n_features": 50,
74+
"training": {
75+
"n_samples": 1000000
76+
}
77+
}
78+
]
79+
},
80+
{
81+
"algorithm": "df_clsf",
82+
"dataset": [
83+
{
84+
"source": "synthetic",
85+
"type": "classification",
86+
"n_classes": 2,
87+
"n_features": 100,
88+
"training": {
89+
"n_samples": 10000
90+
}
91+
},
92+
{
93+
"source": "synthetic",
94+
"type": "classification",
95+
"n_classes": 5,
96+
"n_features": 100,
97+
"training": {
98+
"n_samples": 10000
99+
}
100+
}
101+
]
102+
},
103+
{
104+
"algorithm": "df_regr",
105+
"dataset": [
106+
{
107+
"source": "synthetic",
108+
"type": "regression",
109+
"n_features": 100,
110+
"training": {
111+
"n_samples": 10000
112+
}
113+
}
114+
]
115+
},
116+
{
117+
"algorithm": "log_reg",
118+
"dataset": [
119+
{
120+
"source": "synthetic",
121+
"type": "classification",
122+
"n_classes": 2,
123+
"n_features": 100,
124+
"training": {
125+
"n_samples": 100000
126+
}
127+
},
128+
{
129+
"source": "synthetic",
130+
"type": "classification",
131+
"n_classes": 5,
132+
"n_features": 100,
133+
"training": {
134+
"n_samples": 100000
135+
}
136+
}
137+
]
138+
},
139+
{
140+
"algorithm": "pca",
141+
"dataset": [
142+
{
143+
"source": "synthetic",
144+
"type": "classification",
145+
"n_classes": 2,
146+
"n_features": 100,
147+
"training": {
148+
"n_samples": 10000
149+
}
150+
}
151+
],
152+
"svd-solver": ["daal", "full"]
153+
},
154+
{
155+
"algorithm": "svm",
156+
"dataset": [
157+
{
158+
"source": "synthetic",
159+
"type": "classification",
160+
"n_classes": 2,
161+
"n_features": 100,
162+
"training": {
163+
"n_samples": 20000
164+
}
165+
},
166+
{
167+
"source": "synthetic",
168+
"type": "classification",
169+
"n_classes": 5,
170+
"n_features": 100,
171+
"training": {
172+
"n_samples": 20000
173+
}
174+
}
175+
],
176+
"max-cache-size": [4],
177+
"kernel": ["rbf"]
178+
},
179+
{
180+
"lib": ["xgboost"],
181+
"algorithm": "gbt",
182+
"dataset": [
183+
{
184+
"source": "synthetic",
185+
"type": "classification",
186+
"n_classes": 2,
187+
"n_features": 100,
188+
"training": {
189+
"n_samples": 10000
190+
}
191+
}
192+
],
193+
"tree-method": ["hist"],
194+
"objective": ["binary:logistic"]
195+
},
196+
{
197+
"lib": ["xgboost"],
198+
"algorithm": "gbt",
199+
"dataset": [
200+
{
201+
"source": "synthetic",
202+
"type": "regression",
203+
"n_features": 100,
204+
"training": {
205+
"n_samples": 10000
206+
}
207+
}
208+
],
209+
"tree-method": ["hist"],
210+
"objective": ["reg:squarederror"]
211+
}
212+
]
213+
}

0 commit comments

Comments
 (0)