Skip to content

Commit 9370b18

Browse files
Adding new datasets for model builders benchmarking (#128)
* Adding new datasets for model builders benchmarking
* Update configs/modelbuilders/lgbm_mb_cpu_config.json
* Update configs/modelbuilders/xgb_mb_cpu_config.json

---------

Co-authored-by: Dmitry Razdoburdin <>
Co-authored-by: Alexander Andreev <[email protected]>
1 parent 3c8d127 commit 9370b18

File tree

2 files changed

+487
-71
lines changed

2 files changed

+487
-71
lines changed

configs/modelbuilders/lgbm_mb_cpu_config.json

Lines changed: 257 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,27 @@
77
"algorithm": "lgbm_mb"
88
},
99
"cases": [
10+
{
11+
"dataset": [
12+
{
13+
"source": "npy",
14+
"name": "abalone",
15+
"training": {
16+
"x": "data/abalone_x_train.npy",
17+
"y": "data/abalone_y_train.npy"
18+
},
19+
"testing": {
20+
"x": "data/abalone_x_test.npy",
21+
"y": "data/abalone_y_test.npy"
22+
}
23+
}
24+
],
25+
"learning-rate": 0.03,
26+
"max-depth": 6,
27+
"max-leaves": 256,
28+
"n-estimators": [1000],
29+
"objective": "regression"
30+
},
1031
{
1132
"dataset": [
1233
{
@@ -58,9 +79,51 @@
5879
"min-child-weight": 0,
5980
"max-depth": 8,
6081
"max-leaves": 256,
61-
"n-estimators": 1000,
82+
"n-estimators": [100, 300, 1000, 3000],
6283
"objective": "binary"
6384
},
85+
{
86+
"dataset": [
87+
{
88+
"source": "npy",
89+
"name": "letters",
90+
"training": {
91+
"x": "data/letters_x_train.npy",
92+
"y": "data/letters_y_train.npy"
93+
},
94+
"testing": {
95+
"x": "data/letters_x_test.npy",
96+
"y": "data/letters_y_test.npy"
97+
}
98+
}
99+
],
100+
"learning-rate": 0.03,
101+
"max-depth": 6,
102+
"max-leaves": 256,
103+
"n-estimators": 1000,
104+
"objective": "multiclass"
105+
},
106+
{
107+
"dataset": [
108+
{
109+
"source": "npy",
110+
"name": "mlsr",
111+
"training": {
112+
"x": "data/mlsr_x_train.npy",
113+
"y": "data/mlsr_y_train.npy"
114+
}
115+
}
116+
],
117+
"max-bin": 256,
118+
"learning-rate": 0.3,
119+
"subsample": 1,
120+
"reg-lambda": 2,
121+
"min-child-weight": 1,
122+
"max-depth": 8,
123+
"max-leaves": 256,
124+
"n-estimators": 200,
125+
"objective": "multiclass"
126+
},
64127
{
65128
"dataset": [
66129
{
@@ -87,23 +150,205 @@
87150
"dataset": [
88151
{
89152
"source": "npy",
90-
"name": "mlsr",
153+
"name": "plasticc",
91154
"training": {
92-
"x": "data/mlsr_x_train.npy",
93-
"y": "data/mlsr_y_train.npy"
155+
"x": "data/plasticc_x_train.npy",
156+
"y": "data/plasticc_y_train.npy"
157+
},
158+
"testing": {
159+
"x": "data/plasticc_x_test.npy",
160+
"y": "data/plasticc_y_test.npy"
161+
}
162+
}
163+
],
164+
"n-estimators": 60,
165+
"objective": "multiclass",
166+
"max-depth": 7,
167+
"subsample": 0.7,
168+
"max-leaves": 256,
169+
"colsample-bytree": 0.7
170+
},
171+
{
172+
"dataset": [
173+
{
174+
"source": "npy",
175+
"name": "santander",
176+
"training": {
177+
"x": "data/santander_x_train.npy",
178+
"y": "data/santander_y_train.npy"
179+
},
180+
"testing": {
181+
"x": "data/santander_x_test.npy",
182+
"y": "data/santander_y_test.npy"
183+
}
184+
}
185+
],
186+
"n-estimators": 10000,
187+
"objective": "binary",
188+
"max-depth": 1,
189+
"max-leaves": 256,
190+
"subsample": 0.5,
191+
"eta": 0.1,
192+
"colsample-bytree": 0.05
193+
},
194+
{
195+
"objective": "binary",
196+
"scale-pos-weight": 2.1067817411664587,
197+
"dataset": [
198+
{
199+
"source": "npy",
200+
"name": "airline",
201+
"training": {
202+
"x": "data/airline_x_train.npy",
203+
"y": "data/airline_y_train.npy"
204+
},
205+
"testing": {
206+
"x": "data/airline_x_test.npy",
207+
"y": "data/airline_y_test.npy"
94208
}
95209
}
96210
],
97-
"max-bin": 256,
98-
"learning-rate": 0.3,
99-
"subsample": 1,
100-
"reg-lambda": 2,
101-
"min-child-weight": 1,
102-
"min-split-loss": 0.1,
103211
"max-depth": 8,
212+
"learning-rate": 0.1,
213+
"reg-lambda": 1,
104214
"max-leaves": 256,
105-
"n-estimators": 200,
106-
"objective": "multiclass"
215+
"n-estimators": 100
216+
},
217+
{
218+
"objective": "binary",
219+
"scale-pos-weight": 173.63348001466812,
220+
"dataset": [
221+
{
222+
"source": "npy",
223+
"name": "bosch",
224+
"training": {
225+
"x": "data/bosch_x_train.npy",
226+
"y": "data/bosch_y_train.npy"
227+
},
228+
"testing": {
229+
"x": "data/bosch_x_test.npy",
230+
"y": "data/bosch_y_test.npy"
231+
}
232+
}
233+
],
234+
"max-depth": 8,
235+
"learning-rate": 0.1,
236+
"reg-lambda": 1,
237+
"max-leaves": 256,
238+
"n-estimators": 100
239+
},
240+
{
241+
"objective": "multiclass",
242+
"dataset": [
243+
{
244+
"source": "npy",
245+
"name": "covtype",
246+
"training": {
247+
"x": "data/covtype_x_train.npy",
248+
"y": "data/covtype_y_train.npy"
249+
},
250+
"testing": {
251+
"x": "data/covtype_x_test.npy",
252+
"y": "data/covtype_y_test.npy"
253+
}
254+
}
255+
],
256+
"max-depth": 8,
257+
"learning-rate": 0.1,
258+
"reg-lambda": 1,
259+
"max-leaves": 256,
260+
"n-estimators": 100
261+
},
262+
{
263+
"objective": "binary",
264+
"scale-pos-weight": 2.0017715678375363,
265+
"dataset": [
266+
{
267+
"source": "npy",
268+
"name": "epsilon",
269+
"training": {
270+
"x": "data/epsilon_x_train.npy",
271+
"y": "data/epsilon_y_train.npy"
272+
},
273+
"testing": {
274+
"x": "data/epsilon_x_test.npy",
275+
"y": "data/epsilon_y_test.npy"
276+
}
277+
}
278+
],
279+
"max-depth": 8,
280+
"learning-rate": 0.1,
281+
"reg-lambda": 1,
282+
"max-leaves": 256,
283+
"n-estimators": 100
284+
},
285+
{
286+
"objective": "binary",
287+
"scale-pos-weight": 578.2868020304569,
288+
"dataset": [
289+
{
290+
"source": "npy",
291+
"name": "fraud",
292+
"training": {
293+
"x": "data/fraud_x_train.npy",
294+
"y": "data/fraud_y_train.npy"
295+
},
296+
"testing": {
297+
"x": "data/fraud_x_test.npy",
298+
"y": "data/fraud_y_test.npy"
299+
}
300+
}
301+
],
302+
"max-depth": 8,
303+
"learning-rate": 0.1,
304+
"reg-lambda": 1,
305+
"max-leaves": 256,
306+
"n-estimators": 100
307+
},
308+
{
309+
"objective": "binary",
310+
"scale-pos-weight": 1.8872389605086624,
311+
"dataset": [
312+
{
313+
"source": "npy",
314+
"name": "higgs",
315+
"training": {
316+
"x": "data/higgs_x_train.npy",
317+
"y": "data/higgs_y_train.npy"
318+
},
319+
"testing": {
320+
"x": "data/higgs_x_test.npy",
321+
"y": "data/higgs_y_test.npy"
322+
}
323+
}
324+
],
325+
"max-depth": 8,
326+
"learning-rate": 0.1,
327+
"reg-lambda": 1,
328+
"max-leaves": 256,
329+
"n-estimators": 100
330+
},
331+
{
332+
"objective": "regression",
333+
"dataset": [
334+
{
335+
"source": "npy",
336+
"name": "year_prediction_msd",
337+
"training": {
338+
"x": "data/year_prediction_msd_x_train.npy",
339+
"y": "data/year_prediction_msd_y_train.npy"
340+
},
341+
"testing": {
342+
"x": "data/year_prediction_msd_x_test.npy",
343+
"y": "data/year_prediction_msd_y_test.npy"
344+
}
345+
}
346+
],
347+
"max-depth": 8,
348+
"learning-rate": 0.1,
349+
"reg-lambda": 1,
350+
"max-leaves": 256,
351+
"n-estimators": 100
107352
}
108353
]
109354
}

0 commit comments

Comments
 (0)