Skip to content

Commit 328000b

Browse files
Add benchmark for Catboost modelbuilder (#89)
* Add catboost modelbuilder benchmark
* Add config for catboost modelbuilder bench
* Refactor in catboost mb bench
* Fix codestyle in Catboost mb bench
* Fix mypy in catboost mb bench
* Fix types in catboost mb bench
* Fix type in catboost mb bench
* Delete testing features
* Add metric
* Update config and add covtype dataset support
* Add mlsr dataset
* Update catboost config file

---------

Co-authored-by: Anatoly Volkov <[email protected]>
1 parent af05b1b commit 328000b

File tree

2 files changed

+550
-0
lines changed

2 files changed

+550
-0
lines changed
Lines changed: 304 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,304 @@
1+
{
2+
"common": {
3+
"lib": "modelbuilders",
4+
"data-format": "pandas",
5+
"data-order": "F",
6+
"grow-policy": "Depthwise",
7+
"dtype": "float32",
8+
"algorithm": "catboost_mb",
9+
"count-pool": "",
10+
"max-depth": 8,
11+
"learning-rate": 0.1,
12+
"reg-lambda": 1,
13+
"max-leaves": 256
14+
},
15+
"cases": [
16+
{
17+
"dataset": [
18+
{
19+
"source": "npy",
20+
"name": "abalone",
21+
"training": {
22+
"x": "data/abalone_x_train.npy",
23+
"y": "data/abalone_y_train.npy"
24+
},
25+
"testing": {
26+
"x": "data/abalone_x_test.npy",
27+
"y": "data/abalone_y_test.npy"
28+
}
29+
}
30+
],
31+
"learning-rate": 0.03,
32+
"max-depth": 6,
33+
"n-estimators": 1000,
34+
"objective": "RMSE"
35+
},
36+
{
37+
"dataset": [
38+
{
39+
"source": "npy",
40+
"name": "airline-ohe",
41+
"training": {
42+
"x": "data/airline-ohe_x_train.npy",
43+
"y": "data/airline-ohe_y_train.npy"
44+
},
45+
"testing": {
46+
"x": "data/airline-ohe_x_test.npy",
47+
"y": "data/airline-ohe_y_test.npy"
48+
}
49+
}
50+
],
51+
"max-bin": 256,
52+
"scale-pos-weight": 2,
53+
"subsample": 1,
54+
"n-estimators": 1000,
55+
"objective": "Logloss"
56+
},
57+
{
58+
"dataset": [
59+
{
60+
"source": "npy",
61+
"name": "higgs1m",
62+
"training": {
63+
"x": "data/higgs1m_x_train.npy",
64+
"y": "data/higgs1m_y_train.npy"
65+
},
66+
"testing": {
67+
"x": "data/higgs1m_x_test.npy",
68+
"y": "data/higgs1m_y_test.npy"
69+
}
70+
}
71+
],
72+
"max-bin": 256,
73+
"scale-pos-weight": 2,
74+
"subsample": 1,
75+
"n-estimators": [100, 300, 1000, 3000],
76+
"objective": "Logloss"
77+
},
78+
{
79+
"dataset": [
80+
{
81+
"source": "npy",
82+
"name": "letters",
83+
"training": {
84+
"x": "data/letters_x_train.npy",
85+
"y": "data/letters_y_train.npy"
86+
},
87+
"testing": {
88+
"x": "data/letters_x_test.npy",
89+
"y": "data/letters_y_test.npy"
90+
}
91+
}
92+
],
93+
"learning-rate": 0.03,
94+
"max-depth": 6,
95+
"max-leaves": 0,
96+
"n-estimators": 1000,
97+
"objective": "multi:softprob"
98+
},
99+
{
100+
"dataset": [
101+
{
102+
"source": "npy",
103+
"name": "mlsr",
104+
"training": {
105+
"x": "data/mlsr_x_train.npy",
106+
"y": "data/mlsr_y_train.npy"
107+
}
108+
}
109+
],
110+
"max-bin": 256,
111+
"learning-rate": 0.3,
112+
"subsample": 1,
113+
"reg-lambda": 2,
114+
"n-estimators": 200,
115+
"objective": "multi:softprob"
116+
},
117+
{
118+
"dataset": [
119+
{
120+
"source": "npy",
121+
"name": "mortgage1Q",
122+
"training": {
123+
"x": "data/mortgage1Q_x_train.npy",
124+
"y": "data/mortgage1Q_y_train.npy"
125+
}
126+
}
127+
],
128+
"n-estimators": 100,
129+
"objective": "RMSE",
130+
"scale-pos-weight": 2,
131+
"subsample": 1
132+
},
133+
{
134+
"dataset": [
135+
{
136+
"source": "npy",
137+
"name": "plasticc",
138+
"training": {
139+
"x": "data/plasticc_x_train.npy",
140+
"y": "data/plasticc_y_train.npy"
141+
},
142+
"testing": {
143+
"x": "data/plasticc_x_test.npy",
144+
"y": "data/plasticc_y_test.npy"
145+
}
146+
}
147+
],
148+
"learning-rate": 0.3,
149+
"n-estimators": 60,
150+
"objective": "multi:softprob",
151+
"max-depth": 7,
152+
"max-leaves": 0,
153+
"subsample": 0.7
154+
},
155+
{
156+
"dataset": [
157+
{
158+
"source": "npy",
159+
"name": "santander",
160+
"training": {
161+
"x": "data/santander_x_train.npy",
162+
"y": "data/santander_y_train.npy"
163+
},
164+
"testing": {
165+
"x": "data/santander_x_test.npy",
166+
"y": "data/santander_y_test.npy"
167+
}
168+
}
169+
],
170+
"learning-rate": 0.3,
171+
"n-estimators": 10000,
172+
"objective": "Logloss",
173+
"max-depth": 1,
174+
"max-leaves": 0,
175+
"subsample": 0.5,
176+
"eta": 0.1
177+
},
178+
{
179+
"objective": "Logloss",
180+
"scale-pos-weight": 2.1067817411664587,
181+
"dataset": [
182+
{
183+
"source": "npy",
184+
"name": "airline",
185+
"training": {
186+
"x": "data/airline_x_train.npy",
187+
"y": "data/airline_y_train.npy"
188+
},
189+
"testing": {
190+
"x": "data/airline_x_test.npy",
191+
"y": "data/airline_y_test.npy"
192+
}
193+
}
194+
]
195+
},
196+
{
197+
"objective": "Logloss",
198+
"scale-pos-weight": 173.63348001466812,
199+
"dataset": [
200+
{
201+
"source": "npy",
202+
"name": "bosch",
203+
"training": {
204+
"x": "data/bosch_x_train.npy",
205+
"y": "data/bosch_y_train.npy"
206+
},
207+
"testing": {
208+
"x": "data/bosch_x_test.npy",
209+
"y": "data/bosch_y_test.npy"
210+
}
211+
}
212+
]
213+
},
214+
{
215+
"dataset": [
216+
{
217+
"source": "npy",
218+
"name": "covtype",
219+
"training": {
220+
"x": "data/covtype_x_train.npy",
221+
"y": "data/covtype_y_train.npy"
222+
},
223+
"testing": {
224+
"x": "data/covtype_x_test.npy",
225+
"y": "data/covtype_y_test.npy"
226+
}
227+
}
228+
],
229+
"objective": "multi:softprob",
230+
"n-estimators": 100
231+
},
232+
{
233+
"objective": "Logloss",
234+
"scale-pos-weight": 2.0017715678375363,
235+
"dataset": [
236+
{
237+
"source": "npy",
238+
"name": "epsilon",
239+
"training": {
240+
"x": "data/epsilon_x_train.npy",
241+
"y": "data/epsilon_y_train.npy"
242+
},
243+
"testing": {
244+
"x": "data/epsilon_x_test.npy",
245+
"y": "data/epsilon_y_test.npy"
246+
}
247+
}
248+
]
249+
},
250+
{
251+
"objective": "Logloss",
252+
"scale-pos-weight": 578.2868020304569,
253+
"dataset": [
254+
{
255+
"source": "npy",
256+
"name": "fraud",
257+
"training": {
258+
"x": "data/fraud_x_train.npy",
259+
"y": "data/fraud_y_train.npy"
260+
},
261+
"testing": {
262+
"x": "data/fraud_x_test.npy",
263+
"y": "data/fraud_y_test.npy"
264+
}
265+
}
266+
]
267+
},
268+
{
269+
"objective": "Logloss",
270+
"scale-pos-weight": 1.8872389605086624,
271+
"dataset": [
272+
{
273+
"source": "npy",
274+
"name": "higgs",
275+
"training": {
276+
"x": "data/higgs_x_train.npy",
277+
"y": "data/higgs_y_train.npy"
278+
},
279+
"testing": {
280+
"x": "data/higgs_x_test.npy",
281+
"y": "data/higgs_y_test.npy"
282+
}
283+
}
284+
]
285+
},
286+
{
287+
"objective": "RMSE",
288+
"dataset": [
289+
{
290+
"source": "npy",
291+
"name": "year_prediction_msd",
292+
"training": {
293+
"x": "data/year_prediction_msd_x_train.npy",
294+
"y": "data/year_prediction_msd_y_train.npy"
295+
},
296+
"testing": {
297+
"x": "data/year_prediction_msd_x_test.npy",
298+
"y": "data/year_prediction_msd_y_test.npy"
299+
}
300+
}
301+
]
302+
}
303+
]
304+
}

0 commit comments

Comments
 (0)