@@ -151,7 +151,7 @@ def categorize_dataframe(df, ycol='0', bins=5, cutoffs=None, verbose=False):
151
151
152
152
153
153
def make_group_from_columns (df , groupcols ):
154
- return df [groupcols ].astype (str ).sum (axis = 1 ).as_matrix ()
154
+ return df [groupcols ].astype (str ).sum (axis = 1 ).values
155
155
156
156
157
157
def summarize (df , ycol = '0' , classify = False , bins = 0 , cutoffs = None , min_count = 0 ):
@@ -188,8 +188,8 @@ def split_data(df, ycol='0', classify=False, cv=5, bins=0, cutoffs=None, groupco
188
188
if ycol .isdigit ():
189
189
ycol = df .columns [int (ycol )]
190
190
191
- y = df .loc [:, ycol ].as_matrix ()
192
- x = df .drop (ycol , axis = 1 ).as_matrix ()
191
+ y = df .loc [:, ycol ].values
192
+ x = df .drop (ycol , axis = 1 ).values
193
193
features = df .drop (ycol , axis = 1 ).columns .tolist ()
194
194
195
195
if verbose :
@@ -242,10 +242,13 @@ def verify_path(path):
242
242
os .makedirs (folder )
243
243
244
244
245
- def train (model , x , y , features = None , classify = False , threads = - 1 , prefix = '' , name = None , save = False ):
245
+ def train (model , x , y , features = None , classify = False , threads = - 1 , prefix = '' , name = None , save = False , verbose = True ):
246
246
verify_path (prefix )
247
247
model , model_name = get_model (model , threads , classify = classify )
248
248
model .fit (x , y )
249
+ if verbose :
250
+ train_score = model .score (x , y )
251
+ print ('Train score = {:.3f}' .format (train_score ))
249
252
name = name or model_name
250
253
if save :
251
254
model_desc_fname = "{}.{}.description" .format (prefix , name )
@@ -284,7 +287,7 @@ def classify(model, x, y, splits, features, threads=-1, prefix='', seed=0):
284
287
test_score = model .score (x_test , y_test )
285
288
train_scores .append (train_score )
286
289
test_scores .append (test_score )
287
- print (" fold {}/{}: score = {:.3f}" .format (i + 1 , len (splits ), test_score ))
290
+ print (" fold {}/{}: score = {:.3f} (train = {:.3f}) " .format (i + 1 , len (splits ), test_score , train_score ))
288
291
if test_score > best_score :
289
292
best_model = model
290
293
best_score = test_score
@@ -360,7 +363,7 @@ def regress(model, x, y, splits, features, threads=-1, prefix='', seed=0):
360
363
test_score = model .score (x_test , y_test )
361
364
train_scores .append (train_score )
362
365
test_scores .append (test_score )
363
- print (" fold {}/{}: score = {:.3f}" .format (i + 1 , len (splits ), test_score ))
366
+ print (" fold {}/{}: score = {:.3f} (train = {:.3f}) " .format (i + 1 , len (splits ), test_score , train_score ))
364
367
if test_score > best_score :
365
368
best_model = model
366
369
best_score = test_score
0 commit comments