Skip to content

Commit ff517cd

Browse files
ver
1 parent fad6347 commit ff517cd

File tree

7 files changed

+161
-20
lines changed

7 files changed

+161
-20
lines changed
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
# pycaret example

## WARNING

If you run

    pip install pycaret

in order to run these examples, it will force scikit-learn==0.23.2, potentially
downgrading any newer scikit-learn installation in the same environment. Consider
using a dedicated virtual environment.
Lines changed: 52 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,13 @@
1+
import numpy as np
2+
import pandas as pd
13

4+
# TODO: Illustrate the use of a portfolio manager applied to model residuals
5+
# We use pycaret to generate some model predictions for various models
26
# Example based on https://www.analyticsvidhya.com/blog/2021/07/automl-using-pycaret-with-a-regression-use-case-ii/
37

8+
9+
10+
411
if __name__=='__main__':
512
try:
613
import pycaret
@@ -11,35 +18,63 @@
1118

1219
all_data = get_data('diamond', profile=False)
1320

14-
holdout = all_data[-500:]
21+
n_train = 100
22+
n_test = 100
23+
all_data = all_data[:n_train+n_test]
24+
data = all_data[:n_train]
25+
holdout_data = all_data[n_train:]
1526

16-
data = all_data[:-500]
1727

18-
exp_reg102 = setup(data=data, target='Price', session_id=123,
19-
normalize=True, transformation=True, transform_target=True,
20-
combine_rare_levels=True, rare_level_threshold=0.05,
21-
remove_multicollinearity=True, multicollinearity_threshold=0.95,
22-
bin_numeric_features=['Carat Weight'],
23-
log_experiment=True, experiment_name='diamond1',html=False)
24-
print('done')
28+
if True:
29+
# Is this needed?
30+
exp_reg102 = setup(data=all_data, target='Price', session_id=123,
31+
normalize=True, transformation=True, transform_target=True,
32+
combine_rare_levels=True, rare_level_threshold=0.05,
33+
remove_multicollinearity=True, multicollinearity_threshold=0.95,
34+
bin_numeric_features=['Carat Weight'],
35+
log_experiment=True, experiment_name='diamond1',html=False)
36+
print('done')
2537

26-
27-
shortlist = ['catboost','xgboost','lightgbm','rf']
28-
print('Creating')
38+
shortlist = ['catboost','xgboost','lightgbm','rf','et','ada','mlp','knn','huber','tr','llar','lar','ridge','lasso']
39+
print('Creating models and turning them')
2940
workin = dict()
3041
for nm in shortlist:
3142
try:
3243
model = create_model(nm)
3344
workin[nm]=model
45+
tune_model(workin[nm])
3446
except Exception as e:
3547
print(str(e))
3648
print('sorry no dice for '+nm)
3749

38-
tuned = dict( [ (nm, tune_model(w)) for n,w in workin.items() ])
3950

40-
y_hats = list()
41-
for nm, tuned_model in tuned:
42-
y_hat = predict_model(estimator=tuned_model, data=holdout)
43-
y_hats.append(y_hat)
51+
stuff = dict()
52+
for partition, the_data in zip(['train','holdout'],[data,holdout_data]):
53+
df = pd.DataFrame(columns=list(workin.keys()))
54+
for nm, tuned_model in workin.items():
55+
y_hat = predict_model(estimator=tuned_model, data=the_data)['Label']
56+
df[nm] = y_hat
57+
df.to_csv(partition+'.csv')
58+
stuff[partition] = y_hat
59+
60+
# Use a portfolio manager to combine models ?
61+
ys = stuff['train'].values
62+
from precise.skaters.managers.schurmanagers import schur_weak_pm_t0_d0_r050_n25_g100_long_manager as mgr
63+
s = {}
64+
for y in ys:
65+
w, s = mgr(s=s,y=y)
66+
67+
X = stuff['holdout'].values
68+
y_hat = np.dot(w, X)
69+
stuff['holdout']['blend'] = y_hat
70+
71+
all_names = list(workin.keys())+['blend']
72+
holdout_error_df = pd.DataFrame(columns=all_names)
73+
for nm in all_names:
74+
holdout_error_df[nm] = (stuff['holdout'][nm]-holdout_data['Price'])**2
75+
76+
print(holdout_error_df.describe())
77+
78+
4479

4580

examples_ensembles_lazypredict/__init__.py

Whitespace-only changes.
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
"""Fit a zoo of regressors with lazypredict, then blend them.

Workflow:
  1. Split the Boston housing data into train / test / validation.
  2. Fit all lazypredict regressors on train, scored on test (out-of-sample).
  3. Use the out-of-sample prediction errors to drive a portfolio manager
     from the `precise` package, yielding a weight vector over models.
  4. Compare the weighted blend against the best single models on the
     held-out validation set.
"""
from sklearn import datasets
from sklearn.utils import shuffle
import numpy as np
import pandas as pd
from pprint import pprint

if __name__ == '__main__':
    try:
        from lazypredict.Supervised import LazyRegressor
    except ImportError:
        raise Exception('pip install lazypredict')

    # NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed
    # in 1.2 — this example requires an older scikit-learn (see README warning).
    boston = datasets.load_boston()
    X, y = shuffle(boston.data, boston.target)
    X = X.astype(np.float32)

    n_train = 100
    n_test = 50
    X_train, y_train = X[:n_train], y[:n_train]
    X_test, y_test = X[n_train:(n_train + n_test)], y[n_train:(n_train + n_test)]
    X_val, y_val = X[(n_train + n_test):], y[(n_train + n_test):]
    X_train_and_test = X[:(n_train + n_test)]
    y_train_and_test = y[:(n_train + n_test)]

    # Fit on train, score out-of-sample on test. predictions1 holds each
    # model's predictions for X_test (one column per model).
    reg1 = LazyRegressor(verbose=0, ignore_warnings=False, custom_metric=None, predictions=True)
    models1, predictions1 = reg1.fit(np.copy(X_train), np.copy(X_test), np.copy(y_train), np.copy(y_test))
    print(models1[:5])

    # Refit on train+test, predict the held-out validation set.
    reg2 = LazyRegressor(verbose=0, ignore_warnings=False, custom_metric=None, predictions=True)
    X_train_and_test_copy = np.copy(X_train_and_test)
    X_val_copy = np.copy(X_val)
    models2, predictions2 = reg2.fit(X_train_and_test_copy, X_val_copy, np.copy(y_train_and_test), np.copy(y_val))
    yhat_val = predictions2.values
    print(models2[:5])

    # In-sample performance on train (train == eval set on purpose).
    reg3 = LazyRegressor(verbose=0, ignore_warnings=False, custom_metric=None, predictions=True)
    models3, predictions3 = reg3.fit(np.copy(X_train), np.copy(X_train), np.copy(y_train), np.copy(y_train))

    # In-sample performance on train + test.
    reg4 = LazyRegressor(verbose=0, ignore_warnings=False, custom_metric=None, predictions=True)
    models4, predictions4 = reg4.fit(np.copy(X_train_and_test), np.copy(X_train_and_test), np.copy(y_train_and_test), np.copy(y_train_and_test))

    best_model_1 = models1.index[0]  # <-- Best out of sample on test
    best_model_2 = models3.index[0]  # <-- Best in sample on train
    best_model_3 = models4.index[0]  # <-- Best in sample on train+test

    if True:
        # Train a covariance-based portfolio manager on the out-of-sample
        # prediction errors, producing one weight per model.
        print('Creating portfolio ...')
        from precise.skaters.managers.ppomanagers import ppo_sk_glcv_pcov_d0_n100_t0_vol_long_manager as mgr
        s = {}
        yhat_test = np.copy(predictions1.values)  # predictions made on X_test
        n_preds = len(yhat_test)  # distinct name: do not shadow n_train above
        # e=-1 tells the manager to only update state; e=1 on the final
        # observation requests the actual weight computation.
        es = [-1] * (n_preds - 1) + [1]
        # BUGFIX: predictions1 are for X_test, so the matching targets are
        # y_test (the original zipped against y_train, mismatching rows).
        for y_row, y_target, e in zip(yhat_test, y_test, es):
            y_error = np.copy(y_row - y_target)
            w, s = mgr(s=s, y=y_error, e=e)
    else:
        # Fallback: equal weights across all models.
        n_models = len(models1)
        w = np.ones(n_models) / n_models

    # Rank models by their assigned weight (a sorted list of pairs).
    ranked_w = sorted(zip(w, models1.index), reverse=True)
    pprint(ranked_w)

    # Blend the validation predictions with the learned weights, then
    # append the best single-model columns for comparison.
    yhat_weighted = np.dot(yhat_val, w)
    predictions2['weighted'] = yhat_weighted
    predictions2['best 1 (' + best_model_1 + ')'] = predictions2[best_model_1]
    predictions2['best 2 (' + best_model_2 + ')'] = predictions2[best_model_2]
    predictions2['best 3 (' + best_model_3 + ')'] = predictions2[best_model_3]

    # Squared validation errors per model / blend, lowest mean first.
    val_errors = predictions2.copy()
    for col in predictions2.columns:
        val_errors[col] = predictions2[col] - y_val

    sq_errors = val_errors ** 2
    print(sq_errors.mean().sort_values())
    print('done')

0 commit comments

Comments
 (0)