forked from JonathanCrabbe/Symbolic-Pursuit
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathbuild_interpreter.py
More file actions
140 lines (124 loc) · 4.22 KB
/
build_interpreter.py
File metadata and controls
140 lines (124 loc) · 4.22 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
import argparse
import csv
import pickle
import sys
from time import gmtime, strftime
from sklearn.metrics import mean_squared_error, r2_score
import symbolic_pursuit.logger as log
from datasets.data_loader_UCI import data_loader, mixup
from experiments.train_model import train_model
from symbolic_pursuit.models import SymbolicRegressor
log.add(sink=sys.stderr, level="INFO")
def init_arg(argv=None):
    """Build and parse the command-line arguments for the experiment.

    Parameters
    ----------
    argv : list[str] | None
        Argument strings to parse. Defaults to ``None``, in which case
        argparse reads ``sys.argv[1:]`` exactly as before — the new
        parameter is backward-compatible and makes the function testable
        and callable programmatically.

    Returns
    -------
    argparse.Namespace
        Parsed arguments (dataset, test_ratio, model, model_type,
        loss_tol, ratio_tol, maxiter, eps, random_seed).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--dataset", default="wine-quality-red", type=str)
    parser.add_argument(
        "--test_ratio", default=0.2, type=float, help="percentage of test examples"
    )
    parser.add_argument("--model", default=None, help="a black box model to interpret")
    parser.add_argument(
        "--model_type",
        default="MLP",
        type=str,
        help="type of black-box (MLP, KNN, ...)",
    )
    parser.add_argument(
        "--loss_tol",
        default=1.0e-3,
        type=float,
        help="the tolerance for the loss under which the pursuit stops",
    )
    parser.add_argument(
        "--ratio_tol",
        default=0.9,
        type=float,
        help="a new term is added only if new_loss / old_loss < ratio_tol",
    )
    parser.add_argument(
        "--maxiter",
        default=100,
        type=int,
        help="maximum number of iterations for optimization",
    )
    parser.add_argument(
        "--eps",
        default=1.0e-5,
        type=float,
        help="small number used for numerical stability",
    )
    parser.add_argument(
        "--random_seed", type=int, default=42, help="random seed for reproducibility"
    )
    return parser.parse_args(argv)
if __name__ == "__main__":
    # Extract arguments from the parser.
    args = init_arg()
    dataset_name = args.dataset
    model = args.model
    model_type = args.model_type
    test_ratio = args.test_ratio
    loss_tol = args.loss_tol
    ratio_tol = args.ratio_tol
    maxiter = args.maxiter
    eps = args.eps
    random_seed = args.random_seed
    print(
        "\nWelcome to this experiment evaluating the performance of symbolic modeling. \n"
        + "This experiment uses the black-box {} on the dataset {}. \n".format(
            model_type, dataset_name
        )
        + f"The ratio of test examples is test_ratio={test_ratio}. \n"
    )
    # Train the model (if no model is given) and the symbolic model.
    X_train, y_train, X_test, y_test = data_loader(
        dataset_name, random_seed=random_seed, test_ratio=test_ratio
    )
    # Mixup augmentation of the training inputs; the symbolic model is
    # fitted on the black box's predictions over this augmented set.
    X_mixup = mixup(X_train, random_seed=random_seed)
    if model is None:
        model = train_model(
            X_train, y_train, black_box=model_type, random_seed=random_seed
        )
    else:
        # A pre-built model was supplied; record its class name instead of
        # the --model_type flag so the logs reflect what was actually used.
        model_type = model.__class__.__name__
    symbolic_model = SymbolicRegressor(
        loss_tol=loss_tol,
        ratio_tol=ratio_tol,
        maxiter=maxiter,
        eps=eps,
        random_seed=random_seed,
    )
    symbolic_model.fit(model.predict, X_mixup)
    # Compute the metrics. Predict once per model on the test set and
    # reuse the arrays — each predict call may be expensive, and the
    # original recomputed them for every metric.
    model_pred_test = model.predict(X_test)
    symbolic_pred_test = symbolic_model.predict(X_test)
    # Black box vs. ground truth, symbolic model vs. ground truth, and
    # symbolic model vs. black box (fidelity of the interpretation).
    model_mse = mean_squared_error(y_test, model_pred_test)
    symbolic_mse = mean_squared_error(y_test, symbolic_pred_test)
    model_symbolic_mse = mean_squared_error(model_pred_test, symbolic_pred_test)
    model_r2 = r2_score(y_test, model_pred_test)
    symbolic_r2 = r2_score(y_test, symbolic_pred_test)
    model_symbolic_r2 = r2_score(model_pred_test, symbolic_pred_test)
    symbolic_nterms = len(symbolic_model.terms_list)
    # Save everything: append one space-delimited row of metrics to the
    # shared results CSV, then pickle the (model, symbolic_model) pair.
    time_str = strftime("%Y-%m-%d %H:%M:%S", gmtime())
    with open("experiments/dataset_results.csv", "a", newline="") as csvfile:
        csv_writer = csv.writer(csvfile, delimiter=" ")
        csv_writer.writerow(
            [
                time_str,
                dataset_name,
                model_type,
                model_mse,
                symbolic_mse,
                model_symbolic_mse,
                model_r2,
                symbolic_r2,
                model_symbolic_r2,
                symbolic_nterms,
            ]
        )
    # NOTE(review): time_str contains ':' and a space, which are invalid in
    # Windows filenames — works on POSIX only. Consider a filesystem-safe
    # timestamp format here if Windows support is needed (left unchanged
    # because the same time_str is also written to the CSV above).
    with open(
        f"experiments/models/{dataset_name}_{model_type}_{time_str}.pickle",
        "wb",
    ) as filename:
        save_tuple = (model, symbolic_model)
        pickle.dump(save_tuple, filename)