Skip to content

Commit 7029fc2

Browse files
Update evaluation.py
1 parent 97f7811 commit 7029fc2

File tree

1 file changed

+128
-0
lines changed

1 file changed

+128
-0
lines changed

DimRed/evaluation.py

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,129 @@
11
from DimRed import *
2+
3+
4+
class Evaluation:
    """Benchmark dimensionality-reduction pipelines against several classifier
    families (grid-searched sklearn models, XGBoost, LightGBM).

    Every model run is logged to Weights & Biases; aggregate results are
    written to JSON files by :meth:`evaluate`.
    """

    def __init__(self, _data: Dict[str, np.ndarray], all_possible_variations: Dict[str, List], labels: np.ndarray, metric: str = "accuracy", sklearn_config: Dict[Any, Dict[str, Union[str, int]]] = sklearn_config, lgb_config: Dict[str, Union[str, int]] = lgb_config, xgb_config: Dict[str, Union[str, int]] = xgb_config) -> None:
        """Store data splits, pipeline variations and model configurations.

        Args:
            _data: expects keys 'X_train', 'X_test', 'y_train', 'y_test'
                (read by :meth:`evaluate`) — TODO confirm against caller.
            all_possible_variations: mapping of variation-family name to a
                list of transformer pipeline objects (must support
                ``fit_transform`` / ``transform``).
            labels: class labels; stored but not read by this class's methods.
            metric: key looked up in sklearn's ``classification_report`` dict.
                Must map to a scalar (e.g. "accuracy"); per-class keys map to
                dicts and would break the best-model comparisons.
            sklearn_config: mapping of model class -> hyper-parameter grid
                for GridSearchCV. Defaults come from the DimRed module.
            lgb_config: LightGBM training parameters.
            xgb_config: XGBClassifier constructor parameters.
        """
        self.sklearn_config = sklearn_config
        self.lgb_config = lgb_config
        self.xgb_config = xgb_config
        self._data = _data
        self.all_variations = all_possible_variations
        self.labels = labels
        self.metric = metric

    def sklearn(self, X_train: np.ndarray, X_test: np.ndarray, y_train: np.ndarray, y_test: np.ndarray, results: Union[Dict, None] = None, dimred_technique: Union[str, None] = None) -> Tuple[Dict[str, Union[str, int]], Dict[str, int]]:
        """Grid-search and evaluate every sklearn model in ``self.sklearn_config``.

        Args:
            X_train, X_test, y_train, y_test: the train/test split.
            results: dict to accumulate per-model metrics into; a fresh dict
                is created when omitted (avoids the mutable-default pitfall).
            dimred_technique: prefix for the wandb run name (the name of the
                dimensionality-reduction pipeline being evaluated).

        Returns:
            ``(results, best_metrics)`` where ``best_metrics`` is the full
            classification_report dict of the model scoring highest on
            ``self.metric`` (empty dict if no model beats 0).
        """
        if results is None:
            results = {}
        prefix = dimred_technique or ""
        best_score = 0
        best_metrics: Dict = {}
        for model_cls in self.sklearn_config:
            name = prefix + model_cls().__class__.__name__
            model_config = self.sklearn_config[model_cls]
            wandb.init(project=PROJECT_NAME, name=name, config={
                "model": name, "results": results, "modelLibrary": "sklearn", "config": model_config})
            model = GridSearchCV(model_cls(), model_config, cv=5, verbose=0)
            # GridSearchCV expects a 1-D label vector.
            y_train = y_train.reshape(y_train.shape[0],)
            model.fit(X_train, y_train)
            y_preds = model.predict(X_test)
            y_probas = model.predict_proba(X_test)
            metrics = classification_report(
                y_test, y_preds, output_dict=True)
            # BUG FIX: the original keyed on model.__class__.__name__, which is
            # always "GridSearchCV" after the rebinding above, so every model
            # overwrote the previous entry. Key on the unique run name instead
            # (consistent with xgb()/lgb()).
            results[name] = metrics
            wandb.log(metrics)
            wandb.sklearn.plot_classifier(
                model,
                X_train,
                X_test,
                y_train,
                y_test,
                y_preds,
                y_probas,
                # labels argument: one index per class (columns of y_probas)
                range(min(y_probas.shape)),
                model_name=name,
                feature_names=None,
            )
            # NOTE(review): assumes self.metric indexes a scalar entry of the
            # classification_report dict (e.g. "accuracy") — confirm.
            if metrics[self.metric] > best_score:
                best_score = metrics[self.metric]
                best_metrics = metrics
            wandb.finish()
        return results, best_metrics

    def xgb(self, X_train: np.ndarray, X_test: np.ndarray, y_train: np.ndarray, y_test: np.ndarray, results: Union[Dict, None] = None, dimred_technique: Union[str, None] = None) -> Tuple[Dict[str, Union[str, int]], Dict[str, int]]:
        """Train and evaluate one XGBoost classifier with ``self.xgb_config``.

        Args/returns mirror :meth:`sklearn`; since there is only one model,
        its metrics dict is returned directly as the "best" metrics.
        """
        if results is None:
            results = {}
        model = xgb.XGBClassifier(**self.xgb_config)
        name = (dimred_technique or "") + model.__class__.__name__
        wandb.init(project=PROJECT_NAME, name=name, config={"config": self.xgb_config,
                                                            "model": name, "results": results, "modelLibrary": "XGB"})
        y_train, y_test = label_encoding(y_train, y_test)
        # Training data is moved to the GPU via cupy; prediction below uses
        # the original host-side X_test array.
        model.fit(cp.asarray(X_train), cp.asarray(y_train), eval_set=[
                  (cp.asarray(X_test), cp.asarray(y_test))], callbacks=[WandbCallback(log_model=True)])
        y_preds = model.predict(X_test)
        metrics = classification_report(
            y_test, y_preds, output_dict=True)
        results[name] = metrics
        wandb.log(metrics)
        wandb.finish()
        return results, metrics

    def lgb(self, X_train: np.ndarray, X_test: np.ndarray, y_train: np.ndarray, y_test: np.ndarray, results: Union[Dict, None] = None, dimred_technique: Union[str, None] = None) -> Tuple[Dict[str, Union[str, int]], Dict[str, int]]:
        """Train and evaluate one LightGBM model with ``self.lgb_config``.

        Args/returns mirror :meth:`sklearn`; the single model's metrics dict
        is returned directly.
        """
        if results is None:
            results = {}
        name = (dimred_technique or "") + "LGBClf"
        wandb.init(project=PROJECT_NAME, name=name, config={"config": self.lgb_config,
                                                            "results": results, "modelLibrary": "LGB"})
        y_train, y_test = label_encoding(y_train, y_test)
        train_data = lgb.Dataset(X_train, label=y_train)
        test_data = lgb.Dataset(X_test, label=y_test, reference=train_data)
        model = lgb.train(self.lgb_config, train_data,
                          valid_sets=[test_data], callbacks=[wandb_callback()])
        # lgb.train returns per-class probabilities; argmax recovers the
        # predicted class index for the report.
        y_preds = model.predict(X_test)
        metrics = classification_report(
            y_test, np.argmax(y_preds, axis=1), output_dict=True)
        results[name] = metrics
        log_summary(model, save_model_checkpoint=True)
        wandb.log(metrics)
        wandb.finish()
        return results, metrics

    def evaluate(self) -> Tuple[Dict[str, Dict[str, Dict[str, Union[str, int]]]], Dict[str, List]]:
        """Run every pipeline variation through all three model families.

        For each variation family, fits each pipeline variation on the
        training data, evaluates sklearn/XGB/LGB models on the transformed
        split, and tracks the variation with the best average ``self.metric``.

        Side effects: writes ``all_performance_data.json`` and
        ``best_performance_dimred.json`` to the working directory.

        Returns:
            ``(all_pipeline_performance, best_performances)``. NOTE: the
            original annotation claimed a single dict; the method returns a
            2-tuple, which is what the annotation now reflects.
        """
        all_pipeline_performance = {}
        outer_iterator = tqdm(self.all_variations)
        best_performances = {
            self.metric: [],
            "pipeline_variation": [],
            "pipeline_name": [],
        }
        for pipeline_variation_name in outer_iterator:
            # [best average metric, str(best variation), family name]
            best_performing_pipeline = [0, None, pipeline_variation_name]
            specific_pipeline_variations = self.all_variations[pipeline_variation_name]
            inner_iterator = tqdm(specific_pipeline_variations, leave=False)
            for pipeline_variation in inner_iterator:
                name_of_pipeline = pipeline_variation.__class__.__name__
                pipeline_performance = {}
                # Fit the dim-red transform on train, apply to test.
                X_train = pipeline_variation.fit_transform(
                    self._data['X_train'])
                X_test = pipeline_variation.transform(self._data['X_test'])
                inner_iterator.set_description("Sklearn Model...")
                pipeline_performance, sklearn_metrics = self.sklearn(
                    X_train, X_test, self._data['y_train'], self._data['y_test'], pipeline_performance, name_of_pipeline)
                inner_iterator.set_description("Sklearn Model Done :)")
                inner_iterator.set_description("XGB Model...")
                pipeline_performance, xgb_metrics = self.xgb(
                    X_train, X_test, self._data['y_train'], self._data['y_test'], pipeline_performance, name_of_pipeline)
                inner_iterator.set_description("XGB Model Done :)")
                inner_iterator.set_description("LGB Model...")
                pipeline_performance, lgb_metrics = self.lgb(
                    X_train, X_test, self._data['y_train'], self._data['y_test'], pipeline_performance, name_of_pipeline)
                inner_iterator.set_description("LGB Model Done :)")
                all_pipeline_performance[name_of_pipeline] = pipeline_performance
                # Average self.metric across the three model families to rank
                # this variation within its family.
                avg_var = average_metric(self.metric, [sklearn_metrics,
                                                       xgb_metrics, lgb_metrics])
                if avg_var > best_performing_pipeline[0]:
                    best_performing_pipeline[0] = avg_var
                    best_performing_pipeline[1] = str(pipeline_variation)
                inner_iterator.set_description(
                    f"{name_of_pipeline} Done :)")
            best_performances = add_to_dictionary(
                best_performances, best_performing_pipeline)
        with open('all_performance_data.json', 'w') as f:
            json.dump(all_pipeline_performance, f)
        with open('best_performance_dimred.json', 'w') as json_f:
            json.dump(best_performances, json_f)
        return all_pipeline_performance, best_performances

0 commit comments

Comments
 (0)