Commit 5980f9e

add script for comparing automl eval
1 parent 097f5ed commit 5980f9e

File tree

1 file changed: +232 −0 lines changed

scripts/automl_eval.py

Lines changed: 232 additions & 0 deletions
import argparse
import logging

import numpy as np
import pandas as pd
import wandb
from datasets import load_dataset, concatenate_datasets
from sklearn.metrics import classification_report

logging.basicConfig(level="INFO")


def load_data(dataset_name: str) -> tuple[pd.DataFrame, pd.DataFrame]:
    """
    Load a dataset from the Hugging Face datasets library.

    Args:
        dataset_name (str): The name of the dataset to load.

    Returns:
        tuple[pd.DataFrame, pd.DataFrame]: The train and test splits as pandas DataFrames.
    """
    dataset = load_dataset(dataset_name)

    # Some datasets ship sharded splits (train_0/train_1, ...); merge them back together.
    if "train_0" in dataset:
        for col in ["train", "validation"]:
            dataset[col] = concatenate_datasets([dataset[f"{col}_0"], dataset[f"{col}_1"]])
            dataset.pop(f"{col}_0")
            dataset.pop(f"{col}_1")

    train_data = dataset["train"]
    test_data = dataset["test"]

    # Map unlabeled (NaN) rows to a dedicated extra class, max_label + 1.
    train_df = train_data.to_pandas()
    max_label = train_df["label"].max()
    train_df.loc[train_df["label"].isna(), "label"] = max_label + 1

    test_df = test_data.to_pandas()
    test_df.loc[test_df["label"].isna(), "label"] = max_label + 1
    return train_df, test_df
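
A minimal usage sketch for load_data (the dataset name below is a hypothetical placeholder; the function assumes a Hugging Face dataset with "utterance" and "label" columns and train/test splits):

train_df, test_df = load_data("some-org/intent-dataset")  # hypothetical dataset name
print(train_df["label"].value_counts(dropna=False))  # NaN labels now appear as max_label + 1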


def evaluate_fedot(train_df: pd.DataFrame, test_df: pd.DataFrame):
    """
    Train a Fedot model on the provided training and testing data.

    Args:
        train_df (pd.DataFrame): The training data.
        test_df (pd.DataFrame): The testing data.
    """
    # !pip install fedot
    from fedot.api.main import Fedot

    X_train, y_train = train_df[["utterance"]], train_df["label"].astype(int)
    X_test, y_test = test_df[["utterance"]], test_df["label"].astype(int)
    model = Fedot(problem="classification", timeout=5, preset="best_quality", n_jobs=-1)
    model.fit(features=X_train, target=y_train)
    prediction = model.predict(features=X_test)
    return prediction


def evaluate_h2o(train_df: pd.DataFrame, test_df: pd.DataFrame) -> pd.DataFrame:
    """
    Train an H2O model on the provided training and testing data.

    Args:
        train_df (pd.DataFrame): The training data.
        test_df (pd.DataFrame): The testing data.
    """
    # !pip install h2o
    import h2o
    from h2o.automl import H2OAutoML
    from h2o.estimators.word2vec import H2OWord2vecEstimator

    max_models: int = 20
    max_runtime_secs: int = 600
    seed: int = 42

    h2o.init()

    train_h2o = h2o.H2OFrame(train_df)
    test_h2o = h2o.H2OFrame(test_df)
    train_h2o["label"] = train_h2o["label"].asfactor()
    test_h2o["label"] = test_h2o["label"].asfactor()
    train, valid = train_h2o.split_frame(ratios=[0.8])
    text_col = "utterance"
    label_col = "label"

    # Word2Vec needs token lists, so split each utterance on whitespace.
    train_tokens = train[text_col].tokenize("\\s+")
    valid_tokens = valid[text_col].tokenize("\\s+")
    test_tokens = test_h2o[text_col].tokenize("\\s+")

    w2v_model = H2OWord2vecEstimator(sent_sample_rate=0.0, epochs=10)
    w2v_model.train(training_frame=train_tokens)

    # Average each utterance's word vectors into a single fixed-size embedding.
    train_vecs = w2v_model.transform(train_tokens, aggregate_method="AVERAGE")
    valid_vecs = w2v_model.transform(valid_tokens, aggregate_method="AVERAGE")
    test_vecs = w2v_model.transform(test_tokens, aggregate_method="AVERAGE")

    train_ext = train_vecs.cbind(train[label_col])
    valid_ext = valid_vecs.cbind(valid[label_col])
    test_ext = test_vecs.cbind(test_h2o[label_col])

    x_cols = train_vecs.columns
    y_col = label_col

    # Run H2OAutoML on the averaged embeddings.
    aml = H2OAutoML(
        max_models=max_models,
        max_runtime_secs=max_runtime_secs,
        seed=seed,
        balance_classes=True,
        sort_metric="mean_per_class_error",
    )
    aml.train(x=x_cols, y=y_col, training_frame=train_ext, validation_frame=valid_ext, leaderboard_frame=test_ext)

    preds = aml.leader.predict(test_ext)
    # Convert the H2OFrame column back to pandas for downstream scoring.
    return preds["predict"].as_data_frame()["predict"]


def evaluate_lama(train_df: pd.DataFrame, test_df: pd.DataFrame) -> pd.DataFrame:
    """
    Train a LightAutoML (LAMA) model on the provided training and testing data.

    Args:
        train_df (pd.DataFrame): The training data.
        test_df (pd.DataFrame): The testing data.
    """
    # !pip install lightautoml[nlp]
    # Requires pytorch<2.7.0, see https://github.com/sb-ai-lab/LightAutoML/issues/173
    from lightautoml.automl.presets.text_presets import TabularNLPAutoML
    from lightautoml.tasks import Task

    automl = TabularNLPAutoML(task=Task(name="multiclass", metric="f1_macro"))
    automl.fit_predict(train_df, roles={"target": "label"})
    test_preds = automl.predict(test_df).data
    # predict() returns class probabilities; take the argmax as the predicted label.
    return np.argmax(test_preds, axis=-1)


def evaluate_gama(train_df: pd.DataFrame, test_df: pd.DataFrame) -> pd.DataFrame:
    """
    Train a GAMA model on the provided training and testing data.

    Args:
        train_df (pd.DataFrame): The training data.
        test_df (pd.DataFrame): The testing data.
    """
    # NOT WORKING
    # ValueError: population must be at least size 3 for a pair to be selected
    raise NotImplementedError("GAMA is not working yet.")
    # !pip install gama
    from gama import GamaClassifier

    automl = GamaClassifier(max_total_time=180, store="nothing")
    automl.fit(train_df[["utterance"]], train_df[["label"]])


def evaluate_glueon(train_df: pd.DataFrame, test_df: pd.DataFrame) -> pd.DataFrame:
    """
    Train an AutoGluon model on the provided training and testing data.

    Args:
        train_df (pd.DataFrame): The training data.
        test_df (pd.DataFrame): The testing data.
    """
    # !pip install autogluon
    import uuid

    from autogluon.multimodal import MultiModalPredictor

    # Use a unique path so repeated runs don't clash on the same model directory.
    model_path = f"/tmp/{uuid.uuid4().hex}-automm_sst"
    predictor = MultiModalPredictor(label="label", problem_type="multiclass", eval_metric="acc", path=model_path)
    predictor.fit(train_df, time_limit=180)
    predictions = predictor.predict(test_df)
    return predictions


def main():
    parser = argparse.ArgumentParser(description="Evaluate AutoML models on a dataset.")
    parser.add_argument(
        "--dataset",
        type=str,
        help="The name of the dataset to evaluate.",
    )
    parser.add_argument(
        "--framework",
        type=str,
        choices=["fedot", "h2o", "lama", "gama", "glueon"],
        help="The name of the framework to evaluate.",
    )
    args = parser.parse_args()
    dataset_name = args.dataset
    framework = args.framework
    run = wandb.init(
        project="AutoML-Eval",
        name=f"eval-{dataset_name}-{framework}",
        tags=[dataset_name, framework],
        config={
            "dataset": dataset_name,
            "framework": framework,
        },
    )
    # Load the dataset
    train_df, test_df = load_data(dataset_name)

    # Evaluate the chosen framework
    if framework == "fedot":
        predictions = evaluate_fedot(train_df, test_df)
    elif framework == "h2o":
        predictions = evaluate_h2o(train_df, test_df)
    elif framework == "lama":
        predictions = evaluate_lama(train_df, test_df)
    elif framework == "gama":
        predictions = evaluate_gama(train_df, test_df)
    elif framework == "glueon":
        predictions = evaluate_glueon(train_df, test_df)
    else:
        raise ValueError(f"Unknown framework: {framework}")
    # Log the predictions
    run.log({"predictions": wandb.Table(dataframe=pd.DataFrame(predictions))})
    # Log the classification report (cast labels to int to match the integer predictions)
    report = classification_report(test_df["label"].astype(int), predictions, output_dict=True)
    run.log(report)
    # Finish the run
    run.finish()


if __name__ == "__main__":
    main()
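
A usage sketch for reference (the dataset name is a hypothetical placeholder; the framework must be one of the choices registered in main):

python scripts/automl_eval.py --dataset some-org/intent-dataset --framework h2o

Each run logs its prediction table and classification report to the AutoML-Eval project in Weights & Biases.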
