-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathevaluation.py
More file actions
141 lines (114 loc) · 5.72 KB
/
evaluation.py
File metadata and controls
141 lines (114 loc) · 5.72 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
import pandas as pd
from transformers import T5Tokenizer, T5ForConditionalGeneration
from datasets import Dataset
from pathlib import Path
from tqdm import tqdm
class EvaluateModel:
    """
    Evaluate a trained T5 model (goal 1 or goal 2) on test data.

    Predictions are generated from the *input* column (``dataset_columns[1]``),
    with the task ``prefix`` prepended, and saved next to the *target* column
    (``dataset_columns[0]``) for comparison.
    """
    def __init__(
        self,
        model_path: Path,
        save_path: Path = Path("results"),
        batch_size: int = 16,
        dataset_columns: tuple = ("non_gendered", "gendered"),
        prefix: str = "Bringe den Satz in eine ungegenderte Form: "
    ):
        """
        :param model_path: The path to the trained model.
        :param save_path: The directory in which to save ``results.csv``.
        :param batch_size: The batch size for generating predictions.
        :param dataset_columns: The columns of the dataset. The first column should be
            the target sentence, the second the input sentence.
        :param prefix: The prefix to add to the input sentence, being for example
            "Übersetze den Satz in XY: ".
        """
        self.model = T5ForConditionalGeneration.from_pretrained(model_path)
        self.tokenizer = T5Tokenizer.from_pretrained(model_path)
        self.save_path = save_path / "results.csv"
        self.batch_size = batch_size
        # Copy into a list: avoids the shared-mutable-default pitfall and keeps
        # pandas multi-column selection (df[list_of_columns]) working even when
        # a tuple is passed.
        self.dataset_columns = list(dataset_columns)
        self.prefix = prefix
    def eval_with_dataset(self, test_data_path: Path) -> None:
        """
        Evaluate the model with the test dataset. The method generates predictions
        for the input sentences and saves target/input/prediction triples to a csv file.
        :param test_data_path: The path to the test dataset (CSV).
        :raises FileNotFoundError: If ``test_data_path`` does not exist.
        """
        if not test_data_path.exists():
            raise FileNotFoundError(f"Cannot find evaluation data at {test_data_path}.")
        test_data_df = pd.read_csv(test_data_path)
        # Keep only the relevant columns, drop rows with missing values, and
        # re-index so rows line up positionally with the predictions list below.
        test_data_df = test_data_df[self.dataset_columns].dropna().reset_index(drop=True)
        # BUGFIX: generate from the *input* column (index 1), not the target
        # column (index 0), and apply the task prefix the model was trained with.
        # The previous Dataset.map() preprocessing step was removed because its
        # tokenized output was discarded and the batch re-tokenized anyway.
        input_sentences = test_data_df[self.dataset_columns[1]].tolist()
        predictions = []
        for i in tqdm(range(0, len(input_sentences), self.batch_size)):
            batch = input_sentences[i:i + self.batch_size]
            predictions.extend(self.__generate_predictions(batch))
        results = pd.DataFrame({
            self.dataset_columns[0]: test_data_df[self.dataset_columns[0]],
            self.dataset_columns[1]: test_data_df[self.dataset_columns[1]],
            "predicted": predictions
        })
        # Make sure the results directory exists before writing.
        self.save_path.parent.mkdir(parents=True, exist_ok=True)
        results.to_csv(self.save_path, index=False)
    def __generate_predictions(self, sentences: list) -> list:
        """
        Generate predictions for a batch of raw input sentences.
        :param sentences: The input sentences (without the task prefix).
        :return: The decoded predictions, one per input sentence.
        """
        inputs = self.tokenizer(
            [self.prefix + sentence for sentence in sentences],
            return_tensors="pt", max_length=512, truncation=True, padding="max_length")
        # Generate predictions using the model
        outputs = self.model.generate(
            input_ids=inputs.input_ids,
            attention_mask=inputs.attention_mask,
            max_length=512)
        return self.tokenizer.batch_decode(outputs, skip_special_tokens=True)
    def eval_with_sentence(self, sentence: str) -> str:
        """
        Evaluate the model with a single sentence.

        NOTE(review): unlike ``eval_with_dataset``, the caller is expected to
        include the task prefix in ``sentence`` here (see ``__main__`` usage).
        :param sentence: The sentence to evaluate (including the prefix).
        :return: The predicted sentence.
        """
        inputs = self.tokenizer(sentence, return_tensors="pt", max_length=512,
                                truncation=True, padding="max_length")
        # Generate predictions using the model
        outputs = self.model.generate(
            input_ids=inputs.input_ids,
            attention_mask=inputs.attention_mask,
            max_length=512)
        predictions = self.tokenizer.batch_decode(outputs, skip_special_tokens=True)
        return predictions[0]
if __name__ == "__main__":
    # --- Goal 1: rewrite gendered sentences into a non-gendered form ---
    experiment_path = Path("experiments", "flan_t5_finetuning_correlaid", "2025-01-09_10-37-49")
    model_path = Path(experiment_path, "model")
    save_path = Path(experiment_path, "results")
    eval_model = EvaluateModel(model_path, save_path)
    # Build the prompt from the model's own prefix so it always matches the
    # prefix used during dataset evaluation. (The previous hard-coded string
    # contained a typo: "ungenderte" instead of "ungegenderte".)
    test_sentence = eval_model.prefix + "Die Lehrer*innen sind cool."
    print("Predicted sentence:", eval_model.eval_with_sentence(test_sentence))
    test_data_path = Path("data", "standard", "test.csv")
    eval_model.eval_with_dataset(test_data_path)

    # --- Goal 2: rewrite gendered sentences into an inclusive form ---
    experiment_path = Path("experiments", "flan_t5_finetuning_inclusive_form", "2025-01-26_19-33-13")
    model_path = Path(experiment_path, "model")
    save_path = Path(experiment_path, "results")
    eval_model = EvaluateModel(
        model_path,
        save_path,
        dataset_columns=["enhanced", "gendered"],
        prefix="Bringe den Satz in eine inklusive Form: "
    )
    # Same pattern: reuse the configured prefix for the single-sentence check.
    test_sentence = eval_model.prefix + "Die Lehrer*innen sind cool."
    print("Predicted sentence:", eval_model.eval_with_sentence(test_sentence))
    test_data_path = Path("data", "inclusive_form", "v1", "test.csv")
    eval_model.eval_with_dataset(test_data_path)