-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path eval.py
More file actions
110 lines (91 loc) · 3.03 KB
/
eval.py
File metadata and controls
110 lines (91 loc) · 3.03 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import argparse
import os
import editdistance
import numpy as np
from tqdm import tqdm
from llm_tw_word.translate import OpenAITranslator
from llm_tw_word.translate import LlamaTranslator
from llm_tw_word.const import TRANSLATOR_TYPE
from llm_tw_word.const import DEFAULT_LLAMA_MODEL
from llm_tw_word.const import DEFAULT_OPENAI_MODEL
from llm_tw_word.io import load_json
from llm_tw_word.io import save_json
from llm_tw_word.io import mkdir_p
def parse_args():
    """Build the CLI parser and return the parsed arguments.

    Positional ``translator`` selects the backend; the optional flags
    cover model override, dataset path, batch size, and output directory.
    """
    parser = argparse.ArgumentParser(
        description="Script for model performance evaluation",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        "translator",
        type=str,
        choices=(TRANSLATOR_TYPE.LLAMA, TRANSLATOR_TYPE.OPENAI),
        help="Translator type",
    )
    parser.add_argument(
        "--model",
        type=str,
        default=None,
        help=(
            "Specified model name for the translator. If not provided, there"
            " will be a default model"
        ),
    )
    parser.add_argument(
        "--data",
        type=str,
        default="data/dataset/test.json",
        help="Path to dataset file",
    )
    parser.add_argument(
        "--batch",
        type=int,
        default=16,
        help="Batch size",
    )
    parser.add_argument(
        "-o",
        "--output",
        type=str,
        default="output_eval",
        help="Path to output directory",
    )
    return parser.parse_args()
def main(args):
    """Evaluate a translator on a dataset and report the mean edit distance.

    Loads samples from ``args.data``, translates them in batches of
    ``args.batch`` with the translator selected by ``args.translator``
    (model overridable via ``args.model``), and writes per-sample results
    to ``<args.output>/<translator>.json``.
    """
    data_path = args.data
    translator_name = args.translator
    model_name = args.model
    batch_size = args.batch
    output_dir = args.output

    mkdir_p(output_dir)

    # Pick the backend; each translator type has its own default model.
    if translator_name == TRANSLATOR_TYPE.LLAMA:
        model_name = model_name if model_name else DEFAULT_LLAMA_MODEL
        translator = LlamaTranslator(model_name=model_name)
    else:
        model_name = model_name if model_name else DEFAULT_OPENAI_MODEL
        translator = OpenAITranslator(model_name=model_name)

    samples = load_json(data_path)
    print(f"Running translator: {translator_name}, model: {model_name}")

    results = []
    for start in tqdm(range(0, len(samples), batch_size)):
        # Python slicing clamps at the sequence end; no min() needed.
        batch = samples[start:start + batch_size]
        texts = [sample["text_trad"] for sample in batch]
        expecteds = [sample["text_tw"] for sample in batch]
        preds = translator.translate(texts)

        for text_trad, text_tw, pred in zip(texts, expecteds, preds):
            results.append({
                "text_trad": text_trad,
                "text_tw": text_tw,
                "pred": pred,
                # Character-level edit distance between reference and prediction.
                "distance": editdistance.eval(text_tw, pred),
            })

    avg_distance = np.mean([r["distance"] for r in results])

    output_path = os.path.join(output_dir, f"{translator_name}.json")
    save_json(output_path, results)
    print(f"Average of Edit Distance: {avg_distance}")
    print(f"Evaluation Results saved: {output_path}")
# Script entry point: parse CLI arguments and run the evaluation.
if __name__ == "__main__":
    main(parse_args())