Skip to content

Commit 75568b9

Browse files
fixed the changes
1 parent 0cccb03 commit 75568b9

File tree

1 file changed

+17
-10
lines changed

1 file changed

+17
-10
lines changed

llmtune/cli/toolkit.py

Lines changed: 17 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
from os.path import exists, join
44

55
import torch
6+
import pandas as pd
67
import typer
78
import yaml
89
from pydantic import ValidationError
@@ -15,7 +16,7 @@
1516
from llmtune.ui.rich_ui import RichUI
1617
from llmtune.utils.ablation_utils import generate_permutations
1718
from llmtune.utils.save_utils import DirectoryHelper
18-
19+
from llmtune.qa.generics import QaTestRegistry, LLMTestSuite
1920

2021
hf_utils.logging.set_verbosity_error()
2122
torch._logging.set_logs(all=logging.CRITICAL)
@@ -73,15 +74,21 @@ def run_one_experiment(config: Config, config_path: str) -> None:
7374
else:
7475
RichUI.inference_found(results_path)
7576

76-
# QA -------------------------------
77-
# RichUI.before_qa()
78-
# qa_path = dir_helper.save_paths.qa
79-
# if not exists(qa_path) or not listdir(qa_path):
80-
# # TODO: Instantiate unit test classes
81-
# # TODO: Load results.csv
82-
# # TODO: Run Unit Tests
83-
# # TODO: Save Unit Test Results
84-
# pass
77+
RichUI.before_qa()
78+
qa_path = dir_helper.save_paths.qa
79+
if not exists(qa_path) or not listdir(qa_path):
80+
# TODO: Instantiate unit test classes
81+
llm_tests = config.get("qa", {}).get("llm_tests", [])
82+
tests = QaTestRegistry.create_tests_from_list(llm_tests)
83+
# TODO: Load results.csv
84+
results_df = pd.read_csv(results_file_path)
85+
prompts = results_df["prompt"].tolist()
86+
ground_truths = results_df["ground_truth"].tolist()
87+
model_preds = results_df["model_prediction"].tolist()
88+
# TODO: Run Unit Tests
89+
test_suite = LLMTestSuite(tests, prompts, ground_truths, model_preds)
90+
# TODO: Save Unit Test Results
91+
test_suite.save_test_results("unit_test_results.csv")
8592

8693

8794
@app.command()

0 commit comments

Comments
 (0)