3 | 3 | from os.path import exists, join |
4 | 4 |
5 | 5 | import torch |
| 6 | +import pandas as pd |
6 | 7 | import typer |
7 | 8 | import yaml |
8 | 9 | from pydantic import ValidationError |
15 | 16 | from llmtune.ui.rich_ui import RichUI |
16 | 17 | from llmtune.utils.ablation_utils import generate_permutations |
17 | 18 | from llmtune.utils.save_utils import DirectoryHelper |
18 | | - |
| 19 | +from llmtune.qa.generics import LLMTestSuite, QaTestRegistry
19 | 20 |
20 | 21 | hf_utils.logging.set_verbosity_error() |
21 | 22 | torch._logging.set_logs(all=logging.CRITICAL) |
@@ -73,15 +74,21 @@ def run_one_experiment(config: Config, config_path: str) -> None: |
73 | 74 | else: |
74 | 75 | RichUI.inference_found(results_path) |
75 | 76 |
76 | | - # QA ------------------------------- |
77 | | - # RichUI.before_qa() |
78 | | - # qa_path = dir_helper.save_paths.qa |
79 | | - # if not exists(qa_path) or not listdir(qa_path): |
80 | | - # # TODO: Instantiate unit test classes |
81 | | - # # TODO: Load results.csv |
82 | | - # # TODO: Run Unit Tests |
83 | | - # # TODO: Save Unit Test Results |
84 | | - # pass |
| 77 | + RichUI.before_qa() |
| 78 | + qa_path = dir_helper.save_paths.qa |
| 79 | + if not exists(qa_path) or not listdir(qa_path): |
| 80 | + # Instantiate the QA test classes named in the config
| 81 | + llm_tests = config.qa.llm_tests if config.qa else []
| 82 | + tests = QaTestRegistry.create_tests_from_list(llm_tests) |
| 83 | + # Load the inference results saved by the step above
| 84 | + results_df = pd.read_csv(results_file_path) |
| 85 | + prompts = results_df["prompt"].tolist() |
| 86 | + ground_truths = results_df["ground_truth"].tolist() |
| 87 | + model_preds = results_df["model_prediction"].tolist() |
| 88 | + # Score every (prompt, ground truth, prediction) row with each test
| 89 | + test_suite = LLMTestSuite(tests, prompts, ground_truths, model_preds) |
| 90 | + # Write the per-test metrics into the QA directory checked above
| 91 | + test_suite.save_test_results(join(qa_path, "unit_test_results.csv"))
85 | 92 |
86 | 93 |
87 | 94 | @app.command() |
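
The hunk above leans on two names imported from `llmtune.qa.generics` whose definitions are not part of this diff. Below is a minimal sketch of the interfaces the new code appears to assume; the class and method names match the import and the call sites, but everything else (the registry decorator, the metric signature, the hypothetical `exact_match` test) is an illustrative assumption, not the package's actual implementation:

```python
# Sketch of the llmtune.qa.generics interfaces assumed by the diff above.
# Only QaTestRegistry.create_tests_from_list, the LLMTestSuite constructor,
# and save_test_results are exercised by the new code; the bodies here are
# assumptions, not the library's real implementation.
from abc import ABC, abstractmethod
from typing import Dict, List, Type

import pandas as pd


class LLMQaTest(ABC):
    """One named quality check over a (prompt, ground_truth, model_prediction) row."""

    @property
    @abstractmethod
    def test_name(self) -> str: ...

    @abstractmethod
    def get_metric(self, prompt: str, ground_truth: str, model_pred: str) -> float: ...


class QaTestRegistry:
    """Maps strings from the config's qa.llm_tests list to registered test classes."""

    registry: Dict[str, Type[LLMQaTest]] = {}

    @classmethod
    def register(cls, name: str):
        def decorator(test_cls: Type[LLMQaTest]) -> Type[LLMQaTest]:
            cls.registry[name] = test_cls
            return test_cls

        return decorator

    @classmethod
    def create_tests_from_list(cls, names: List[str]) -> List[LLMQaTest]:
        # One instance per configured test name; unknown names raise KeyError.
        return [cls.registry[name]() for name in names]


class LLMTestSuite:
    """Runs every test against each row of the inference results."""

    def __init__(
        self,
        tests: List[LLMQaTest],
        prompts: List[str],
        ground_truths: List[str],
        model_preds: List[str],
    ) -> None:
        self.tests = tests
        self.prompts = prompts
        self.ground_truths = ground_truths
        self.model_preds = model_preds

    def run_tests(self) -> Dict[str, List[float]]:
        return {
            test.test_name: [
                test.get_metric(prompt, truth, pred)
                for prompt, truth, pred in zip(self.prompts, self.ground_truths, self.model_preds)
            ]
            for test in self.tests
        }

    def save_test_results(self, path: str) -> None:
        # One column per test, one row per example.
        pd.DataFrame(self.run_tests()).to_csv(path, index=False)


@QaTestRegistry.register("exact_match")  # hypothetical test name, for illustration only
class ExactMatchTest(LLMQaTest):
    @property
    def test_name(self) -> str:
        return "exact_match"

    def get_metric(self, prompt: str, ground_truth: str, model_pred: str) -> float:
        return float(ground_truth.strip() == model_pred.strip())
```

Under these assumptions, a config listing `llm_tests: [exact_match]` would yield one `ExactMatchTest` instance, and `save_test_results` would write a CSV with one column per registered metric and one row per example from `results.csv`.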