Merge pull request #131 from viveksingh-ctrl/integrate-main-qa

benjaminye · web-flow · commit 0352c770557a · 2024-04-11T11:59:02.000-04:00
Integrate main qa
diff --git a/llmtune/cli/toolkit.py b/llmtune/cli/toolkit.py
@@ -15,6 +15,7 @@
 from llmtune.finetune.lora import LoRAFinetune
 from llmtune.inference.lora import LoRAInference
 from llmtune.pydantic_models.config_model import Config
+from llmtune.qa.generics import LLMTestSuite, QaTestRegistry
 from llmtune.ui.rich_ui import RichUI
 from llmtune.utils.ablation_utils import generate_permutations
 from llmtune.utils.save_utils import DirectoryHelper
@@ -84,15 +85,13 @@ def run_one_experiment(config: Config, config_path: Path) -> None:
     else:
         RichUI.results_found(results_path)
 
-    # QA -------------------------------
-    # RichUI.before_qa()
-    # qa_path = dir_helper.save_paths.qa
-    # if not exists(qa_path) or not listdir(qa_path):
-    #     # TODO: Instantiate unit test classes
-    #     # TODO: Load results.csv
-    #     # TODO: Run Unit Tests
-    #     # TODO: Save Unit Test Results
-    #     pass
+    RichUI.before_qa()
+    qa_file_path = dir_helper.save_paths.qa_file
+    if not qa_file_path.exists():
+        llm_tests = config.qa.llm_tests
+        tests = QaTestRegistry.create_tests_from_list(llm_tests)
+        test_suite = LLMTestSuite.from_csv(results_file_path, tests)
+        test_suite.save_test_results(qa_file_path)
 
 
 @app.command("run")
diff --git a/llmtune/qa/generics.py b/llmtune/qa/generics.py
@@ -50,6 +50,14 @@ def __init__(
 
         self.test_results = {}
 
+    @staticmethod  # alternate constructor: builds an LLMTestSuite from a results CSV file
+    def from_csv(file_path: str, tests: List[LLMQaTest]) -> "LLMTestSuite":  # NOTE(review): the toolkit.py caller passes results_file_path, which may be a Path rather than str - confirm
+        results_df = pd.read_csv(file_path)  # load the whole results file into a DataFrame
+        prompts = results_df["prompt"].tolist()  # the CSV must contain a "prompt" column
+        ground_truths = results_df["ground_truth"].tolist()  # ... and a "ground_truth" column
+        model_preds = results_df["model_prediction"].tolist()  # ... and a "model_prediction" column
+        return LLMTestSuite(tests, prompts, ground_truths, model_preds)  # the three column lists are passed positionally after the tests
+
     def run_tests(self) -> Dict[str, List[Union[float, int, bool]]]:
         test_results = {}
         for test in zip(self.tests):