Provide insights about test results to help code gen

JamesVorder · JamesVorder · commit 4c617776a807 · 2024-07-16T07:36:39.000-05:00
diff --git a/TestRunner/GenericTestRunner.py b/TestRunner/GenericTestRunner.py
@@ -1,16 +1,13 @@
-from abc import ABCMeta, abstractmethod
-
 import pytest
-
+import subprocess
+from abc import ABCMeta, abstractmethod
 from pytest_plugins import ResultsCollector, SessionStartPlugin
 
 
 class GenericTestRunner(metaclass=ABCMeta):
     @abstractmethod
     def run(self, *args, **kwargs) -> (int, str): pass
 
-import subprocess
-
 
 class SubProcessTestRunner(GenericTestRunner):
     code_dir: str
diff --git a/TestRunner/SubProcessTestRunner.py b/TestRunner/SubProcessTestRunner.py
diff --git a/main.py b/main.py
@@ -34,7 +34,7 @@ def generate_first_attempt(class_skeleton: str) -> None:
         _out.write(response.content)
 
 
-def generate_next_attempt(test_results: str) -> None:
+def generate_next_attempt(test_results: str, test_results_insights: str) -> None:
     cfg = lr.ChatAgentConfig(
         llm=lr.language_models.OpenAIGPTConfig(
             chat_model="ollama/llama3:latest",
@@ -53,6 +53,8 @@ def generate_next_attempt(test_results: str) -> None:
             {code_snippet}
             Here are the test results:
             {test_results}
+            In addition, you may consider these insights about the test results when coming up with your solution:
+            {test_results_insights}
             Update the code so that the tests will pass.
             Your output MUST contain all the same classes and methods as the input code.
             Do NOT add any other methods or commentary.
@@ -66,6 +68,27 @@ def generate_next_attempt(test_results: str) -> None:
         _out.write(response.content)
 
 
+def interpret_test_results(results: str) -> str:
+    cfg = lr.ChatAgentConfig(
+        llm=lr.language_models.OpenAIGPTConfig(
+            chat_model="ollama/llama3:latest",
+            chat_context_length=8000,
+        ),
+        vecdb=None
+    )
+    agent = lr.ChatAgent(cfg)
+    prompt = f"""
+        You are an expert at interpreting the results of unit tests, and providing insight into what they mean.
+        You should be descriptive about what variables are incorrect, and in what way.
+        You should include information about which methods should be modified, and in what way.
+        You should generally not provide code.
+        Please provide insights about the following test results:
+        {results}
+    """
+    response = agent.llm_response(prompt)
+    return response.content
+
+
 def teardown() -> None:
     codegen_path = os.path.join(".", "generated")
     build_path = os.path.join(".", "build")
@@ -89,7 +112,8 @@ def chat(class_skeleton: str, test_dir: str, test_runner: GenericTestRunner, max
             print("Done!")
             break
         elif test_exit_code == 1:
-            generate_next_attempt(test_results)
+            results_insights = interpret_test_results(test_results)
+            generate_next_attempt(test_results, results_insights)
         else:
             solved = True
             print("There is some problem with the test suite itself.")
@@ -116,7 +140,7 @@ def main(
     assert os.path.isfile(class_skeleton), f"The class skeleton file provided does not exist! Got {class_skeleton}"
     assert os.path.exists(test_dir), f"The test-dir provided does not exist! Got {test_dir}"
 
-    tr: GenericTestRunner = InlineTestRunner("", test_dir)
+    tr: GenericTestRunner = SubProcessTestRunner("", test_dir)
     chat(
         class_skeleton=class_skeleton,
         test_dir=test_dir,