7 changes: 4 additions & 3 deletions docs/local-development.md
@@ -7,6 +7,7 @@ The first step will be just to be able to run the first version of your prompt a

Imagine we have a Python project called `team_recommender`, where we recommend teams of developers for a given project. The basic structure looks like this:

```
team_recommender/
├── README.md
├── requirements.txt
@@ -21,7 +22,7 @@ team_recommender/
├── __init__.py
├── test_allocations.py
└── settings.py

```

## Single Test
We start by running a test prompt inside a pytest test to check that the LLM recommends developers we think have relevant skills, based on our fixture data:
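The full test body is collapsed in this diff, but the fragments shown further down suggest its shape. A minimal sketch, assuming an OpenAI-style chat client and placeholder prompt/fixture values (the model name and the `acceptable_people` entries below are illustrative, not taken from the repo):

```python
from openai import OpenAI


def test_allocations():
    client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment
    acceptable_people = ["Alice", "Bob"]  # placeholder names we expect from fixture data
    completion = client.chat.completions.create(
        model="gpt-4o-mini",  # model choice is an assumption
        messages=[
            {
                "role": "user",
                "content": "Recommend a team of developers for a Python project. Respond in JSON.",
            }
        ],
        response_format={"type": "json_object"},
    )
    response = completion.choices[0].message.content
    # The test passes if any developer we consider relevant appears in the answer.
    person_with_relevant_skill_was_selected = any(
        name in response for name in acceptable_people
    )
    assert person_with_relevant_skill_was_selected
```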
@@ -183,7 +184,7 @@ def test_allocations():
run_allocation_test,
reporter=test_reporter,
)
results = test_runner.run_loop(tries)
results = test_runner.run_multiple(tries)
assert False not in results


@@ -336,7 +337,7 @@ def test_allocations():
),
reporter=test_reporter,
)
results = test_runner.run_loop(tries)
results = test_runner.run_multiple(tries)
assert False not in results


@@ -42,8 +42,5 @@ def test_allocations():
response_format={"type": "json_object"},
)
response = completion.choices[0].message.content
person_with_relevant_skill_was_selected = any(
name in response for name in acceptable_people
)
person_with_relevant_skill_was_selected = any(name in response for name in acceptable_people)
assert person_with_relevant_skill_was_selected

@@ -43,7 +43,7 @@ def test_allocations():
run_allocation_test,
reporter=test_reporter,
)
results = test_runner.run_loop(tries)
results = test_runner.run_multiple(tries)
assert False not in results


@@ -69,4 +69,3 @@ def run_allocation_test(reporter) -> bool:
except json.JSONDecodeError as e:
print(f"JSON Exception: {e}")
return result

@@ -7,11 +7,7 @@


def get_all_developer_names(skills_data) -> set[str]:
return {
developer["developer"]["name"]
for skill in skills_data["skills"]
for developer in skill["developerSkills"]
}
return {developer["developer"]["name"] for skill in skills_data["skills"] for developer in skill["developerSkills"]}


def get_developer_names_from_response(response) -> set[str]:
@@ -52,12 +48,10 @@ def test_allocations():
output_dir=ROOT_DIR,
)
test_runner = Runner(
lambda reporter: run_allocation_test(
reporter=reporter, skills_data=skills_data
),
lambda reporter: run_allocation_test(reporter=reporter, skills_data=skills_data),
reporter=test_reporter,
)
results = test_runner.run_loop(tries)
results = test_runner.run_multiple(tries)
assert False not in results


@@ -82,9 +76,7 @@ def run_allocation_test(reporter, skills_data) -> bool:
try:
json_object = json.loads(response)
developer_names = get_developer_names_from_response(json_object)
no_developer_name_is_hallucinated = False not in [
name in all_developers for name in developer_names
]
no_developer_name_is_hallucinated = False not in [name in all_developers for name in developer_names]

reporter.report(
json_object,
@@ -1,22 +1,21 @@
import json
import os

from openai import OpenAI
from tests.settings import ROOT_DIR

from cat_ai.reporter import Reporter
from cat_ai.runner import Runner
from tests.settings import ROOT_DIR
from openai import OpenAI


def get_all_developer_names(skills_data) -> set[str]:
return {
developer["developer"]["name"]
for skill in skills_data["skills"]
for developer in skill["developerSkills"]
}
return {developer["developer"]["name"] for skill in skills_data["skills"] for developer in skill["developerSkills"]}
Contributor: I know this is just what your formatter does, but which one do you actually think is easier to read?

Contributor (Author): Maybe a 120-character line length is too big? At 100 the formatter would go multiline. What do you think?

Contributor: Does that mean you prefer the multi-line version? I'm open to either.



def get_developer_names_from_response(response) -> set[str]:
return {developer["name"] for developer in response["developers"]}


def has_expected_success_rate(results: list[bool], expected_success_rate: float) -> bool:
if not results:
return True
@@ -27,8 +26,9 @@ def has_expected_success_rate(results: list[bool], expected_success_rate: float)
print(1.0 - failure_rate)
return expected_success_rate <= (1.0 - failure_rate)


def test_allocations():
tries = Runner.sample_size(3)
tries = Runner.get_sample_size(3)
skills_json_path = os.path.join(ROOT_DIR, "fixtures", "skills.json")
with open(skills_json_path, "r") as file:
skills_data = json.load(file)
@@ -61,12 +61,10 @@ def test_allocations():
output_dir=ROOT_DIR,
)
test_runner = Runner(
lambda reporter: run_allocation_test(
reporter=reporter, skills_data=skills_data
),
lambda reporter: run_allocation_test(reporter=reporter, skills_data=skills_data),
reporter=test_reporter,
)
results = test_runner.run_loop(tries)
results = test_runner.run_multiple(tries)
failure_threshold = 0.8
assert has_expected_success_rate(results, failure_threshold)

@@ -92,9 +90,7 @@ def run_allocation_test(reporter, skills_data) -> bool:
try:
json_object = json.loads(response)
developer_names = get_developer_names_from_response(json_object)
no_developer_name_is_hallucinated = False not in [
name in all_developers for name in developer_names
]
no_developer_name_is_hallucinated = False not in [name in all_developers for name in developer_names]

reporter.report(
json_object,
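One note on the `has_expected_success_rate` check earlier in this file: with the default sample size of 3 and a threshold of 0.8, a single failed run already sinks the test. A self-contained sketch of the arithmetic (assuming `failure_rate` is simply the fraction of `False` results, which the collapsed lines appear to compute):

```python
def has_expected_success_rate(results: list[bool], expected_success_rate: float) -> bool:
    # Same shape as the helper above; the failure-rate line is an assumption.
    if not results:
        return True
    failure_rate = results.count(False) / len(results)
    return expected_success_rate <= (1.0 - failure_rate)


# 3 runs with one failure: success rate is 2/3 ≈ 0.67, below the 0.8 threshold.
assert has_expected_success_rate([True, True, False], 0.8) is False
# 10 runs with one failure: success rate 0.9 clears the threshold.
assert has_expected_success_rate([True] * 9 + [False], 0.8) is True
```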
2 changes: 1 addition & 1 deletion examples/team_recommender/tests/settings.py
@@ -1,4 +1,4 @@
import os


ROOT_DIR = os.path.dirname(os.path.abspath(os.path.join("..", __file__)))
ROOT_DIR = os.path.dirname(os.path.abspath(os.path.join("..", __file__)))
27 changes: 27 additions & 0 deletions qodana.yaml
@@ -0,0 +1,27 @@
#-------------------------------------------------------------------------------#
Contributor: Tell me more about this tool.

Contributor (Author): Built in for PyCharm: https://www.jetbrains.com/qodana/

# Qodana analysis is configured by qodana.yaml file #
# https://www.jetbrains.com/help/qodana/qodana-yaml.html #
#-------------------------------------------------------------------------------#
version: "1.0"
#Specify inspection profile for code analysis
profile:
name: qodana.starter
#Enable inspections
#include:
# - name: <SomeEnabledInspectionId>
#Disable inspections
#exclude:
# - name: <SomeDisabledInspectionId>
# paths:
# - <path/where/not/run/inspection>
#Execute shell command before Qodana execution (Applied in CI/CD pipeline)
#bootstrap: sh ./prepare-qodana.sh
#Install IDE plugins before Qodana execution (Applied in CI/CD pipeline)
#plugins:
# - id: <plugin.id> #(plugin id can be found at https://plugins.jetbrains.com)
#Specify Qodana linter for analysis (Applied in CI/CD pipeline)
linter: jetbrains/qodana-python:2024.3
exclude:
- name: All
paths:
- docs
1 change: 1 addition & 0 deletions src/cat_ai/publish_to_gdrive.py
@@ -4,6 +4,7 @@
from pydrive2.auth import GoogleAuth # type: ignore
from pydrive2.drive import GoogleDrive # type: ignore


def login_with_service_account(credentials_path: str) -> GoogleAuth:
"""
Google Drive service with a service account.
6 changes: 3 additions & 3 deletions src/cat_ai/reporter.py
@@ -1,7 +1,7 @@
import json
import os
from datetime import datetime
from typing import Any, Dict
from typing import Optional, Any, Dict


class Reporter:
@@ -14,10 +14,10 @@ def _create_unique_id_from_time() -> str:
return datetime.now().strftime("%Y%m%d_%H%M%S")

def __init__(
self, test_name: str, output_dir: str, unique_id: str | None = None, metadata: Dict[str, Any] = {}
self, test_name: str, output_dir: str, unique_id: str | None = None, metadata: Optional[Dict[str, Any]] = None
) -> None:
self.test_name = test_name
self.metadata = metadata
self.metadata = metadata or {}
if not unique_id:
unique_id = self._create_unique_id_from_time()
unique_dir_name = f"{test_name}-{unique_id}"
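The `metadata` change in this file is the standard fix for Python's mutable-default-argument pitfall: a `{}` default is created once, when the function is defined, and then shared by every call that relies on it. A toy illustration of the before/after behaviour (stand-in classes, not the real `Reporter`):

```python
from typing import Any, Dict, Optional


class Shared:
    def __init__(self, metadata: Dict[str, Any] = {}) -> None:  # pitfall: one shared dict
        self.metadata = metadata


class Fixed:
    def __init__(self, metadata: Optional[Dict[str, Any]] = None) -> None:
        self.metadata = metadata or {}  # fresh dict per instance


a, b = Shared(), Shared()
a.metadata["run"] = 1
assert b.metadata == {"run": 1}  # surprise: b sees a's mutation

c, d = Fixed(), Fixed()
c.metadata["run"] = 1
assert d.metadata == {}  # each instance gets its own dict
```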
58 changes: 46 additions & 12 deletions src/cat_ai/runner.py
@@ -1,25 +1,59 @@
import os
from typing import Callable, List, Optional

from .reporter import Reporter
from typing import Callable, Any


class Runner:
def __init__(self, test_function: Callable[..., Any], reporter: Reporter) -> None:
"""Executes test functions and collects results using a reporter."""

def __init__(self, test_function: Callable[[Reporter], bool], reporter: Reporter) -> None:
"""
Initialize the Runner with a test function and reporter.

Args:
test_function: Function to execute during test runs
reporter: Reporter instance to track and report test results
"""
self.reporter = reporter
self.test_function = test_function

@staticmethod
def sample_size(default_size: int = 1) -> int:
def get_sample_size(default_size: int = 1) -> int:
"""
Get sample size from environment variable or use default.

Args:
default_size: Default sample size if not specified in environment

Returns:
Number of test runs to perform
"""
return int(os.getenv("CAT_AI_SAMPLE_SIZE", str(default_size)))

def run_once(self, run_number: int = 0) -> Any:
def run_once(self, run_number: int = 0) -> bool:
"""
Execute the test function once.

Args:
run_number: Current run index for reporting

Returns:
Result from the test function
"""
self.reporter.run_number = run_number
result = self.test_function(reporter=self.reporter)
return result

def run_loop(self, tries: int = sample_size()) -> list[Any]:
results = []
for x in range(0, tries):
results.append(self.run_once(x))
return results
return self.test_function(self.reporter)

def run_multiple(self, sample_size: Optional[int] = None) -> List[bool]:
"""
Execute the test function multiple times based on sample size.

Args:
sample_size: Number of times to run the test, defaults to
value from get_sample_size() if None

Returns:
List of results from all test runs
"""
runs = sample_size if sample_size is not None else self.get_sample_size()
return [self.run_once(i) for i in range(runs)]
12 changes: 4 additions & 8 deletions tests/cat_ai/test_reporter.py
@@ -1,8 +1,8 @@
import json
import time
from unittest.mock import mock_open, patch, MagicMock
from cat_ai.reporter import Reporter
from cat_ai.helpers.helpers import root_dir
from src.cat_ai.reporter import Reporter
from src.cat_ai.helpers.helpers import root_dir


def test_reporter_create_a_unique_folder_path() -> None:
@@ -18,18 +18,14 @@ def test_reporter_create_a_unique_folder_path() -> None:
def test_reporter_can_accept_unique_id_override() -> None:
test_name = "id_override"
unique_id = "some_string"
reporter1 = Reporter(
test_name=test_name, output_dir=root_dir(), unique_id=unique_id
)
reporter1 = Reporter(test_name=test_name, output_dir=root_dir(), unique_id=unique_id)
expected_dir_path = f"{root_dir()}/test_runs/{test_name}-{unique_id}"
assert str(expected_dir_path) == str(reporter1.folder_path)


@patch("os.makedirs")
@patch("builtins.open", new_callable=mock_open)
def test_report_creates_correct_json(
mock_open: MagicMock, mock_makedirs: MagicMock
) -> None:
def test_report_creates_correct_json(mock_open: MagicMock, mock_makedirs: MagicMock) -> None:
test_name = "report_creates_correct_json"
unique_id = "20231001_120000"
reporter = Reporter(test_name=test_name, output_dir=root_dir(), unique_id=unique_id)
64 changes: 64 additions & 0 deletions tests/cat_ai/test_runner.py
@@ -0,0 +1,64 @@
from src.cat_ai.reporter import Reporter
from src.cat_ai.runner import Runner


# Dummy test function that will be passed to Runner
def dummy_test_function(reporter: Reporter) -> bool:
# Imagine that this function does something meaningful
# Simply returning True instead of trying to log
return True


def test_runner_sample_size(monkeypatch):
# Set an environment variable to test
monkeypatch.setenv("CAT_AI_SAMPLE_SIZE", "5")
assert Runner.get_sample_size() == 5

# Test default size
monkeypatch.delenv("CAT_AI_SAMPLE_SIZE", raising=False)
assert Runner.get_sample_size(default_size=3) == 3


def test_run_once():
# Create a Reporter with necessary arguments
reporter = Reporter(test_name="test_run_once", output_dir="/tmp")

# Initialize Runner with dummy test function and Reporter
runner = Runner(test_function=dummy_test_function, reporter=reporter)

# Test run_once
result = runner.run_once()
assert result is True
assert reporter.run_number == 0


def test_run_multiple():
# Create a Reporter with necessary arguments
reporter = Reporter(test_name="test_run", output_dir="/tmp")

# Initialize Runner with dummy test function and Reporter
runner = Runner(test_function=dummy_test_function, reporter=reporter)

# Test with explicit sample size parameter
results = runner.run_multiple(sample_size=2)
assert len(results) == 2
assert all(results)
expected_results = [True, True]
assert results == expected_results


def test_run_with_env_variable(monkeypatch):
# Set the environment variable for a controlled test
monkeypatch.setenv("CAT_AI_SAMPLE_SIZE", "3")

# Create a Reporter with necessary arguments
reporter = Reporter(test_name="test_run_with_env", output_dir="/tmp")

# Initialize Runner with dummy test function and Reporter
runner = Runner(test_function=dummy_test_function, reporter=reporter)

# Test without explicit sample size (should use environment variable)
results = runner.run_multiple()
assert len(results) == 3
expected_results = [True, True, True]
assert results == expected_results