diff --git a/docs/local-development.md b/docs/local-development.md
index 638d232..bae7b3a 100644
--- a/docs/local-development.md
+++ b/docs/local-development.md
@@ -7,6 +7,7 @@ The first step will be just to be able to run the first version of your prompt a
 Imagine we have a python project called `team_recommender` where we recommend teams of developers to be used on a given project. The basic structure looks like this:
 
+```
 team_recommender/
 ├── README.md
 ├── requirements.txt
@@ -21,7 +22,7 @@ team_recommender/
 ├── __init__.py
 ├── test_allocations.py
 └── settings.py
-
+```
 ## Single Test
 We start running a test prompt inside a pytest to check that the LLM is recommending us developers that we think have relevant skills based off our fixture data:
@@ -183,7 +184,7 @@ def test_allocations():
         run_allocation_test,
         reporter=test_reporter,
     )
-    results = test_runner.run_loop(tries)
+    results = test_runner.run_multiple(tries)
 
     assert False not in results
 
@@ -336,7 +337,7 @@ def test_allocations():
         ),
         reporter=test_reporter,
     )
-    results = test_runner.run_loop(tries)
+    results = test_runner.run_multiple(tries)
 
     assert False not in results
 
diff --git a/examples/team_recommender/tests/example_1_unit/test_allocations_unit.py b/examples/team_recommender/tests/example_1_unit/test_allocations_unit.py
index d67e800..a87c6ae 100644
--- a/examples/team_recommender/tests/example_1_unit/test_allocations_unit.py
+++ b/examples/team_recommender/tests/example_1_unit/test_allocations_unit.py
@@ -42,8 +42,5 @@ def test_allocations():
         response_format={"type": "json_object"},
     )
     response = completion.choices[0].message.content
-    person_with_relevant_skill_was_selected = any(
-        name in response for name in acceptable_people
-    )
+    person_with_relevant_skill_was_selected = any(name in response for name in acceptable_people)
     assert person_with_relevant_skill_was_selected
-
diff --git a/examples/team_recommender/tests/example_2_loop/test_allocations_loop.py b/examples/team_recommender/tests/example_2_loop/test_allocations_loop.py
index 51ed6dd..9bc3b99 100644
--- a/examples/team_recommender/tests/example_2_loop/test_allocations_loop.py
+++ b/examples/team_recommender/tests/example_2_loop/test_allocations_loop.py
@@ -43,7 +43,7 @@ def test_allocations():
         run_allocation_test,
         reporter=test_reporter,
     )
-    results = test_runner.run_loop(tries)
+    results = test_runner.run_multiple(tries)
 
     assert False not in results
 
@@ -69,4 +69,3 @@ def run_allocation_test(reporter) -> bool:
     except json.JSONDecodeError as e:
         print(f"JSON Exception: {e}")
     return result
-
diff --git a/examples/team_recommender/tests/example_3_loop_no_hallucinating/test_allocations_hallucinating.py b/examples/team_recommender/tests/example_3_loop_no_hallucinating/test_allocations_hallucinating.py
index c6ee124..f62c80f 100644
--- a/examples/team_recommender/tests/example_3_loop_no_hallucinating/test_allocations_hallucinating.py
+++ b/examples/team_recommender/tests/example_3_loop_no_hallucinating/test_allocations_hallucinating.py
@@ -7,11 +7,7 @@
 
 
 def get_all_developer_names(skills_data) -> set[str]:
-    return {
-        developer["developer"]["name"]
-        for skill in skills_data["skills"]
-        for developer in skill["developerSkills"]
-    }
+    return {developer["developer"]["name"] for skill in skills_data["skills"] for developer in skill["developerSkills"]}
 
 
 def get_developer_names_from_response(response) -> set[str]:
@@ -52,12 +48,10 @@ def test_allocations():
         output_dir=ROOT_DIR,
     )
     test_runner = Runner(
-        lambda reporter: run_allocation_test(
-            reporter=reporter, skills_data=skills_data
-        ),
+        lambda reporter: run_allocation_test(reporter=reporter, skills_data=skills_data),
         reporter=test_reporter,
     )
-    results = test_runner.run_loop(tries)
+    results = test_runner.run_multiple(tries)
 
     assert False not in results
 
@@ -82,9 +76,7 @@ def run_allocation_test(reporter, skills_data) -> bool:
     try:
         json_object = json.loads(response)
         developer_names = get_developer_names_from_response(json_object)
-        no_developer_name_is_hallucinated = False not in [
-            name in all_developers for name in developer_names
-        ]
+        no_developer_name_is_hallucinated = False not in [name in all_developers for name in developer_names]
 
         reporter.report(
             json_object,
diff --git a/examples/team_recommender/tests/example_4_gate_on_success_threshold/test_allocations_threshold.py b/examples/team_recommender/tests/example_4_gate_on_success_threshold/test_allocations_threshold.py
index f15bc59..3f9dccb 100644
--- a/examples/team_recommender/tests/example_4_gate_on_success_threshold/test_allocations_threshold.py
+++ b/examples/team_recommender/tests/example_4_gate_on_success_threshold/test_allocations_threshold.py
@@ -1,22 +1,21 @@
 import json
 import os
+
+from openai import OpenAI
+from tests.settings import ROOT_DIR
+
 from cat_ai.reporter import Reporter
 from cat_ai.runner import Runner
-from tests.settings import ROOT_DIR
-from openai import OpenAI
 
 
 def get_all_developer_names(skills_data) -> set[str]:
-    return {
-        developer["developer"]["name"]
-        for skill in skills_data["skills"]
-        for developer in skill["developerSkills"]
-    }
+    return {developer["developer"]["name"] for skill in skills_data["skills"] for developer in skill["developerSkills"]}
 
 
 def get_developer_names_from_response(response) -> set[str]:
     return {developer["name"] for developer in response["developers"]}
 
+
 def has_expected_success_rate(results: list[bool], expected_success_rate: float) -> bool:
     if not results:
         return True
@@ -27,8 +26,9 @@ def has_expected_success_rate(results: list[bool], expected_success_rate: float)
     print(1.0 - failure_rate)
     return expected_success_rate <= (1.0 - failure_rate)
 
+
 def test_allocations():
-    tries = Runner.sample_size(3)
+    tries = Runner.get_sample_size(3)
     skills_json_path = os.path.join(ROOT_DIR, "fixtures", "skills.json")
     with open(skills_json_path, "r") as file:
         skills_data = json.load(file)
@@ -61,12 +61,10 @@ def test_allocations():
         output_dir=ROOT_DIR,
     )
     test_runner = Runner(
-        lambda reporter: run_allocation_test(
-            reporter=reporter, skills_data=skills_data
-        ),
+        lambda reporter: run_allocation_test(reporter=reporter, skills_data=skills_data),
         reporter=test_reporter,
     )
-    results = test_runner.run_loop(tries)
+    results = test_runner.run_multiple(tries)
 
     failure_threshold = 0.8
     assert has_expected_success_rate(results, failure_threshold)
@@ -92,9 +90,7 @@ def run_allocation_test(reporter, skills_data) -> bool:
     try:
         json_object = json.loads(response)
         developer_names = get_developer_names_from_response(json_object)
-        no_developer_name_is_hallucinated = False not in [
-            name in all_developers for name in developer_names
-        ]
+        no_developer_name_is_hallucinated = False not in [name in all_developers for name in developer_names]
 
         reporter.report(
             json_object,
diff --git a/examples/team_recommender/tests/settings.py b/examples/team_recommender/tests/settings.py
index 2495cd1..7fa1b47 100644
--- a/examples/team_recommender/tests/settings.py
+++ b/examples/team_recommender/tests/settings.py
@@ -1,4 +1,4 @@
 import os
 
 
-ROOT_DIR = os.path.dirname(os.path.abspath(os.path.join("..", __file__)))
\ No newline at end of file
+ROOT_DIR = os.path.dirname(os.path.abspath(os.path.join("..", __file__)))
diff --git a/qodana.yaml b/qodana.yaml
new file mode 100644
index 0000000..2937f37
--- /dev/null
+++ b/qodana.yaml
@@ -0,0 +1,27 @@
+#-------------------------------------------------------------------------------#
+#               Qodana analysis is configured by qodana.yaml file                #
+#             https://www.jetbrains.com/help/qodana/qodana-yaml.html             #
+#-------------------------------------------------------------------------------#
+version: "1.0"
+#Specify inspection profile for code analysis
+profile:
+  name: qodana.starter
+#Enable inspections
+#include:
+#  - name:
+#Disable inspections
+#exclude:
+#  - name:
+#    paths:
+#      -
+#Execute shell command before Qodana execution (Applied in CI/CD pipeline)
+#bootstrap: sh ./prepare-qodana.sh
+#Install IDE plugins before Qodana execution (Applied in CI/CD pipeline)
+#plugins:
+#  - id: #(plugin id can be found at https://plugins.jetbrains.com)
+#Specify Qodana linter for analysis (Applied in CI/CD pipeline)
+linter: jetbrains/qodana-python:2024.3
+exclude:
+  - name: All
+    paths:
+      - docs
diff --git a/src/cat_ai/publish_to_gdrive.py b/src/cat_ai/publish_to_gdrive.py
index 9b010ca..2c6a3f2 100644
--- a/src/cat_ai/publish_to_gdrive.py
+++ b/src/cat_ai/publish_to_gdrive.py
@@ -4,6 +4,7 @@
 from pydrive2.auth import GoogleAuth  # type: ignore
 from pydrive2.drive import GoogleDrive  # type: ignore
 
+
 def login_with_service_account(credentials_path: str) -> GoogleAuth:
     """
     Google Drive service with a service account.
diff --git a/src/cat_ai/reporter.py b/src/cat_ai/reporter.py
index 9e755b3..2781742 100644
--- a/src/cat_ai/reporter.py
+++ b/src/cat_ai/reporter.py
@@ -1,7 +1,7 @@
 import json
 import os
 from datetime import datetime
-from typing import Any, Dict
+from typing import Optional, Any, Dict
 
 
 class Reporter:
@@ -14,10 +14,10 @@ def _create_unique_id_from_time() -> str:
         return datetime.now().strftime("%Y%m%d_%H%M%S")
 
     def __init__(
-        self, test_name: str, output_dir: str, unique_id: str | None = None, metadata: Dict[str, Any] = {}
+        self, test_name: str, output_dir: str, unique_id: str | None = None, metadata: Optional[Dict[str, Any]] = None
     ) -> None:
         self.test_name = test_name
-        self.metadata = metadata
+        self.metadata = metadata or {}
         if not unique_id:
             unique_id = self._create_unique_id_from_time()
         unique_dir_name = f"{test_name}-{unique_id}"
diff --git a/src/cat_ai/runner.py b/src/cat_ai/runner.py
index fdae8a4..cb14817 100644
--- a/src/cat_ai/runner.py
+++ b/src/cat_ai/runner.py
@@ -1,25 +1,59 @@
 import os
+from typing import Callable, List, Optional
 
 from .reporter import Reporter
-from typing import Callable, Any
 
 
 class Runner:
-    def __init__(self, test_function: Callable[..., Any], reporter: Reporter) -> None:
+    """Executes test functions and collects results using a reporter."""
+
+    def __init__(self, test_function: Callable[[Reporter], bool], reporter: Reporter) -> None:
+        """
+        Initialize the Runner with a test function and reporter.
+
+        Args:
+            test_function: Function to execute during test runs
+            reporter: Reporter instance to track and report test results
+        """
         self.reporter = reporter
         self.test_function = test_function
 
     @staticmethod
-    def sample_size(default_size: int = 1) -> int:
+    def get_sample_size(default_size: int = 1) -> int:
+        """
+        Get sample size from environment variable or use default.
+
+        Args:
+            default_size: Default sample size if not specified in environment
+
+        Returns:
+            Number of test runs to perform
+        """
         return int(os.getenv("CAT_AI_SAMPLE_SIZE", str(default_size)))
 
-    def run_once(self, run_number: int = 0) -> Any:
+    def run_once(self, run_number: int = 0) -> bool:
+        """
+        Execute the test function once.
+
+        Args:
+            run_number: Current run index for reporting
+
+        Returns:
+            Result from the test function
+        """
         self.reporter.run_number = run_number
-        result = self.test_function(reporter=self.reporter)
-        return result
-
-    def run_loop(self, tries: int = sample_size()) -> list[Any]:
-        results = []
-        for x in range(0, tries):
-            results.append(self.run_once(x))
-        return results
+        return self.test_function(self.reporter)
+
+    def run_multiple(self, sample_size: Optional[int] = None) -> List[bool]:
+        """
+        Execute the test function multiple times based on sample size.
+
+        Args:
+            sample_size: Number of times to run the test, defaults to
+                value from get_sample_size() if None
+
+        Returns:
+            List of results from all test runs
+        """
+        runs = sample_size if sample_size is not None else self.get_sample_size()
+        return [self.run_once(i) for i in range(runs)]
diff --git a/tests/cat_ai/test_reporter.py b/tests/cat_ai/test_reporter.py
index 784d328..aac3552 100644
--- a/tests/cat_ai/test_reporter.py
+++ b/tests/cat_ai/test_reporter.py
@@ -1,8 +1,8 @@
 import json
 import time
 from unittest.mock import mock_open, patch, MagicMock
-from cat_ai.reporter import Reporter
-from cat_ai.helpers.helpers import root_dir
+from src.cat_ai.reporter import Reporter
+from src.cat_ai.helpers.helpers import root_dir
 
 
 def test_reporter_create_a_unique_folder_path() -> None:
@@ -18,18 +18,14 @@ def test_reporter_create_a_unique_folder_path() -> None:
 def test_reporter_can_accept_unique_id_override() -> None:
     test_name = "id_override"
     unique_id = "some_string"
-    reporter1 = Reporter(
-        test_name=test_name, output_dir=root_dir(), unique_id=unique_id
-    )
+    reporter1 = Reporter(test_name=test_name, output_dir=root_dir(), unique_id=unique_id)
     expected_dir_path = f"{root_dir()}/test_runs/{test_name}-{unique_id}"
     assert str(expected_dir_path) == str(reporter1.folder_path)
 
 
 @patch("os.makedirs")
 @patch("builtins.open", new_callable=mock_open)
-def test_report_creates_correct_json(
-    mock_open: MagicMock, mock_makedirs: MagicMock
-) -> None:
+def test_report_creates_correct_json(mock_open: MagicMock, mock_makedirs: MagicMock) -> None:
     test_name = "report_creates_correct_json"
     unique_id = "20231001_120000"
     reporter = Reporter(test_name=test_name, output_dir=root_dir(), unique_id=unique_id)
diff --git a/tests/cat_ai/test_runner.py b/tests/cat_ai/test_runner.py
new file mode 100644
index 0000000..57d7f7b
--- /dev/null
+++ b/tests/cat_ai/test_runner.py
@@ -0,0 +1,64 @@
+from src.cat_ai.reporter import Reporter
+from src.cat_ai.runner import Runner
+
+
+# Dummy test function that will be passed to Runner
+def dummy_test_function(reporter: Reporter) -> bool:
+    # Imagine that this function does something meaningful
+    # Simply returning True instead of trying to log
+    return True
+
+
+def test_runner_sample_size(monkeypatch):
+    # Set an environment variable to test
+    monkeypatch.setenv("CAT_AI_SAMPLE_SIZE", "5")
+    assert Runner.get_sample_size() == 5
+
+    # Test default size
+    monkeypatch.delenv("CAT_AI_SAMPLE_SIZE", raising=False)
+    assert Runner.get_sample_size(default_size=3) == 3
+
+
+def test_run_once():
+    # Create a Reporter with necessary arguments
+    reporter = Reporter(test_name="test_run_once", output_dir="/tmp")
+
+    # Initialize Runner with dummy test function and Reporter
+    runner = Runner(test_function=dummy_test_function, reporter=reporter)
+
+    # Test run_once
+    result = runner.run_once()
+    assert result is True
+    assert reporter.run_number == 0
+
+
+def test_run_multiple():
+    # Create a Reporter with necessary arguments
+    reporter = Reporter(test_name="test_run", output_dir="/tmp")
+
+    # Initialize Runner with dummy test function and Reporter
+    runner = Runner(test_function=dummy_test_function, reporter=reporter)
+
+    # Test with explicit sample size parameter
+    results = runner.run_multiple(sample_size=2)
+    assert len(results) == 2
+    assert all(results)
+    expected_results = [True, True]
+    assert results == expected_results
+
+
+def test_run_with_env_variable(monkeypatch):
+    # Set the environment variable for a controlled test
+    monkeypatch.setenv("CAT_AI_SAMPLE_SIZE", "3")
+
+    # Create a Reporter with necessary arguments
+    reporter = Reporter(test_name="test_run_with_env", output_dir="/tmp")
+
+    # Initialize Runner with dummy test function and Reporter
+    runner = Runner(test_function=dummy_test_function, reporter=reporter)
+
+    # Test without explicit sample size (should use environment variable)
+    results = runner.run_multiple()
+    assert len(results) == 3
+    expected_results = [True, True, True]
+    assert results == expected_results
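For context, here is a minimal sketch of how the renamed `Runner` API reads from a consuming test after this change. It assumes the `cat_ai` package imports used in the examples above; the test name `test_allocation_success_rate`, the helper `check_allocation`, and the 0.8 threshold are illustrative placeholders rather than code from the repository:

```python
from cat_ai.reporter import Reporter
from cat_ai.runner import Runner


def check_allocation(reporter: Reporter) -> bool:
    # Hypothetical check: a real test would call the model and validate its output.
    return True


def test_allocation_success_rate(tmp_path) -> None:
    # get_sample_size() reads CAT_AI_SAMPLE_SIZE and falls back to the given default.
    tries = Runner.get_sample_size(3)

    reporter = Reporter(test_name="allocation_example", output_dir=str(tmp_path))
    runner = Runner(check_allocation, reporter=reporter)

    # run_multiple() replaces the old run_loop() and returns one bool per run.
    results = runner.run_multiple(tries)

    # Gate on a success rate instead of requiring every nondeterministic run to pass.
    success_rate = results.count(True) / len(results)
    assert success_rate >= 0.8
```

The closing assertion mirrors the gate in `test_allocations_threshold.py`, where a success-rate threshold replaces an all-runs-must-pass check.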