Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/cat-test-examples.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ jobs:
echo "CAT_AI_SAMPLE_SIZE=$ROUNDS" >> $GITHUB_ENV

- name: Run Example tests
run: uv run pytest examples/team_recommender/tests/example_6_n_generations/
run: uv run pytest examples/team_recommender/tests/example_7_*
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
import json
import os

from jsonschema import FormatChecker, validate
from openai import OpenAI
from tests.settings import ROOT_DIR

from cat_ai.reporter import Reporter
from cat_ai.runner import Runner


def get_all_developer_names(skills_data) -> set[str]:
    """Collect every developer name mentioned anywhere in the skills fixture.

    :param skills_data: Parsed skills fixture with a top-level ``skills`` list,
        where each skill entry holds a ``developerSkills`` list.
    :return: Unique developer names across all skill entries.
    """
    names: set[str] = set()
    for skill_entry in skills_data["skills"]:
        for developer_skill in skill_entry["developerSkills"]:
            names.add(developer_skill["developer"]["name"])
    return names


def get_developer_names_from_response(response) -> set[str]:
    """Extract the unique developer names from a parsed model response.

    :param response: Parsed response JSON with a ``developers`` list of objects.
    :return: Unique ``name`` values from the ``developers`` entries.
    """
    names = set()
    for developer in response["developers"]:
        names.add(developer["name"])
    return names


def response_matches_json_schema(response: object, schema: dict) -> bool:
    """
    Validate parsed response data against a JSON schema.

    Note: despite the old annotation, callers pass already-parsed JSON
    (a dict), not a raw string — see the call sites that pass the result
    of ``json.loads`` / ``load_json_fixture``.

    :param response: The parsed response data to validate.
    :param schema: The JSON schema to validate against.
    :return: True if the response matches the schema, otherwise False.
    """
    try:
        validate(instance=response, schema=schema, format_checker=FormatChecker())
        return True
    except Exception as e:
        # Broad catch is deliberate: any validation problem (including a
        # malformed schema) is reported as a plain failure to the caller.
        print(f"An unexpected error occurred: {e}")
        return False


def load_json_fixture(file_name: str) -> dict:
    """
    Load and parse a JSON fixture from the tests' ``fixtures`` directory.

    :param file_name: Name of the JSON file to load.
    :return: Parsed JSON data as a dictionary.
    """
    fixture_path = os.path.join(ROOT_DIR, "fixtures", file_name)
    with open(fixture_path, "r") as fixture_file:
        return json.load(fixture_file)


def test_response_matches_json_schema():
    """Smoke-check: the example output fixture satisfies the output schema."""
    schema = load_json_fixture("output_schema.json")
    example_output = load_json_fixture("example_output.json")

    assert response_matches_json_schema(example_output, schema)


def has_expected_success_rate(results: list[bool], expected_success_rate: float) -> bool:
    """
    Check whether the fraction of successful results meets a minimum rate.

    :param results: Per-run pass/fail outcomes; an empty list counts as passing.
    :param expected_success_rate: Minimum acceptable success rate in [0.0, 1.0].
    :return: True when the observed success rate is at least the expected rate.
    """
    if not results:
        # No runs means nothing failed; treat as meeting the target.
        return True

    # Equivalent to 1.0 - failure_rate in the original; the debug print of the
    # rate was removed (it leaked into test output on every call).
    success_count = sum(1 for result in results if result)
    success_rate = success_count / len(results)
    return expected_success_rate <= success_rate


def test_response_has_valid_schema():
    """End-to-end sampling test: request N generations from the model and
    require that at least 80% of them pass all allocation checks.

    Sends a single chat-completion request with ``n=generations``, then scores
    each generation with ``run_allocation_test`` via the cat_ai Runner/Reporter
    harness and asserts the aggregate success rate.
    """
    generations = Runner.get_sample_size()

    skills_data = load_json_fixture("skills.json")
    example_output = load_json_fixture("example_output.json")

    system_prompt = f"""
    You will get a description of a project, and your task is to tell me the best developers from the given list for the project
    based on their skills.
    Today's date is April 15th, 2025.
    Pick only developers who are available after the project start date. Pick people with higher skill levels first.
    respond in json with this structure:
    {example_output}

    Here is the skills data:
    """
    system_prompt = system_prompt + str(skills_data)

    project_description = """
    This is a mobile project for telecommunication company. The project starts June 3rd.
    It will find exciting moments from sports highlights videos.
    """

    client = OpenAI()
    assert client is not None

    completion = client.chat.completions.create(
        model="gpt-4-1106-preview",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": project_description},
        ],
        response_format={"type": "json_object"},
        n=generations,  # one request, N sampled generations
    )

    results = []
    for run_index, choice in enumerate(completion.choices):
        response = choice.message.content
        test_reporter = Reporter(
            "test_fast_with_n_generations",
            metadata={
                "system_prompt": system_prompt,
                "user_prompt": project_description,
            },
            output_dir=ROOT_DIR,
        )
        # Bind `response` as a default argument so the lambda does not
        # late-bind the loop variable if the runner defers execution.
        test_runner = Runner(
            lambda reporter, response=response: run_allocation_test(
                reporter, skills_data=skills_data, response=response
            ),
            reporter=test_reporter,
        )
        results.append(test_runner.run_once(run_index))

    # 0.8 is a success-rate floor (the original name `failure_threshold`
    # was misleading — it is compared against the success rate).
    minimum_success_rate = 0.8
    assert has_expected_success_rate(results, minimum_success_rate)


def run_allocation_test(reporter, skills_data, response) -> bool:
    """
    Score a single model response against the allocation quality checks.

    :param reporter: cat_ai Reporter used to record per-check outcomes.
    :param skills_data: Parsed skills fixture (source of truth for real names).
    :param response: Raw model output string, expected to be a JSON document.
    :return: True only when every check passes.
    """
    # Developers considered correct picks for this project description.
    acceptable_people = ["Sam Thomas", "Drew Anderson", "Alex Wilson", "Alex Johnson"]
    all_developers = get_all_developer_names(skills_data)

    schema = load_json_fixture("output_schema.json")

    # Fix: `json_object` was unbound when json.loads raised, so the
    # reporter.report(...) call below crashed with NameError on invalid JSON.
    json_object = None
    has_valid_json_schema = False
    not_empty_response = True
    no_developer_name_is_hallucinated = True
    developer_is_appropriate = True
    try:
        json_object = json.loads(response)
        has_valid_json_schema = response_matches_json_schema(json_object, schema)
        developer_names = get_developer_names_from_response(json_object)
        not_empty_response = len(developer_names) != 0
        developer_is_appropriate = any(name in developer_names for name in acceptable_people)
        if not_empty_response:
            # Fix: the condition was inverted (`if not not_empty_response`),
            # so the hallucination check only ran on empty responses, where it
            # is vacuously true. Verify every returned name really exists.
            no_developer_name_is_hallucinated = all(
                name in all_developers for name in developer_names
            )
    except json.JSONDecodeError as e:
        print(f"JSON Exception: {e}")

    reporter.report(
        json_object,
        {
            "correct_developer_suggested": developer_is_appropriate,
            "no_developer_name_is_hallucinated": no_developer_name_is_hallucinated,
            "not_empty_response": not_empty_response,
            "valid_json_returned": has_valid_json_schema,
        },
    )
    return (
        developer_is_appropriate
        and no_developer_name_is_hallucinated
        and not_empty_response
        and has_valid_json_schema
    )
74 changes: 46 additions & 28 deletions examples/team_recommender/tests/fixtures/output_schema.json
Original file line number Diff line number Diff line change
@@ -1,32 +1,50 @@
{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "type": "object",
  "properties": {
    "developers": {
      "type": "array",
      "items": {
        "type": "object",
        "properties": {
          "name": {
            "type": "string"
          },
          "availableStartDate": {
            "type": "string",
            "format": "date-time"
          },
          "relevantSkills": {
            "type": "array",
            "items": {
              "type": "object",
              "properties": {
                "skill": {
                  "type": "string"
                },
                "level": {
                  "type": "string"
                }
              },
              "required": [
                "skill",
                "level"
              ],
              "additionalProperties": false
            }
          }
        },
        "required": [
          "name",
          "availableStartDate",
          "relevantSkills"
        ],
        "additionalProperties": false
      }
    }
  },
  "required": [
    "developers"
  ],
  "additionalProperties": false
}
Loading