-
Notifications
You must be signed in to change notification settings - Fork 12
Add json schema validator example #12
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
168 changes: 168 additions & 0 deletions
168
...ples/team_recommender/tests/example_7_schema_validators/test_response_has_valid_schema.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,168 @@ | ||
| import json | ||
| import os | ||
|
|
||
| from jsonschema import FormatChecker, validate | ||
| from openai import OpenAI | ||
| from tests.settings import ROOT_DIR | ||
|
|
||
| from cat_ai.reporter import Reporter | ||
| from cat_ai.runner import Runner | ||
|
|
||
|
|
||
def get_all_developer_names(skills_data) -> set[str]:
    """
    Collect the distinct developer names found in the skills data.

    :param skills_data: Mapping with a "skills" list; each skill carries a
        "developerSkills" list whose entries hold a "developer" mapping with a "name".
    :return: The set of every developer name that appears under any skill.
    """
    names: set[str] = set()
    for skill_entry in skills_data["skills"]:
        for developer_skill in skill_entry["developerSkills"]:
            names.add(developer_skill["developer"]["name"])
    return names
|
|
||
|
|
||
def get_developer_names_from_response(response) -> set[str]:
    """
    Extract the distinct developer names from a parsed recommendation response.

    :param response: Mapping with a "developers" list whose entries each have a "name".
    :return: The set of names listed in the response.
    """
    names: set[str] = set()
    for entry in response["developers"]:
        names.add(entry["name"])
    return names
|
|
||
|
|
||
def response_matches_json_schema(response: object, schema: dict) -> bool:
    """
    Validates if already-parsed JSON data matches the provided JSON schema.

    :param response: The parsed JSON data (for example the result of ``json.loads``),
        not the raw JSON text.
    :param schema: The schema to validate against.
    :return: True if the response matches the schema, otherwise False.
    :raises jsonschema.SchemaError: If ``schema`` itself is not a valid JSON schema;
        that is a fixture problem, not a property of the response.
    """
    # Imported locally so this function does not depend on edits to the file's import block.
    from jsonschema import ValidationError

    try:
        validate(instance=response, schema=schema, format_checker=FormatChecker())
    except ValidationError as e:
        # A mismatch is an expected outcome here, so report it and signal failure.
        print(f"Response does not match the JSON schema: {e}")
        return False
    return True
|
|
||
|
|
||
def load_json_fixture(file_name: str) -> dict:
    """
    Utility function to load a JSON fixture file.

    :param file_name: Name of the JSON file to load, relative to the
        "fixtures" directory under ``ROOT_DIR``.
    :return: Parsed JSON data as a dictionary.
    """
    json_path = os.path.join(ROOT_DIR, "fixtures", file_name)
    # Decode explicitly as UTF-8 so the result does not depend on the platform's default encoding.
    with open(json_path, "r", encoding="utf-8") as file:
        return json.load(file)
|
|
||
|
|
||
def test_response_matches_json_schema():
    """The example output fixture must satisfy the output schema fixture."""
    sample_output = load_json_fixture("example_output.json")
    output_schema = load_json_fixture("output_schema.json")

    is_valid = response_matches_json_schema(sample_output, output_schema)
    assert is_valid
|
|
||
|
|
||
def has_expected_success_rate(results: list[bool], expected_success_rate: float) -> bool:
    """
    Tell whether the share of truthy entries in ``results`` reaches a threshold.

    :param results: One pass/fail outcome per run.
    :param expected_success_rate: Minimum acceptable fraction of passing runs (0.0 to 1.0).
    :return: True if the observed success rate is at least ``expected_success_rate``.
        An empty ``results`` list is treated as passing.
    """
    if not results:
        return True

    # Divide the success count directly. Deriving it as ``1.0 - failure_rate`` adds
    # rounding error (1.0 - 0.9 == 0.09999999999999998), which wrongly rejects runs
    # that hit the threshold exactly.
    success_count = sum(1 for result in results if result)
    success_rate = success_count / len(results)
    return expected_success_rate <= success_rate
|
|
||
|
|
||
def test_response_has_valid_schema():
    """
    Request several completions for one project description in a single API call,
    score each with ``run_allocation_test``, and require that the share of passing
    generations reaches the expected success rate.
    """
    generations = Runner.get_sample_size()

    skills_data = load_json_fixture("skills.json")
    example_output = load_json_fixture("example_output.json")

    # The prompt asks for JSON, so show the model real JSON rather than the Python
    # repr of the fixtures (single quotes, True/False/None).
    example_output_json = json.dumps(example_output)
    system_prompt = f"""
        You will get a description of a project, and your task is to tell me the best developers from the given list for the project
        based on their skills.
        Today's date is April 15th, 2025.
        Pick only developers who are available after the project start date. Pick people with higher skill levels first.
        respond in json with this structure:
        {example_output_json}

        Here is the skills data:
        """
    system_prompt = system_prompt + json.dumps(skills_data)

    project_description = """
        This is a mobile project for telecommunication company. The project starts June 3rd.
        It will find exciting moments from sports highlights videos.
        """

    client = OpenAI()
    assert client is not None

    completion = client.chat.completions.create(
        model="gpt-4-1106-preview",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": project_description},
        ],
        response_format={"type": "json_object"},
        n=generations,
    )
    responses = completion.choices
    # Fail loudly if the API returned a different number of choices than requested,
    # instead of raising IndexError or scoring fewer runs than intended.
    assert len(responses) == generations

    results = []
    for run, choice in enumerate(responses):
        response = choice.message.content
        # NOTE(review): the reporter name below does not match this test's name and
        # looks copied from another example; confirm before renaming, since the
        # reporter may use it to name its output.
        test_reporter = Reporter(
            "test_fast_with_n_generations",
            metadata={
                "system_prompt": system_prompt,
                "user_prompt": project_description,
            },
            output_dir=ROOT_DIR,
        )
        # The lambda is invoked by run_once() within this same iteration, so it
        # sees the current ``response``.
        test_runner = Runner(
            lambda reporter: run_allocation_test(
                reporter, skills_data=skills_data, response=response
            ),
            reporter=test_reporter,
        )
        results.append(test_runner.run_once(run))

    expected_success_rate = 0.8
    assert has_expected_success_rate(results, expected_success_rate)
|
|
||
|
|
||
def run_allocation_test(reporter, skills_data, response) -> bool:
    """
    Score one model response and record the outcome through ``reporter``.

    Four checks are made: the response is JSON matching the output schema, it names
    at least one developer, every named developer exists in ``skills_data``, and at
    least one of the acceptable developers is named.

    :param reporter: Object whose ``report(response, results)`` method records the outcome.
    :param skills_data: Parsed skills fixture used to determine which developer names exist.
    :param response: Raw model output, expected to be JSON text.
    :return: True only if all four checks pass.
    """
    acceptable_people = ["Sam Thomas", "Drew Anderson", "Alex Wilson", "Alex Johnson"]
    all_developers = get_all_developer_names(skills_data)

    schema = load_json_fixture("output_schema.json")
    has_valid_json_schema = False

    not_empty_response = True
    no_developer_name_is_hallucinated = True
    developer_is_appropriate = True
    # Start from the raw text so the report call below always has something to
    # record, even when the response cannot be parsed.
    json_object = response
    try:
        json_object = json.loads(response)
    except (json.JSONDecodeError, TypeError) as e:
        # TypeError covers a missing (None) response body.
        print(f"JSON Exception: {e}")
    else:
        has_valid_json_schema = response_matches_json_schema(json_object, schema)
        try:
            developer_names = get_developer_names_from_response(json_object)
        except (KeyError, TypeError):
            # Valid JSON without the expected "developers" structure counts as a
            # failed run and must not abort the whole test.
            developer_names = set()
        not_empty_response = len(developer_names) != 0
        developer_is_appropriate = any(name in developer_names for name in acceptable_people)
        # Every returned name must exist in the skills data; vacuously true when
        # no names were returned.
        no_developer_name_is_hallucinated = all(
            name in all_developers for name in developer_names
        )

    reporter.report(
        json_object,
        {
            "correct_developer_suggested": developer_is_appropriate,
            "no_developer_name_is_hallucinated": no_developer_name_is_hallucinated,
            "not_empty_response": not_empty_response,
            "valid_json_returned": has_valid_json_schema,
        },
    )
    return (
        developer_is_appropriate
        and no_developer_name_is_hallucinated
        and not_empty_response
        and has_valid_json_schema
    )
74 changes: 46 additions & 28 deletions
74
examples/team_recommender/tests/fixtures/output_schema.json
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,32 +1,50 @@ | ||
| { | ||
| "$schema": "http://json-schema.org/draft-07/schema#", | ||
| "type": "object", | ||
| "properties": { | ||
| "developers": { | ||
| "type": "array", | ||
| "items": { | ||
| "type": "object", | ||
| "properties": { | ||
| "name": { "type": "string" }, | ||
| "availableStartDate": { "type": "string", "format": "date-time" }, | ||
| "relevantSkills": { | ||
| "type": "array", | ||
| "items": { | ||
| "type": "object", | ||
| "properties": { | ||
| "skill": { "type": "string" }, | ||
| "level": { "type": "string" } | ||
| "$schema": "http://json-schema.org/draft-07/schema#", | ||
| "type": "object", | ||
| "properties": { | ||
| "developers": { | ||
| "type": "array", | ||
| "items": { | ||
| "type": "object", | ||
| "properties": { | ||
| "name": { | ||
| "type": "string" | ||
| }, | ||
| "availableStartDate": { | ||
| "type": "string", | ||
| "format": "date-time" | ||
| }, | ||
| "relevantSkills": { | ||
| "type": "array", | ||
| "items": { | ||
| "type": "object", | ||
| "properties": { | ||
| "skill": { | ||
| "type": "string" | ||
| }, | ||
| "required": ["skill", "level"], | ||
| "additionalProperties": false | ||
| "level": { | ||
| "type": "string" | ||
| } | ||
| }, | ||
| "required": [ | ||
| "skill", | ||
| "level" | ||
| ], | ||
| "additionalProperties": false | ||
| } | ||
| }, | ||
| "required": ["name", "availableStartDate", "relevantSkills"], | ||
| "additionalProperties": false | ||
| } | ||
| } | ||
| }, | ||
| "required": ["developers"], | ||
| "additionalProperties": false | ||
| } | ||
| } | ||
| }, | ||
| "required": [ | ||
| "name", | ||
| "availableStartDate", | ||
| "relevantSkills" | ||
| ], | ||
| "additionalProperties": false | ||
| } | ||
| } | ||
| }, | ||
| "required": [ | ||
| "developers" | ||
| ], | ||
| "additionalProperties": false | ||
| } |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Using print for debugging in production test code may not be ideal; consider using a proper logging framework or removing the debug statement.