diff --git a/cover_agent/CoverAgent.py b/cover_agent/CoverAgent.py index 6f3de76fb..8aed51a8d 100644 --- a/cover_agent/CoverAgent.py +++ b/cover_agent/CoverAgent.py @@ -39,6 +39,8 @@ def __init__(self, args): llm_model=args.model, api_base=args.api_base, use_report_coverage_feature_flag=args.use_report_coverage_feature_flag, + mutation_testing=args.mutation_testing, + more_mutation_logging=args.more_mutation_logging, ) def _validate_paths(self): @@ -151,6 +153,9 @@ def run(self): # Run the coverage tool again if the desired coverage hasn't been reached self.test_gen.run_coverage() + if self.args.mutation_testing: + self.test_gen.run_mutations() + # Log the final coverage if self.test_gen.current_coverage >= (self.test_gen.desired_coverage / 100): self.logger.info( diff --git a/cover_agent/PromptBuilder.py b/cover_agent/PromptBuilder.py index 6b81bf58d..b7205fd66 100644 --- a/cover_agent/PromptBuilder.py +++ b/cover_agent/PromptBuilder.py @@ -42,6 +42,7 @@ def __init__( additional_instructions: str = "", failed_test_runs: str = "", language: str = "python", + mutation_testing: bool = False, ): """ The `PromptBuilder` class is responsible for building a formatted prompt string by replacing placeholders with the actual content of files read during initialization. It takes in various paths and settings as parameters and provides a method to generate the prompt. @@ -72,6 +73,7 @@ def __init__( self.test_file = self._read_file(test_file_path) self.code_coverage_report = code_coverage_report self.language = language + self.mutation_testing = mutation_testing # add line numbers to each line in 'source_file'. 
start from 1 self.source_file_numbered = "\n".join( [f"{i + 1} {line}" for i, line in enumerate(self.source_file.split("\n"))] @@ -141,12 +143,20 @@ def build_prompt(self) -> dict: } environment = Environment(undefined=StrictUndefined) try: - system_prompt = environment.from_string( - get_settings().test_generation_prompt.system - ).render(variables) - user_prompt = environment.from_string( - get_settings().test_generation_prompt.user - ).render(variables) + if self.mutation_testing: + system_prompt = environment.from_string( + get_settings().mutation_test_prompt.system + ).render(variables) + user_prompt = environment.from_string( + get_settings().mutation_test_prompt.user + ).render(variables) + else: + system_prompt = environment.from_string( + get_settings().test_generation_prompt.system + ).render(variables) + user_prompt = environment.from_string( + get_settings().test_generation_prompt.user + ).render(variables) except Exception as e: logging.error(f"Error rendering prompt: {e}") return {"system": "", "user": ""} diff --git a/cover_agent/UnitTestGenerator.py b/cover_agent/UnitTestGenerator.py index 0c73ecfc4..7d7750dfb 100644 --- a/cover_agent/UnitTestGenerator.py +++ b/cover_agent/UnitTestGenerator.py @@ -4,6 +4,9 @@ import logging import os import re +import json + +from wandb.sdk.data_types.trace_tree import Trace from cover_agent.AICaller import AICaller from cover_agent.CoverageProcessor import CoverageProcessor @@ -14,6 +17,10 @@ from cover_agent.settings.config_loader import get_settings from cover_agent.utils import load_yaml +import subprocess + +from shlex import split + class UnitTestGenerator: def __init__( @@ -30,6 +37,8 @@ def __init__( desired_coverage: int = 90, # Default to 90% coverage if not specified additional_instructions: str = "", use_report_coverage_feature_flag: bool = False, + mutation_testing: bool = False, + more_mutation_logging: bool = False, ): """ Initialize the UnitTestGenerator class with the provided parameters. 
@@ -65,6 +74,8 @@ def __init__( self.additional_instructions = additional_instructions self.language = self.get_code_language(source_file_path) self.use_report_coverage_feature_flag = use_report_coverage_feature_flag + self.mutation_testing = mutation_testing + self.more_mutation_logging = more_mutation_logging self.last_coverage_percentages = {} self.llm_model = llm_model @@ -213,7 +224,7 @@ def run_coverage(self): "Will default to using the full coverage report. You will need to check coverage manually for each passing test." ) with open(self.code_coverage_report_path, "r") as f: - self.code_coverage_report = f.read() + self.code_coverage_report = f.read() @staticmethod def get_included_files(included_files): @@ -761,6 +772,113 @@ def to_dict(self): def to_json(self): return json.dumps(self.to_dict()) + def run_mutations(self): + self.logger.info("Running mutation tests") + + # Run mutation tests + + mutation_prompt_builder = PromptBuilder( + source_file_path=self.source_file_path, + test_file_path=self.test_file_path, + code_coverage_report=self.code_coverage_report, + included_files=self.included_files, + additional_instructions=self.additional_instructions, + failed_test_runs=self.failed_test_runs, + language=self.language, + mutation_testing=True + ) + + mutation_prompt = mutation_prompt_builder.build_prompt() + + response, prompt_token_count, response_token_count = ( + self.ai_caller.call_model(prompt=mutation_prompt) + ) + + mutation_dict = load_yaml(response) + + for mutation in mutation_dict["mutations"]: + result = self.run_mutation(mutation) + + # Prepare the log message with banners + log_message = f"Mutation result (return code: {result.returncode}): " + if result.returncode == 0: + log_message += "Mutation survived. We changed the source file but the test still passed. We should revert the generated test or fix it.\n" + elif result.returncode == 1: + log_message += "Mutation caught. 
This means the test was written correctly because changing the source failed the directed test.\n" + else: + self.logger.error(f"Mutation test failed with return code {result.returncode}") + + # Add STDOUT to the log message if it's not empty + if result.stdout.strip() and self.more_mutation_logging: + log_message += "\n" + "="*10 + " STDOUT " + "="*10 + "\n" + log_message += result.stdout + + # Add STDERR to the log message if it's not empty + if result.stderr.strip() and self.more_mutation_logging: + log_message += "\n" + "="*10 + " STDERR " + "="*10 + "\n" + log_message += result.stderr + + + self.logger.info(log_message) + + + def run_mutation(self, mutation): + mutated_code = mutation.get("mutated_version", None) + line_number = mutation.get("location", None) + + if not mutated_code or not line_number: + self.logger.error("Mutation does not contain mutated code or line number") + self.logger.error(f"Mutation: {mutation}") + return None + + + # Read the original content + with open(self.source_file_path, "r") as source_file: + original_content = source_file.readlines() + + # Determine the indentation level of the line at line_number + indentation = len(original_content[line_number - 1]) - len(original_content[line_number - 1].lstrip()) + + # Adjust the indentation of the mutated code + adjusted_mutated_code = [ + ' ' * indentation + line if line.strip() else line + for line in mutated_code.split("\n") + ] + + # Insert the mutated code at the specified spot + modified_content = ( + original_content[:line_number - 1] + + adjusted_mutated_code + ["\n"] + + original_content[line_number:] + ) + + # Write the modified content back to the file + with open(self.source_file_path, "w") as source_file: + source_file.writelines(modified_content) + source_file.flush() + + # Step 2: Run the test using the Runner class + self.logger.info( + f'Running test with the following command: "{self.test_command}"' + ) + + try: + result = subprocess.run( + split(self.test_command), + 
text=True, + capture_output=True, + cwd=self.test_command_dir, + timeout=30, + ) + except Exception as e: + logging.error(f"Error running test command: {e}") + result = None + finally: + # Write the modified content back to the file + with open(self.source_file_path, "w") as source_file: + source_file.writelines(original_content) + source_file.flush() + return result def extract_error_message_python(fail_message): """ diff --git a/cover_agent/main.py b/cover_agent/main.py index 1a9a9f10c..750df9e07 100644 --- a/cover_agent/main.py +++ b/cover_agent/main.py @@ -101,6 +101,16 @@ def parse_args(): default="", help="Path to optional log database. Default: %(default)s.", ) + parser.add_argument( + "--mutation-testing", + action="store_true", + help="Setting this to True enables mutation testing. Default: False.", + ) + parser.add_argument( + "--more-mutation-logging", + action="store_true", + help="Setting this to True enables more logging. Default: False.", + ) return parser.parse_args() diff --git a/cover_agent/settings/config_loader.py b/cover_agent/settings/config_loader.py index 1d2622d55..138aef751 100644 --- a/cover_agent/settings/config_loader.py +++ b/cover_agent/settings/config_loader.py @@ -7,6 +7,7 @@ "language_extensions.toml", "analyze_suite_test_headers_indentation.toml", "analyze_suite_test_insert_line.toml", + "mutation_test_prompt.toml", ] diff --git a/cover_agent/settings/mutation_test_prompt.toml b/cover_agent/settings/mutation_test_prompt.toml new file mode 100644 index 000000000..04b41e885 --- /dev/null +++ b/cover_agent/settings/mutation_test_prompt.toml @@ -0,0 +1,85 @@ +[mutation_test_prompt] +system="""\ +""" + +user="""\ + +You are an AI mutation testing agent tasked with mutating {{ language }} code to evaluate its robustness. + +Mutation Strategy: + +1. Logic Tweaks: + Modify conditions (e.g., 'if (a < b)' to 'if (a <= b)') + Adjust loop boundaries + Introduce minor calculation errors + Avoid drastic changes or infinite loops. + +2. 
Output Modifications: + Change return types or formats + Alter response structures + Return corrupted or incorrect data + +3. Method Interference: + Alter function parameters + Replace or omit key method calls + +4. Failure Injection: + Introduce exceptions or error states + Simulate system or resource failures + +5. Data Handling Faults: + Inject parsing errors + Bypass data validation + Corrupt object states + +6. Boundary Condition Testing: + Use out-of-bounds indices + Test extreme or edge-case parameters + +7. Concurrency Issues: + Simulate race conditions or deadlocks + Introduce timeouts or delays + +8. Security Vulnerabilities: + Replicate common vulnerabilities (e.g., buffer overflow, SQL injection, XSS) + Introduce authentication or authorization bypasses + + +Focus on subtle, realistic mutations that challenge the code's resilience while keeping core functionality intact. Prioritize scenarios likely to arise from programming errors or edge cases. + + +## Source Code to add Mutations to: {{ source_file_name }} +```{{language}} +{{ source_file_numbered }} +``` + +## Task +1. Conduct a line-by-line analysis of the source code. +2. Generate mutations for each test case. +3. Prioritize mutating function blocks and critical code sections. +4. Ensure the mutations offer meaningful insights into code quality and test coverage. +5. Present the output in order of ascending line numbers. +6. Avoid including manually inserted line numbers in the response. +7. Limit mutations to single-line changes only. + +Example output: +```yaml +file: {{source_file}} +mutations: + - method: + category: + summary: + location: + original: | + + mutated_version: | + +``` + +Use block scalar('|') to format each YAML output. + +Response (should be a valid YAML, and nothing else, and do not return the source code): +```yaml + +Generate mutants that test the code’s resilience while preserving core functionality. 
Output only in YAML format, with no additional explanations or comments, and use only the variables scoped in the original code. Do not make any new variables. +""" \ No newline at end of file diff --git a/docs/mutation_testing.md b/docs/mutation_testing.md new file mode 100644 index 000000000..e84f6ddbe --- /dev/null +++ b/docs/mutation_testing.md @@ -0,0 +1,44 @@ +# Mutation Testing in Cover Agent + +## Overview of Mutation Testing + +Mutation testing is a technique used to evaluate the quality and effectiveness of test suites. It involves making small changes, or "mutations," to the source code and then running the test suite to see if the tests can detect the changes. The goal is to ensure that the tests are robust enough to catch potential bugs introduced by these mutations. If a test fails due to a mutation, it indicates that the test suite is effective in catching errors. Conversely, if a mutation does not cause any test to fail, it suggests that the test suite may need improvement. + +## How Mutation Testing Works in Cover Agent + +In the Cover Agent, mutation testing is integrated into the `UnitTestGenerator` class. After generating and validating the tests, the mutation testing process is initiated if enabled. Here's a brief overview of how it works: + +1. **Mutation Prompt Building**: The `PromptBuilder` class constructs a prompt specifically for mutation testing. This prompt guides the AI to generate potential mutations for the source code. + +2. **Running Mutations**: The `run_mutations` method in the `UnitTestGenerator` class executes the mutation tests. It uses the AI to generate a list of mutations, applies each mutation to the source code, and runs the test suite to check if the mutation is detected. + +3. **Logging Results**: The results of each mutation test are logged, indicating whether the mutation was caught (i.e., caused a test to fail) or survived (i.e., did not cause any test to fail). 
+ +## How to Run Mutation Testing in Cover Agent + +To run mutation testing in Cover Agent, you need to use the command-line arguments added in `main.py`. Here’s how you can enable and execute mutation testing: + +1. **Enable Mutation Testing**: Use the `--mutation-testing` flag when running the Cover Agent. This flag activates the mutation testing feature. + +2. **Enable Detailed Logging**: If you want more detailed logging of the mutation testing process, use the `--more-mutation-logging` flag. This will provide additional information about the mutations and their effects. + +### Example Command + +```bash +python cover_agent/main.py --mutation-testing --more-mutation-logging <other arguments> +``` + +This command will run the Cover Agent with mutation testing enabled and provide detailed logs of the mutation process. + +Note: `<other arguments>` denotes the regular arguments that are supplied when running Cover Agent (e.g. `--source-file-path`, `--test-file-path`, etc.). For more details see the top level `README.md` file. + +## Additional Information + +### Configuration + +Mutation testing prompts are configured using a TOML file named `mutation_test_prompt.toml`. This file defines the strategies and templates used for generating mutations. You can customize this file to adjust the mutation strategies according to your needs. + +### Limitations and Considerations + +- **Performance**: Mutation testing can be resource-intensive as it involves running the test suite multiple times with different mutations. +- **Mutation Quality**: The effectiveness of mutation testing depends on the quality of the mutations generated. Ensure that the mutation strategies are well-defined to produce meaningful insights. 
diff --git a/templated_tests/python_fastapi/test_app.py b/templated_tests/python_fastapi/test_app.py index 8c391f3e9..284357bc5 100644 --- a/templated_tests/python_fastapi/test_app.py +++ b/templated_tests/python_fastapi/test_app.py @@ -3,6 +3,7 @@ from app import app from datetime import date +import math client = TestClient(app) def test_root(): diff --git a/tests/test_CoverAgent.py b/tests/test_CoverAgent.py index ebc084e0f..245228df0 100644 --- a/tests/test_CoverAgent.py +++ b/tests/test_CoverAgent.py @@ -119,7 +119,9 @@ def test_duplicate_test_file_with_output_path(self, mock_isfile, mock_copy): model="openai/test-model", api_base="openai/test-api", use_report_coverage_feature_flag=False, - log_db_path="" + log_db_path="", + mutation_testing=False, + more_mutation_logging=False, ) with pytest.raises(AssertionError) as exc_info: @@ -154,7 +156,9 @@ def test_duplicate_test_file_without_output_path(self, mock_isfile): model="openai/test-model", api_base="openai/test-api", use_report_coverage_feature_flag=False, - log_db_path="" + log_db_path="", + mutation_testing=False, + more_mutation_logging=False, ) with pytest.raises(AssertionError) as exc_info: