promptdriven
diff --git a/‎README.md‎
Lines changed: 11 additions & 11 deletions b/‎README.md‎
Lines changed: 11 additions & 11 deletions
diff --git a/‎pdd/cmd_test_main.py‎
Lines changed: 26 additions & 15 deletions b/‎pdd/cmd_test_main.py‎
Lines changed: 26 additions & 15 deletions
diff --git a/‎pdd/commands/fix.py‎
Lines changed: 49 additions & 29 deletions b/‎pdd/commands/fix.py‎
Lines changed: 49 additions & 29 deletions
diff --git a/‎pdd/commands/generate.py‎
Lines changed: 6 additions & 4 deletions b/‎pdd/commands/generate.py‎
Lines changed: 6 additions & 4 deletions
diff --git a/‎pdd/construct_paths.py‎
Lines changed: 36 additions & 5 deletions b/‎pdd/construct_paths.py‎
Lines changed: 36 additions & 5 deletions
diff --git a/‎pdd/fix_errors_from_unit_tests.py‎
Lines changed: 2 additions & 1 deletion b/‎pdd/fix_errors_from_unit_tests.py‎
Lines changed: 2 additions & 1 deletion
@@ -1314,10 +1314,10 @@ Arguments:
 - `CODE_FILE`: The filename of the code file to be tested.
 
 Options:
-- `--output LOCATION`: Specify where to save the generated test file. The default file name is `test_<basename>.<language_file_extension>`.
+- `--output LOCATION`: Specify where to save the generated test file. The default file name is `test_<basename>.<language_file_extension>`. If a file with the resolved name already exists and `--force` is not set, a new file with a numbered suffix (e.g., `test_calculator_1.py`) is created instead of overwriting the existing one.
 - `--language`: Specify the programming language. Defaults to the language specified by the prompt file name.
 - `--coverage-report PATH`: Path to the coverage report file for existing tests. When provided, generates additional tests to improve coverage.
-- `--existing-tests PATH`: Path to the existing unit test file. Required when using --coverage-report.
+- `--existing-tests PATH`: Path to an existing unit test file. Required when using --coverage-report. Repeat the option to provide multiple files (e.g., `--existing-tests tests/test_a.py --existing-tests tests/test_b.py`).
 - `--target-coverage FLOAT`: Desired code coverage percentage to achieve (default is 90.0).
 - `--merge`: When used with --existing-tests, merges new tests with existing test file instead of creating a separate file.
 
@@ -1346,9 +1346,9 @@ could influence the output of the `pdd test` command when run in the same direct
 pdd [GLOBAL OPTIONS] test --output tests/test_factorial_calculator.py factorial_calculator_python.prompt src/factorial_calculator.py
 ```
 
-2. Generate additional tests to improve coverage:
+2. Generate additional tests to improve coverage (with multiple existing test files):
 ```
-pdd [GLOBAL OPTIONS] test --coverage-report coverage.xml --existing-tests tests/test_calculator.py --output tests/test_calculator_enhanced.py calculator_python.prompt src/calculator.py
+pdd [GLOBAL OPTIONS] test --coverage-report coverage.xml --existing-tests tests/test_calculator.py --existing-tests tests/test_calculator_edge_cases.py --output tests/test_calculator_enhanced.py calculator_python.prompt src/calculator.py
 ```
 
 3. Improve coverage and merge with existing tests:
@@ -1465,11 +1465,11 @@ pdd [GLOBAL OPTIONS] fix [OPTIONS] PROMPT_FILE CODE_FILE UNIT_TEST_FILE ERROR_FI
 Arguments:
 - `PROMPT_FILE`: The filename of the prompt file that generated the code under test.
 - `CODE_FILE`: The filename of the code file to be fixed.
-- `UNIT_TEST_FILE`: The filename of the unit test file.
+- `UNIT_TEST_FILES`: The filename(s) of the unit test file(s). Multiple files can be provided, and each will be processed individually.
 - `ERROR_FILE`: The filename containing the unit test runtime error messages. Optional and does not need to exist when used with the `--loop` command.
 
 Options:
-- `--output-test LOCATION`: Specify where to save the fixed unit test file. The default file name is `test_<basename>_fixed.<language_file_extension>`. If an environment variable `PDD_FIX_TEST_OUTPUT_PATH` is set, the file will be saved in that path unless overridden by this option.
+- `--output-test LOCATION`: Specify where to save the fixed unit test file. The default file name is `test_<basename>_fixed.<language_file_extension>`. **Warning: If multiple `UNIT_TEST_FILES` are provided along with this option, only the fixed content of the last processed test file will be saved to this location, overwriting previous results. For individual fixed files, omit this option.**
 - `--output-code LOCATION`: Specify where to save the fixed code file. The default file name is `<basename>_fixed.<language_file_extension>`. If an environment variable `PDD_FIX_CODE_OUTPUT_PATH` is set, the file will be saved in that path unless overridden by this option.
 - `--output-results LOCATION`: Specify where to save the results of the error fixing process. The default file name is `<basename>_fix_results.log`. If an environment variable `PDD_FIX_RESULTS_OUTPUT_PATH` is set, the file will be saved in that path unless overridden by this option.
 - `--loop`: Enable iterative fixing process.
@@ -1481,8 +1481,8 @@ Options:
 When the `--loop` option is used, the fix command will attempt to fix errors through multiple iterations. It will use the specified verification program to check if the code runs correctly after each fix attempt. The process will continue until either the errors are fixed, the maximum number of attempts is reached, or the budget is exhausted.
 
 Outputs:
-- Fixed unit test file
-- Fixed code file
+- Fixed unit test file(s).
+- Fixed code file.
 - Results file containing the LLM model's output with unit test results.
 - Print out of results when using '--loop' containing:
   - Success status (boolean)
@@ -1492,9 +1492,9 @@ Outputs:
 
 Example:
 ```
-pdd [GLOBAL OPTIONS] fix --output-test tests/test_factorial_calculator_fixed.py --output-code src/factorial_calculator_fixed.py --output-results results/factorial_fix_results.log factorial_calculator_python.prompt src/factorial_calculator.py tests/test_factorial_calculator.py errors.log
+pdd [GLOBAL OPTIONS] fix --output-code src/factorial_calculator_fixed.py --output-results results/factorial_fix_results.log factorial_calculator_python.prompt src/factorial_calculator.py tests/test_factorial_calculator.py tests/test_factorial_calculator_edge_cases.py errors.log
 ```
-In this example, `factorial_calculator_python.prompt` is the prompt file that originally generated the code under test.
+In this example, `pdd fix` will be run for each test file, and the fixed test files will be saved as `tests/test_factorial_calculator_fixed.py` and `tests/test_factorial_calculator_edge_cases_fixed.py`.
 
 
 #### Agentic Fallback Mode
@@ -1784,7 +1784,7 @@ Arguments:
 - `DESIRED_OUTPUT_FILE`: File containing the desired (correct) output of the program.
 
 Options:
-- `--output LOCATION`: Specify where to save the generated unit test. The default file name is `test_<basename>_bug.<language_extension>`.
+- `--output LOCATION`: Specify where to save the generated unit test. The default file name is `test_<basename>_bug.<language_extension>`. If a file with the resolved name already exists and `--force` is not set, a new file with a numbered suffix (e.g., `test_calculator_bug_1.py`) is created instead of overwriting the existing one.
 - `--language`: Specify the programming language for the unit test (default is "Python").
 
 Example:
 
@@ -21,7 +21,7 @@ def cmd_test_main(
     output: str | None,
     language: str | None,
     coverage_report: str | None,
-    existing_tests: str | None,
+    existing_tests: list[str] | None,
     target_coverage: float | None,
     merge: bool | None,
     strength: float | None = None,
@@ -40,7 +40,7 @@ def cmd_test_main(
         output (str | None): Path to save the generated test file.
         language (str | None): Programming language.
         coverage_report (str | None): Path to the coverage report file.
-        existing_tests (str | None): Path to the existing unit test file.
+        existing_tests (list[str] | None): Paths to the existing unit test files.
         target_coverage (float | None): Desired code coverage percentage.
         merge (bool | None): Whether to merge new tests with existing tests.
 
@@ -76,7 +76,7 @@ def cmd_test_main(
         if coverage_report:
             input_file_paths["coverage_report"] = coverage_report
         if existing_tests:
-            input_file_paths["existing_tests"] = existing_tests
+            input_file_paths["existing_tests"] = existing_tests[0]
 
         command_options = {
             "output": output,
@@ -94,6 +94,15 @@ def cmd_test_main(
             context_override=ctx.obj.get('context'),
             confirm_callback=ctx.obj.get('confirm_callback')
         )
+
+        # Read multiple existing test files and concatenate their content
+        if existing_tests:
+            existing_tests_content = ""
+            for test_file in existing_tests:
+                with open(test_file, 'r') as f:
+                    existing_tests_content += f.read() + "\n"
+            input_strings["existing_tests"] = existing_tests_content
+
         # Use centralized config resolution with proper priority:
         # CLI > pddrc > defaults
         effective_config = resolve_effective_config(
@@ -119,20 +128,11 @@ def cmd_test_main(
         print(f"[bold blue]Language detected:[/bold blue] {language}")
 
     # Determine where the generated tests will be written so we can share it with the LLM
+    # Always use resolved_output since construct_paths handles numbering for test/bug commands
     resolved_output = output_file_paths["output"]
-    if output is None:
-        output_file = resolved_output
-    else:
-        try:
-            is_dir_hint = output.endswith('/')
-        except Exception:
-            is_dir_hint = False
-        if is_dir_hint or (Path(output).exists() and Path(output).is_dir()):
-            output_file = resolved_output
-        else:
-            output_file = output
+    output_file = resolved_output
     if merge and existing_tests:
-        output_file = existing_tests
+        output_file = existing_tests[0]
 
     if not output_file:
         print("[bold red]Error: Output file path could not be determined.[/bold red]")
@@ -193,6 +193,17 @@ def cmd_test_main(
             # Return error result instead of ctx.exit(1) to allow orchestrator to handle gracefully
             return "", 0.0, f"Error: {exception}"
 
+    # Handle output - always use resolved file path since construct_paths handles numbering
+    resolved_output = output_file_paths["output"]
+    output_file = resolved_output
+    if merge and existing_tests:
+        output_file = existing_tests[0] if existing_tests else None
+
+    if not output_file:
+        print("[bold red]Error: Output file path could not be determined.[/bold red]")
+        ctx.exit(1)
+        return "", 0.0, ""
+    
     # Check if unit_test content is empty
     if not unit_test or not unit_test.strip():
         print(f"[bold red]Error: Generated unit test content is empty or whitespace-only.[/bold red]")
 
@@ -2,7 +2,7 @@
 Fix command.
 """
 import click
-from typing import Dict, Optional, Tuple, Any
+from typing import Dict, List, Optional, Tuple, Any
 
 from ..fix_main import fix_main
 from ..track_cost import track_cost
@@ -11,7 +11,7 @@
 @click.command("fix")
 @click.argument("prompt_file", type=click.Path(exists=True, dir_okay=False))
 @click.argument("code_file", type=click.Path(exists=True, dir_okay=False))
-@click.argument("unit_test_file", type=click.Path(exists=True, dir_okay=False))
+@click.argument("unit_test_files", nargs=-1, type=click.Path(exists=True, dir_okay=False))
 @click.argument("error_file", type=click.Path(dir_okay=False))  # Allow non-existent for loop mode
 @click.option(
     "--output-test",
@@ -32,9 +32,9 @@
     help="Specify where to save the results log (file or directory).",
 )
 @click.option(
-    "--loop", 
-    is_flag=True, 
-    default=False, 
+    "--loop",
+    is_flag=True,
+    default=False,
     help="Enable iterative fixing process."
 )
 @click.option(
@@ -63,7 +63,7 @@
     default=False,
     help="Automatically submit the example if all unit tests pass.",
 )
-@click.option( 
+@click.option(
     "--agentic-fallback/--no-agentic-fallback",
     is_flag=True,
     default=True,
@@ -75,7 +75,7 @@ def fix(
     ctx: click.Context,
     prompt_file: str,
     code_file: str,
-    unit_test_file: str,
+    unit_test_files: Tuple[str, ...],
     error_file: str,
     output_test: Optional[str],
     output_code: Optional[str],
@@ -87,30 +87,50 @@ def fix(
     auto_submit: bool,
     agentic_fallback: bool,
 ) -> Optional[Tuple[Dict[str, Any], float, str]]:
-    """Fix code based on a prompt and unit test errors."""
+    """Fix code based on a prompt and unit test errors.
+
+    Accepts one or more UNIT_TEST_FILES. Each test file is processed separately,
+    allowing the AI to run and fix tests individually rather than as a concatenated blob.
+    """
     try:
-        # The actual logic is in fix_main
-        success, fixed_unit_test, fixed_code, attempts, total_cost, model_name = fix_main(
-            ctx=ctx,
-            prompt_file=prompt_file,
-            code_file=code_file,
-            unit_test_file=unit_test_file,
-            error_file=error_file,
-            output_test=output_test,
-            output_code=output_code,
-            output_results=output_results,
-            loop=loop,
-            verification_program=verification_program,
-            max_attempts=max_attempts,
-            budget=budget,
-            auto_submit=auto_submit,
-            agentic_fallback=agentic_fallback,
-        )
+        all_results: List[Dict[str, Any]] = []
+        total_cost = 0.0
+        model_name = ""
+
+        # Process each unit test file separately
+        for unit_test_file in unit_test_files:
+            success, fixed_unit_test, fixed_code, attempts, cost, model = fix_main(
+                ctx=ctx,
+                prompt_file=prompt_file,
+                code_file=code_file,
+                unit_test_file=unit_test_file,
+                error_file=error_file,
+                output_test=output_test,
+                output_code=output_code,
+                output_results=output_results,
+                loop=loop,
+                verification_program=verification_program,
+                max_attempts=max_attempts,
+                budget=budget,
+                auto_submit=auto_submit,
+                agentic_fallback=agentic_fallback,
+            )
+            all_results.append({
+                "success": success,
+                "fixed_unit_test": fixed_unit_test,
+                "fixed_code": fixed_code,
+                "attempts": attempts,
+                "unit_test_file": unit_test_file,
+            })
+            total_cost += cost
+            model_name = model
+
+        # Aggregate results
+        overall_success = all(r["success"] for r in all_results)
         result = {
-            "success": success,
-            "fixed_unit_test": fixed_unit_test,
-            "fixed_code": fixed_code,
-            "attempts": attempts,
+            "success": overall_success,
+            "results": all_results,
+            "total_attempts": sum(r["attempts"] for r in all_results),
         }
         return result, total_cost, model_name
     except click.Abort:
 
@@ -206,8 +206,8 @@ def example(
 @click.option(
     "--existing-tests",
     type=click.Path(exists=True, dir_okay=False),
-    default=None,
-    help="Path to the existing unit test file.",
+    multiple=True,
+    help="Path to existing unit test file(s). Can be specified multiple times.",
 )
 @click.option(
     "--target-coverage",
@@ -230,20 +230,22 @@ def test(
     output: Optional[str],
     language: Optional[str],
     coverage_report: Optional[str],
-    existing_tests: Optional[str],
+    existing_tests: Tuple[str, ...],
     target_coverage: Optional[float],
     merge: bool,
 ) -> Optional[Tuple[str, float, str]]:
     """Generate unit tests for a given prompt and implementation."""
     try:
+        # Convert empty tuple to None for cmd_test_main compatibility
+        existing_tests_list = list(existing_tests) if existing_tests else None
         test_code, total_cost, model_name = cmd_test_main(
             ctx=ctx,
             prompt_file=prompt_file,
             code_file=code_file,
             output=output,
             language=language,
             coverage_report=coverage_report,
-            existing_tests=existing_tests,
+            existing_tests=existing_tests_list,
             target_coverage=target_coverage,
             merge=merge,
         )
 
@@ -271,6 +271,24 @@ def _extract_basename(
     """
     Deduce the project basename according to the rules explained in *Step A*.
     """
+    # Handle 'fix' command specifically to create a unique basename per test file
+    if command == "fix":
+        prompt_path = _candidate_prompt_path(input_file_paths)
+        if not prompt_path:
+            raise ValueError("Could not determine prompt file for 'fix' command.")
+        
+        prompt_basename = _strip_language_suffix(prompt_path)
+        
+        unit_test_path = input_file_paths.get("unit_test_file")
+        if not unit_test_path:
+            # Fallback to just the prompt basename if no unit test file is provided
+            # This might happen in some edge cases, but 'fix' command structure requires it
+            return prompt_basename
+
+        # Use the stem of the unit test file to make the basename unique
+        test_basename = Path(unit_test_path).stem
+        return f"{prompt_basename}_{test_basename}"
+        
     # Handle conflicts first due to its unique structure
     if command == "conflicts":
         key1 = "prompt1"
@@ -778,12 +796,25 @@ def construct_paths(
          raise # Re-raise the ValueError
 
     # ------------- Step 4: overwrite confirmation ------------
-    # Check if any output *file* exists (operate on Path objects)
+    # Initialize existing_files before the conditional to avoid UnboundLocalError
     existing_files: Dict[str, Path] = {}
-    for k, p_obj in output_paths_resolved.items():
-        # p_obj = Path(p_val) # Conversion now happens earlier
-        if p_obj.is_file():
-            existing_files[k] = p_obj # Store the Path object
+
+    if command in ["test", "bug"] and not force:
+        # For test/bug commands without --force, create numbered files instead of overwriting
+        for key, path in output_paths_resolved.items():
+            if path.is_file():
+                base, ext = os.path.splitext(path)
+                i = 1
+                new_path = Path(f"{base}_{i}{ext}")
+                while new_path.exists():
+                    i += 1
+                    new_path = Path(f"{base}_{i}{ext}")
+                output_paths_resolved[key] = new_path
+    else:
+        # Check if any output *file* exists (operate on Path objects)
+        for k, p_obj in output_paths_resolved.items():
+            if p_obj.is_file():
+                existing_files[k] = p_obj # Store the Path object
 
     if existing_files and not force:
         paths_list = "\n".join(f"  • {p.resolve()}" for p in existing_files.values())
 
@@ -114,7 +114,8 @@ def fix_errors_from_unit_tests(
     Fix errors in unit tests using LLM models and log the process.
 
     Args:
-        unit_test (str): The unit test code
+        unit_test (str): The unit test code, potentially multiple files concatenated
+                         with <file name="filename.py">...</file> tags.
         code (str): The code under test
         prompt (str): The prompt that generated the code
         error (str): The error message