promptdriven
diff --git a/‎.env.example‎
Lines changed: 18 additions & 9 deletions b/‎.env.example‎
Lines changed: 18 additions & 9 deletions
diff --git a/‎.gitignore‎
Lines changed: 2 additions & 0 deletions b/‎.gitignore‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎.pddrc‎
Lines changed: 2 additions & 2 deletions b/‎.pddrc‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎CHANGELOG.md‎
Lines changed: 55 additions & 2 deletions b/‎CHANGELOG.md‎
Lines changed: 55 additions & 2 deletions
diff --git a/‎Makefile‎
Lines changed: 10 additions & 2 deletions b/‎Makefile‎
Lines changed: 10 additions & 2 deletions
diff --git a/‎README.md‎
Lines changed: 34 additions & 2 deletions b/‎README.md‎
Lines changed: 34 additions & 2 deletions
diff --git a/‎context/agentic_common_example.py‎
Lines changed: 84 additions & 0 deletions b/‎context/agentic_common_example.py‎
Lines changed: 84 additions & 0 deletions
@@ -1,9 +1,18 @@
-OPENAI_API_KEY =your-openai-api-key
-GOOGLE_API_KEY = your-google-api-key
-ANTHROPIC_API_KEY = your-anthropic-api-key
-PDD_PATH = Where-Your-PDD-Root-Is-At
-DEEKSEEK_API_KEY = your-deekseek-api-key
-FIREWORKS_API_KEY = your-fireworks-api-key
-PYTHONPATH = staging/pdd
-GROQ_API_KEY = your-groq-api-key
-TOGETHER_API_KEY = your-together-api-key
+# LLM API Keys (at least one required)
+OPENAI_API_KEY=your-openai-api-key
+GOOGLE_API_KEY=your-google-api-key
+ANTHROPIC_API_KEY=your-anthropic-api-key
+
+# Additional LLM Providers (optional)
+DEEPSEEK_API_KEY=your-deepseek-api-key
+FIREWORKS_API_KEY=your-fireworks-api-key
+GROQ_API_KEY=your-groq-api-key
+TOGETHER_API_KEY=your-together-api-key
+
+# Vertex AI (optional - for Gemini via GCP)
+VERTEX_CREDENTIALS=/path/to/service-account.json
+VERTEX_PROJECT=your-gcp-project-id
+VERTEX_LOCATION=us-central1
+
+# PDD Configuration
+PDD_PATH=/path/to/your/pdd
@@ -3,6 +3,8 @@ __pycache__/
 *.py[cod]
 *$py.class
 *.vsix
+.tmp/
+.agentic_prompt*
 
 .vscode/settings.json
 
 
@@ -108,8 +108,8 @@ contexts:
       test_output_path: "tests/"
       example_output_path: "context/"
       default_language: "python"
-      target_coverage: 10.0
-      strength: 0.8
+      target_coverage: 80.0
+      strength: 1
       temperature: 0.0
       budget: 10.0
       max_attempts: 3
@@ -1,12 +1,65 @@
+## v0.0.89 (2025-12-22)
+
+### Feat
+
+- enhance working directory handling in agentic functions
+- enhance fix_error_loop documentation with control flow diagram
+- add --simple flag for legacy update mode in CLI
+- add simple option for legacy routing in update_main
+- refine fix_error_loop module with enhanced logging and iterative error handling
+- enhance git_update function with agentic and legacy routing logic
+- enhance auto_include and insert_includes to filter self-referential dependencies
+- enhance test coverage for agentic functionality with formal verification and unit tests
+- prioritize prompt changes in sync decision logic
+- enhance testing framework for agentic updates with formal verification and unit tests
+- implement crash_main and fix_code_loop functions for error handling
+- add formal verification and unit tests for agentic verification
+- add agentic verification functionality and example
+- implement agentic crash handling and testing framework
+- introduce agentic update functionality for prompt files
+
+### Fix
+
+- clarify prompt authority in error handling and test expectations
+- add progress callback parameter to test functions
+- ensure test file preservation during sync operations
+
+### Refactor
+
+- enhance agentic fix functionality and improve output reporting
+- enhance agentic functionality and improve CLI integration
+- enhance agentic functionality and prompt management
+- enhance clarity and structure in modify_python prompt
+- improve prompt clarity and structure in git_update and update_main
+- streamline agentic_fix and update prompts
+
 ## v0.0.88 (2025-12-19)
 
 ### Feat
 
-- enhance configuration management and testing
+- **Multi-Test File Support for Fix Command:** The `pdd fix` command now accepts multiple unit test files as arguments. Each test file is processed separately by the LLM, enabling targeted fixes per test file rather than concatenating all tests into a single blob. Results are aggregated with overall success requiring all individual fixes to succeed.
+
+- **Numbered Test Output Files:** The `test` and `bug` commands now automatically create numbered output files (e.g., `test_module_1.py`, `test_module_2.py`) when the output file already exists and `--force` is not specified, preventing accidental overwrites while maintaining workflow continuity.
+
+- **Multi-File Existing Tests for Test Command:** The `--existing-tests` option in `cmd_test_main` now accepts multiple test file paths. All test file contents are concatenated and provided to the LLM for context-aware test generation.
+
+Many thanks to Jiamin Cai for your contributions!
 
 ### Fix
 
-- adjust commit order in Makefile for public repo updates
+- **Setup Tool Shell Escaping:** Fixed a security and correctness issue where API keys containing special shell characters (`$`, `"`, `'`, `\`) would generate malformed shell scripts. Now uses `shlex.quote()` for proper POSIX shell escaping across bash, zsh, fish, and csh variants. Thank you to Dhruv Garg for your contributions!
+
+- **Makefile Commit Order:** Adjusted commit order for public repository updates to ensure proper synchronization.
+
+### Tests
+
+- Added 574 lines of comprehensive tests for `setup_tool.py` in `tests/test_setup_tool.py` covering shell script generation with special characters for bash, zsh, fish, and csh.
+
+- Added regression test #21 for multi-test file fix workflow, verifying end-to-end LLM-based fixes across multiple test files.
+
+- Added tests for numbered test file output in `tests/test_construct_paths.py` verifying automatic file numbering behavior.
+
+- Added tests for multi-file existing tests handling in `tests/test_cmd_test_main.py`.
 
 ## v0.0.87 (2025-12-18)
 
 
@@ -571,6 +571,8 @@ release: check-deps
 	else \
 		echo "Bumping version with commitizen"; \
 		python -m commitizen bump --increment PATCH --yes; \
+		echo "Pushing to origin before publishing"; \
+		git push origin main --tags; \
 		echo "Publishing new version"; \
 		$(MAKE) publish; \
 	fi
@@ -713,7 +715,10 @@ publish-public:
 	fi
 	@echo "Committing and pushing updates in public repo"
 	@if git -C "$(PUBLIC_PDD_REPO_DIR)" rev-parse --is-inside-work-tree >/dev/null 2>&1; then \
-		cd "$(PUBLIC_PDD_REPO_DIR)" && git add . && git commit -m "Bump version" && git fetch origin && git rebase origin/main && git push; \
+		cd "$(PUBLIC_PDD_REPO_DIR)" && git add . && git commit -m "Bump version" && git fetch origin && git rebase origin/main && \
+		CURR_VER=$$(sed -n 's/^version[[:space:]]*=[[:space:]]*"\([0-9.]*\)"/\1/p' pyproject.toml | head -n1) && \
+		(git tag -a "v$$CURR_VER" -m "Release v$$CURR_VER" 2>/dev/null || true) && \
+		git push && git push --tags; \
 		else \
 			echo "Skip commit: $(PUBLIC_PDD_REPO_DIR) is not a Git repo. Set PUBLIC_PDD_REPO_DIR to a clone of $(PUBLIC_PDD_REMOTE)."; \
 		fi
@@ -845,7 +850,10 @@ publish-public-cap:
 	fi
 	@echo "Committing and pushing updates in CAP public repo"
 	@if git -C "$(PUBLIC_PDD_CAP_REPO_DIR)" rev-parse --is-inside-work-tree >/dev/null 2>&1; then \
-		cd "$(PUBLIC_PDD_CAP_REPO_DIR)" && git add . && git commit -m "Bump version" && git fetch origin && git rebase origin/main && git push; \
+		cd "$(PUBLIC_PDD_CAP_REPO_DIR)" && git add . && git commit -m "Bump version" && git fetch origin && git rebase origin/main && \
+		CURR_VER=$$(sed -n 's/^version[[:space:]]*=[[:space:]]*"\([0-9.]*\)"/\1/p' pyproject.toml | head -n1) && \
+		(git tag -a "v$$CURR_VER" -m "Release v$$CURR_VER" 2>/dev/null || true) && \
+		git push && git push --tags; \
 		else \
 			echo "Skip commit: $(PUBLIC_PDD_CAP_REPO_DIR) is not a Git repo. Set PUBLIC_PDD_CAP_REPO_DIR to a clone of $(PUBLIC_PDD_CAP_REMOTE)."; \
 		fi
 
@@ -1,6 +1,6 @@
 # PDD (Prompt-Driven Development) Command Line Interface
 
-![PDD-CLI Version](https://img.shields.io/badge/pdd--cli-v0.0.88-blue) [![Discord](https://img.shields.io/badge/Discord-join%20chat-7289DA.svg?logo=discord&logoColor=white)](https://discord.gg/Yp4RTh8bG7)
+![PDD-CLI Version](https://img.shields.io/badge/pdd--cli-v0.0.89-blue) [![Discord](https://img.shields.io/badge/Discord-join%20chat-7289DA.svg?logo=discord&logoColor=white)](https://discord.gg/Yp4RTh8bG7)
 
 ## Introduction
 
@@ -285,7 +285,7 @@ export PDD_TEST_OUTPUT_PATH=/path/to/tests/
 
 ## Version
 
-Current version: 0.0.88
+Current version: 0.0.89
 
 To check your installed version, run:
 ```
@@ -1606,6 +1606,28 @@ pdd [GLOBAL OPTIONS] change --csv --output modified_prompts/ changes_batch.csv s
 
 Update prompts based on code changes. This command operates in two primary modes, listed under **Modes** below.
 
+**Agentic Prompt Optimization (Default)**
+
+The `update` command uses an agentic AI (Claude Code, Gemini, or Codex) by default to produce compact, high-quality prompts. The agent has full file access and performs a 4-step optimization:
+
+1. **Assess Differences**: Reads the prompt (including all `<include>` files) and compares against the modified code
+2. **Filter Using Guide + Tests**: Consults `docs/prompting_guide.md` and existing tests to determine what belongs in the prompt
+3. **Remove Duplication**: Eliminates redundant content that duplicates included files
+4. **Validate**: Ensures the prompt is human-readable and can reliably regenerate the code
+
+This produces prompts that are more concise while remaining clear to developers and reliable for code generation.
+
+**Prerequisites**: Requires one of these CLI tools installed and configured:
+- `claude` (Anthropic Claude Code)
+- `gemini` (Google Gemini CLI)
+- `codex` (OpenAI Codex CLI)
+
+If no agentic CLI is available, the command automatically falls back to the legacy 2-stage LLM update process.
+
+**Test-Aware Updates**: When tests exist for a module (e.g., `test_my_module.py`, `test_my_module_1.py`), the agentic update automatically discovers and considers them. Behaviors verified by tests don't need to be explicitly specified in the prompt, resulting in more compact prompts.
+
+**Modes:**
+
 1.  **Repository-Wide Mode (Default)**: When run with no file arguments, `pdd update` scans the entire repository. It finds all code/prompt pairs, creates any missing prompt files, and updates all of them based on the latest Git changes. This is the easiest way to keep your entire project in sync.
 
 2.  **Single-File Mode**: When you provide file arguments, the command operates on a specific file. There are three distinct use cases for this mode:
@@ -1657,13 +1679,23 @@ Options:
 - `--output LOCATION`: Specify where to save the updated prompt file. **If not specified, the original prompt file is overwritten to maintain it as the authoritative source of truth.** Set the environment variable `PDD_UPDATE_OUTPUT_PATH` if you want a different default location; it is used only when `--output` is omitted.
 - `--git`: Use git history to find the original code file, eliminating the need for the `INPUT_CODE_FILE` argument.
 - `--extensions EXTENSIONS`: In repository-wide mode, filter the update to only include files with the specified comma-separated extensions (e.g., `py,js,ts`).
+- `--simple`: Use the legacy 2-stage LLM update process instead of the default agentic mode. Useful when agentic CLIs are not available or for faster updates.
 
 Example (overwrite original prompt - default behavior):
 ```
 pdd [GLOBAL OPTIONS] update factorial_calculator_python.prompt src/modified_factorial_calculator.py src/original_factorial_calculator.py
 # This overwrites factorial_calculator_python.prompt in place
 ```
 
+Example (agentic vs simple mode):
+```bash
+# Default: Agentic mode (uses claude/gemini/codex for intelligent optimization)
+pdd update --git my_module_python.prompt src/my_module.py
+
+# Legacy: Simple 2-stage LLM update (faster, no agentic CLI required)
+pdd update --simple --git my_module_python.prompt src/my_module.py
+```
+
 
 
 
 
@@ -0,0 +1,84 @@
+from __future__ import annotations
+
+import os
+import sys
+from pathlib import Path
+
+# Add the project root to sys.path to allow importing the pdd package
+# This assumes the script lives in context/ directly under the project root (the directory that contains the pdd package)
+project_root = Path(__file__).resolve().parent.parent
+sys.path.append(str(project_root))
+
+from pdd.agentic_common import get_available_agents, run_agentic_task
+
+
+def main() -> None:
+    """
+    Demonstrates how to use the agentic_common module to:
+    1. Discover available agent providers (Claude, Gemini, Codex).
+    2. Run a headless agentic task to create a file.
+    3. Parse the results including cost and provider used.
+
+    Takes no arguments and returns None. As side effects it creates an
+    ``output`` directory under the current working directory (if it does not
+    exist yet) and prints progress and results to stdout.
+    """
+    
+    # 1. Setup the environment
+    # We will use a local './output' directory as the agent's working directory (cwd).
+    # The agent will have permission to read/write files in this directory.
+    output_dir = Path("./output")
+    # exist_ok=True lets the example be re-run without failing on an existing directory.
+    output_dir.mkdir(exist_ok=True)
+    
+    print(f"Agent working directory: {output_dir.resolve()}")
+
+    # 2. Check availability
+    # get_available_agents() checks if the CLI tool is on PATH and if API keys are set.
+    agents = get_available_agents()
+    print(f"Available agents: {agents}")
+
+    # No early return when nothing is detected: the example deliberately continues
+    # so that the failure path of run_agentic_task is demonstrated as well.
+    if not agents:
+        print("Warning: No agents detected. Ensure CLI tools (claude, gemini, codex) are installed and API keys are set.")
+        print("The run_agentic_task call below will fail gracefully.")
+
+    # 3. Define the task
+    # The instruction is natural language. We ask the agent to write a specific Python script.
+    instruction = (
+        "Create a file named 'generated_math.py'. "
+        "Inside it, write a Python function 'calculate_factorial(n)' that returns the factorial of n. "
+        "Add a main block that prints the factorial of 5."
+    )
+
+    print(f"\n--- Running Task: {instruction} ---")
+
+    # 4. Run the agentic task
+    # - cwd: The directory where the agent operates.
+    # - verbose: Prints debug logs (useful to see CLI commands).
+    # - label: A prefix for logs to identify this specific task.
+    # The result is unpacked as a 4-tuple: (success, output_message, cost, provider_used).
+    success, output_message, cost, provider_used = run_agentic_task(
+        instruction=instruction,
+        cwd=output_dir,
+        verbose=True,
+        quiet=False,
+        label="demo"
+    )
+
+    # 5. Inspect Results
+    print("\n--- Execution Results ---")
+    print(f"Success       : {success}")
+    # A falsy provider_used is shown as the literal text 'None'.
+    print(f"Provider Used : {provider_used if provider_used else 'None'}")
+    # Formatted with six decimal places; presumably USD given the '$' prefix — confirm against agentic_common.
+    print(f"Estimated Cost: ${cost:.6f}")
+    print(f"Agent Output  : {output_message}")
+
+    # 6. Verify the side effects (File Creation)
+    # Only look for the generated file when the task reported success; a missing
+    # file is reported with a message rather than raised as an error.
+    if success:
+        target_file = output_dir / "generated_math.py"
+        if target_file.exists():
+            print(f"\n[Verification] File '{target_file.name}' was created successfully.")
+            content = target_file.read_text(encoding="utf-8")
+            print("File Content:")
+            print("-" * 40)
+            print(content)
+            print("-" * 40)
+        else:
+            print(f"\n[Verification] Task succeeded, but file '{target_file.name}' was not found.")
+
+if __name__ == "__main__":
+    main()