Commit 44c0954

Update test generation pipeline for QualityFlow

- Improved configurations and code organization

1 parent da184b4 · commit 44c0954

6 files changed (+64, -79 lines)

qualityflow/README.md

Lines changed: 36 additions & 74 deletions
````diff
@@ -63,37 +63,39 @@ The main pipeline handles the complete test generation workflow:
 └─────────────────────────────────────────────────────────────────┘
 ```
 
-## 📦 Quick Start
+## 🚀 Quick Start
 
-### Prerequisites
-
-- Python 3.9+
-- ZenML installed (`pip install zenml`)
-- Git
-- OpenAI API key (optional, can use fake provider)
-
-### Setup
+Get QualityFlow running in 3 simple steps:
 
+### 1. Install Dependencies
 ```bash
 pip install -r requirements.txt
 ```
 
-2. **Set up OpenAI (optional)**:
+### 2. Optional: Set up OpenAI API Key
 ```bash
 export OPENAI_API_KEY="your-api-key-here"
 ```
+*Skip this step to use the fake provider for testing*
 
-3. **Run the pipeline**:
+### 3. Run the Pipeline
 ```bash
 python run.py
 ```
 
-That's it! The pipeline will:
-- Clone the configured repository (default: requests library)
-- Analyze Python files and select candidates
-- Generate tests using OpenAI (or fake provider if no API key)
+**That's it!** The pipeline will automatically:
+- Clone a sample repository (requests library by default)
+- Analyze Python files and select test candidates
+- Generate tests using LLM or fake provider
 - Run tests and measure coverage
-- Generate a comprehensive report comparing approaches
+- Create a detailed comparison report
+
+### What Happens Next?
+
+- Check the ZenML dashboard to see pipeline results
+- View generated test files and coverage reports
+- Compare LLM vs baseline test approaches
+- Experiment with different configurations
 
 ## ⚙️ Configuration
 
@@ -171,18 +173,17 @@ Requirements:
 
 ### A/B Testing Experiments
 
-Use run templates for systematic comparisons:
+Compare different configurations by running with different config files:
 
 ```bash
 # Compare prompt versions
-python scripts/run_experiment.py --config configs/experiment.default.yaml
-python scripts/run_experiment.py --config configs/experiment.strict.yaml
+python run.py --config configs/experiment.default.yaml
+python run.py --config configs/experiment.strict.yaml
 
-# Compare in ZenML dashboard:
+# Compare results in ZenML dashboard:
 # - Coverage metrics
 # - Test quality scores
 # - Token usage and cost
-# - Promotion decisions
 ```
 
 ### Production Deployment
@@ -199,36 +200,23 @@ zenml stack register production_stack \
   -a s3_store -c ecr_registry -o k8s_orchestrator --set
 ```
 
-### Scheduled Regression
-
-Register batch regression for daily execution:
+### Scheduled Execution
 
-```bash
-python scripts/run_batch.py --config configs/schedule.batch.yaml --schedule
-```
+For automated runs, set up scheduled execution using your preferred orchestration tool or ZenML's scheduling features.
 
 ## 🏗️ Project Structure
 
 ```
 qualityflow/
 ├── README.md
-├── pyproject.toml
 ├── requirements.txt
-├── .env.example
-├── zenml.yaml
 
 ├── configs/                      # Pipeline configurations
 │   ├── experiment.default.yaml   # Standard experiment settings
-│   ├── experiment.strict.yaml    # High-quality gates
-│   └── schedule.batch.yaml       # Batch regression schedule
-
-├── domain/                       # Core data models
-│   ├── schema.py                 # Pydantic models
-│   └── stages.py                 # Deployment stages
+│   └── experiment.strict.yaml    # High-quality gates
 
 ├── pipelines/                    # Pipeline definitions
-│   ├── generate_and_evaluate.py  # Experiment pipeline
-│   └── batch_regression.py       # Scheduled regression
+│   └── generate_and_evaluate.py  # Main pipeline
 
 ├── steps/                        # Pipeline steps
 │   ├── select_input.py           # Source specification
@@ -237,43 +225,27 @@ qualityflow/
 │   ├── gen_tests_agent.py        # LLM test generation
 │   ├── gen_tests_baseline.py     # Heuristic test generation
 │   ├── run_tests.py              # Test execution & coverage
-│   ├── evaluate_coverage.py      # Metrics & gate evaluation
-│   ├── compare_and_promote.py    # Model registry promotion
-│   ├── resolve_test_pack.py      # Test pack resolution
+│   ├── evaluate_coverage.py      # Metrics evaluation
 │   └── report.py                 # Report generation
 
 ├── prompts/                      # Jinja2 prompt templates
 │   ├── unit_test_v1.jinja        # Standard test generation
 │   └── unit_test_strict_v2.jinja # Comprehensive test generation
 
-├── materializers/                # Custom artifact handling
-├── utils/                        # Utility functions
-
-├── registry/                     # Test Pack registry docs
-│   └── README.md
-
-├── run_templates/                # Experiment templates
-│   ├── ab_agent_vs_strict.json   # A/B testing configuration
-│   └── baseline_only.json        # Baseline establishment
-
-├── scripts/                      # CLI scripts
-│   ├── run_experiment.py         # Experiment runner
-│   └── run_batch.py              # Batch regression runner
+├── examples/                     # Demo code for testing
+│   └── toy_lib/                  # Sample library
+│       ├── calculator.py
+│       └── string_utils.py
 
-└── examples/                     # Demo code for testing
-    └── toy_lib/                  # Sample library
-        ├── calculator.py
-        └── string_utils.py
+└── run.py                        # Main entry point
 ```
 
 ### Key Components
 
-- **Domain Models**: Pydantic schemas for type safety and validation
 - **Pipeline Steps**: Modular, reusable components with clear interfaces
 - **Prompt Templates**: Jinja2 templates for LLM test generation
-- **Configuration**: YAML-driven experiment and deployment settings
-- **Quality Gates**: Configurable thresholds for coverage and promotion
-- **Model Registry**: ZenML Model Registry integration for test pack versioning
+- **Configuration**: YAML-driven experiment settings
+- **Test Generation**: Both LLM-based and heuristic approaches for comparison
 
 ## 🚀 Production Deployment
 
@@ -295,17 +267,7 @@ zenml stack register production \
 
 ### Scheduled Execution
 
-Set up automated regression testing:
-
-```bash
-# Register schedule (example with ZenML Cloud)
-python scripts/run_batch.py --config configs/schedule.batch.yaml --schedule
-
-# Monitor via dashboard:
-# - Daily regression results
-# - Coverage trend analysis
-# - Test pack performance
-```
+Set up automated regression testing using ZenML's scheduling capabilities or your preferred orchestration platform.
 
 ## 🤝 Contributing
 
@@ -344,7 +306,7 @@ Run with debug logging:
 
 ```bash
 export ZENML_LOGGING_VERBOSITY=DEBUG
-python scripts/run_experiment.py --config configs/experiment.default.yaml
+python run.py --config configs/experiment.default.yaml
 ```
 
 ## 📚 Resources
````
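The "run tests and measure coverage" stage that the Quick Start describes boils down to invoking pytest with coverage instrumentation over the generated tests. A minimal sketch of that idea, assuming pytest and pytest-cov are installed; this is illustrative only, not the actual run_tests.py implementation, and the directory and package names are placeholders:

```python
# Illustrative sketch only; not the actual run_tests.py implementation.
# Assumes pytest and pytest-cov are installed in the environment.
import subprocess
import sys


def run_tests_with_coverage(tests_dir: str, package: str) -> int:
    """Run generated tests against `package` and print a coverage summary."""
    result = subprocess.run(
        [
            sys.executable, "-m", "pytest", tests_dir,
            f"--cov={package}",            # measure coverage for the target package
            "--cov-report=term-missing",   # show uncovered line numbers
        ],
        capture_output=True,
        text=True,
    )
    print(result.stdout)
    return result.returncode  # 0 means all tests passed


if __name__ == "__main__":
    run_tests_with_coverage("generated_tests/", "toy_lib")
```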

qualityflow/run.py

Lines changed: 9 additions & 3 deletions
```diff
@@ -3,6 +3,7 @@
 """
 
 from pathlib import Path
+from typing import Union
 
 import click
 from pipelines import generate_and_evaluate
@@ -26,15 +27,20 @@
     default=False,
     help="Disable pipeline caching and force fresh execution",
 )
-def main(config: str | None, no_cache: bool):
+def main(config: Union[str, None], no_cache: bool):
     """Run QualityFlow test generation and coverage analysis pipeline.
 
     Simple pipeline that generates tests using LLM, runs them, measures coverage,
     and compares results against baseline approaches.
     """
 
-    project_root = Path(__file__).parent
-    default_config = project_root / "configs" / "experiment.default.yaml"
+    try:
+        project_root = Path(__file__).resolve().parent
+        default_config = project_root / "configs" / "experiment.default.yaml"
+    except Exception:
+        # Fallback to current working directory
+        default_config = Path.cwd() / "configs" / "experiment.default.yaml"
+
     chosen_config = config or str(default_config)
 
     try:
```
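For context on the annotation change: `str | None` is PEP 604 syntax, valid at runtime only on Python 3.10+, while `Union[str, None]` (equivalently `Optional[str]`) also works on the Python 3.9 baseline the README previously named. A minimal illustration:

```python
# `Union[str, None]` and `Optional[str]` spell the same type and work on
# Python 3.9; the bare `str | None` form requires Python 3.10+ (PEP 604)
# unless annotation evaluation is deferred via
# `from __future__ import annotations`.
from typing import Optional, Union


def f_union(config: Union[str, None]) -> None:
    ...


def f_optional(config: Optional[str]) -> None:  # same type, shorter spelling
    ...
```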

qualityflow/steps/fetch_source.py

Lines changed: 3 additions & 0 deletions
```diff
@@ -1,5 +1,8 @@
 """
 Fetch source code workspace step.
+
+This module provides functionality to clone Git repositories and prepare
+workspaces for code analysis and test generation.
 """
 
 import subprocess
```
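As background for this step's purpose, a shallow Git clone into a scratch workspace takes a single subprocess call. A hypothetical sketch, assuming the `git` CLI is on PATH; this is not the step's actual code:

```python
# Hypothetical sketch of the clone-and-prepare idea; not fetch_source.py itself.
import subprocess
import tempfile
from pathlib import Path


def clone_workspace(repo_url: str, ref: str = "main") -> Path:
    """Shallow-clone `repo_url` at branch/tag `ref` into a fresh temp directory."""
    workspace = Path(tempfile.mkdtemp(prefix="qualityflow-"))
    subprocess.run(
        ["git", "clone", "--depth", "1", "--branch", ref, repo_url, str(workspace)],
        check=True,  # raise CalledProcessError if the clone fails
    )
    return workspace


if __name__ == "__main__":
    print(clone_workspace("https://github.com/psf/requests.git"))
```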

qualityflow/steps/gen_tests_agent.py

Lines changed: 8 additions & 2 deletions
```diff
@@ -64,8 +64,14 @@ def gen_tests_agent(
 
     # Load prompt template from QualityFlow project directory
     # Note: workspace_dir is the cloned repo, but prompts are in QualityFlow project
-    project_root = Path(__file__).parent.parent  # Go up from steps/ to project root
-    prompt_file = project_root / prompt_path
+    try:
+        # Try to resolve project root more robustly
+        current_file = Path(__file__).resolve()
+        project_root = current_file.parent.parent  # Go up from steps/ to project root
+        prompt_file = project_root / prompt_path
+    except Exception:
+        # Fallback to current working directory if path resolution fails
+        prompt_file = Path.cwd() / prompt_path
 
     if prompt_file.exists():
         with open(prompt_file, "r") as f:
```
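The pattern this change introduces, resolving a resource relative to the module's own location first and falling back to the current working directory, generalizes to any file lookup. A small standalone sketch of that lookup order (hypothetical helper, standard library only):

```python
# Hypothetical helper illustrating the lookup order used above:
# module-relative path first, current working directory as fallback.
from pathlib import Path
from typing import Optional


def find_resource(relative: str) -> Optional[Path]:
    """Return the first existing candidate for `relative`, or None.

    Assumes this helper lives one directory below the project root,
    mirroring steps/gen_tests_agent.py.
    """
    candidates = [
        Path(__file__).resolve().parent.parent / relative,  # project root
        Path.cwd() / relative,                              # fallback: CWD
    ]
    for candidate in candidates:
        if candidate.exists():
            return candidate
    return None
```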

qualityflow/steps/gen_tests_baseline.py

Lines changed: 4 additions & 0 deletions
```diff
@@ -1,5 +1,9 @@
 """
 Generate baseline/skeleton tests using heuristics.
+
+This module creates simple test templates by analyzing Python AST to identify
+functions and classes, generating skeleton test code for comparison with
+LLM-generated tests.
 """
 
 import ast
```
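To make the new docstring concrete: identifying top-level functions with the `ast` module and emitting one stub per function might look roughly like this. An independent sketch, not the module's actual logic:

```python
# Independent sketch of AST-driven test skeletons; not gen_tests_baseline.py itself.
import ast


def make_test_skeleton(source: str) -> str:
    """Emit one pytest-style stub per top-level public function in `source`."""
    tree = ast.parse(source)
    stubs = []
    for node in tree.body:
        if isinstance(node, ast.FunctionDef) and not node.name.startswith("_"):
            stubs.append(
                f"def test_{node.name}():\n"
                f"    # TODO: call {node.name}() and assert on the result\n"
                f"    raise NotImplementedError\n"
            )
    return "\n\n".join(stubs)


if __name__ == "__main__":
    print(make_test_skeleton("def add(a, b):\n    return a + b\n"))
```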

qualityflow/steps/report.py

Lines changed: 4 additions & 0 deletions
```diff
@@ -1,5 +1,9 @@
 """
 Generate comprehensive pipeline report.
+
+This module creates detailed markdown reports comparing LLM-generated tests
+against baseline tests, including coverage metrics, quality assessments,
+and recommendations for improvement.
 """
 
 import tempfile
```
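As a toy illustration of such a comparison report, two coverage numbers rendered as a markdown table could be as small as the following; the real format is defined in report.py, and the function below is hypothetical:

```python
# Illustrative sketch of a minimal comparison report; not report.py itself.
def render_report(llm_cov: float, baseline_cov: float) -> str:
    """Render a small markdown table comparing the two approaches."""
    winner = "LLM" if llm_cov > baseline_cov else "baseline"
    return (
        "# QualityFlow Report\n\n"
        "| Approach | Line coverage |\n"
        "|----------|---------------|\n"
        f"| LLM      | {llm_cov:.1%} |\n"
        f"| Baseline | {baseline_cov:.1%} |\n\n"
        f"**Higher coverage:** {winner}\n"
    )


if __name__ == "__main__":
    print(render_report(0.72, 0.41))
```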
