From c73872d5989b9de64ae88b50ccf54f52f5fd9ea2 Mon Sep 17 00:00:00 2001 From: kohankhaki Date: Fri, 21 Nov 2025 17:12:31 -0500 Subject: [PATCH 1/7] Added Pipeline Schemas Doc. --- src/schemas/PIPELINE_SCHEMAS.md | 722 ++++++++++++++++++++++++++++++++ 1 file changed, 722 insertions(+) create mode 100644 src/schemas/PIPELINE_SCHEMAS.md diff --git a/src/schemas/PIPELINE_SCHEMAS.md b/src/schemas/PIPELINE_SCHEMAS.md new file mode 100644 index 0000000..fefeb09 --- /dev/null +++ b/src/schemas/PIPELINE_SCHEMAS.md @@ -0,0 +1,722 @@ +# ACE Pipeline Standardized Schemas + +This document defines the standardized input and output formats for each stage of the ACE pipeline. These schemas ensure consistency across different implementations and enable interoperability between pipeline stages. + +## Implementation Approach + +**Pipeline Pattern:** +Each stage follows a consistent pattern: +1. **Stage Implementation**: Produces dataclass objects (or lists of dataclasses) + metadata +2. **Save Function**: Takes dataclass objects + metadata → saves to JSON file + +**Functions (to be provided):** +- **Save functions**: `save__output(data, metadata, output_path)` - Handle JSON serialization, file writing, directory creation +- **Load functions**: `load__output(file_path) -> ` - Load dataclass objects from JSON files + +Dataclasses provide type safety, validation, and clear structure. JSON is the serialization format. + +## Pipeline Stages + +0. **Experiment Setup** - Initialize experiment and create domain metadata +1. **Area Generation** - Generate domain areas +2. **Capability Generation** - Generate capabilities for each area +3. **Task Generation** - Generate tasks for each capability +4. **Solution Generation** - Generate solutions for each task +5. **Validation** - Validate solutions against tasks + +**Note:** Experiment configuration must remain consistent throughout the pipeline. Once set during experiment setup, it should not be changed to avoid inconsistencies. 
+ +--- + +## Directory Structure + +All outputs are stored in the following directory structure, organized hierarchically by area and capability for easy resumability, with versioning support: + +``` +/ + / + experiment.json # Experiment metadata (all configuration) + domain.json # Domain metadata (contains domain_id) + areas/ + / + areas.json # All areas for this experiment run + / + capabilities/ + / + capabilities.json # All capabilities for this area run + / + tasks/ + / + tasks.json # All tasks for this capability run + solutions/ + / + _solution.json # Individual solution files (e.g., task_000_solution.json) + validation/ + / + _validation.json # Validation results per task (e.g., task_000_validation.json) +``` + +**Example:** +``` +agentic_outputs/ + r0_10x10/ + experiment.json # Experiment configuration + domain.json + areas/ + _20251009_122040/ + areas.json + _20251010_143022/ + areas.json + area_000/ # area_id = "area_000" (first area) + capabilities/ + _20251009_131252/ + capabilities.json + cap_000/ # capability_id = "cap_000" (first capability in area_000) + tasks/ + _20251014_114358/ + tasks.json + solutions/ + _20251016_182128/ + task_000_solution.json + task_001_solution.json + task_002_solution.json + validation/ + _20251017_091500/ + task_000_validation.json + task_001_validation.json + task_002_validation.json + cap_001/ # capability_id = "cap_001" (second capability in area_000) + tasks/ + _20251014_114358/ + tasks.json + area_001/ # area_id = "area_001" (second area) + capabilities/ + _20251009_131252/ + capabilities.json + cap_000/ # capability_id = "cap_000" (first capability in area_001) + tasks/ + _20251014_114358/ + tasks.json +``` + +**Directory Naming Rules:** +- ``: Base output directory (e.g., `agentic_outputs`) +- ``: Experiment identifier (e.g., `r0_10x10`) +- ``: Timestamp tag in format `_YYYYMMDD_HHMMSS` (e.g., `_20251009_122040`) + - Generated automatically when a stage is run + - Allows multiple versions/runs of the same stage + 
- Each stage has its own tag (independent versioning) +- `<area_id>`: String identifier in format `area_` + zero-padded 3-digit number (e.g., `area_000`, `area_001`) + - Format: `area_` prefix + zero-padded 3-digit number (000, 001, 002, ...) + - Assigned sequentially starting from 000 when areas are generated + - Example: First area → `area_000`, Second area → `area_001`, etc. + - Unique within an experiment + - Used in directory paths for clean, explicit paths + - Human-readable name stored in `areas.json` +- `<capability_id>`: String identifier in format `cap_` + zero-padded 3-digit number (e.g., `cap_000`, `cap_001`) + - Format: `cap_` prefix + zero-padded 3-digit number (000, 001, 002, ...) + - Assigned sequentially starting from 000 within each area when capabilities are generated + - Example: First capability in area_000 → `cap_000`, Second capability → `cap_001`, etc. + - Unique within an area (but can repeat across areas, e.g., `area_000/cap_000/` and `area_001/cap_000/`) + - Used in directory paths for clean, explicit paths + - Human-readable name stored in `capabilities.json` +- `<task_id>`: String identifier in format `task_` + zero-padded 3-digit number (e.g., `task_000`, `task_001`) + - Format: `task_` prefix + zero-padded 3-digit number (000, 001, 002, ...) + - Assigned sequentially starting from 000 within each capability when tasks are generated + - Example: First task in cap_000 → `task_000`, Second task → `task_001`, etc. 
+ - Unique within a capability + + +**File Naming:** +- Experiment: `experiment.json` (no versioning, one file per experiment, contains all configuration) +- Domain: `domain.json` (no versioning, one file per experiment) +- Areas: `areas.json` (versioned by tag: `areas/<tag>/areas.json`) +- Capabilities: `capabilities.json` (versioned by tag: `<area_id>/capabilities/<tag>/capabilities.json`) +- Tasks: `tasks.json` (versioned by tag: `<area_id>/<capability_id>/tasks/<tag>/tasks.json`) +- Solutions: `<task_id>_solution.json` (versioned by tag: `<area_id>/<capability_id>/solutions/<tag>/<task_id>_solution.json`, e.g., `task_000_solution.json`) +- Validation: `<task_id>_validation.json` (versioned by tag: `<area_id>/<capability_id>/validation/<tag>/<task_id>_validation.json`, e.g., `task_000_validation.json`) + +**Resumability Benefits:** +- Each area has its own directory - easy to see which areas are processed +- Each capability has its own directory - easy to see which capabilities are complete +- Missing files are immediately visible in the directory structure +- Can resume from any area or capability by checking if directory/files exist +- Tags allow multiple runs/versions to coexist +- Can check latest tag to determine most recent run + +**Versioning Strategy:** +- Each stage generates a new tag when run (format: `_YYYYMMDD_HHMMSS`) +- Tags are independent per stage (areas can have a different tag than capabilities) +- **Input tags**: Each stage requires tag(s) from previous stage(s) to load input data + - Stage 1 (Areas): No input tag (uses domain.json) + - Stage 2 (Capabilities): Requires `areas_tag` from Stage 1 + - Stage 3 (Tasks): Requires `capabilities_tag` from Stage 2 + - Stage 4 (Solutions): Requires `tasks_tag` from Stage 3 + - Stage 5 (Validation): Requires both `tasks_tag` (Stage 3) and `solutions_tag` (Stage 4) +- **Resume tags**: Optional - If provided, stage loads existing output and continues incomplete generation + - Checks for existing files with resume tag + - Identifies which items are incomplete (e.g., missing capabilities, tasks, solutions) + - Continues generation only for 
incomplete items + - Preserves existing completed items +- **New tags**: If no resume tag provided, generates new tag and creates fresh output + +--- + +## Stage 0: Experiment Setup + +### Input +All inputs come from the configuration file. Important fields: +- **Experiment ID**: String - The experiment identifier (e.g., "r0_10x10") +- **Domain Name**: String - The domain name (e.g., "personal finance", "mathematics") +- **Description**: String (optional) - Domain description +- **Configuration**: Dict - Complete experiment configuration (all config sections: `global_cfg`, `debate_cfg`, `agents`, `area_generation`, `capability_generation`, `task_generation`, `task_solver`, `exp_cfg`, etc.) + +### Tag Handling +- **No input tag required** (first stage) +- **No resume tag** - Always creates new files (overwrites if exists) + +### Outputs + +This stage creates two files: +1. `experiment.json` - Experiment metadata and complete configuration +2. `domain.json` - Domain metadata + +#### Output 1: `experiment.json` + +**Stage Output:** Experiment dataclass + PipelineMetadata +**Save Function:** `save_experiment_output(experiment: Experiment, metadata: PipelineMetadata, output_path: Path)` + +**Implementation:** +- Stage creates `Experiment` dataclass object with experiment information and configuration +- Stage creates `PipelineMetadata` dataclass object with metadata +- Pass both to `save_experiment_output(experiment, metadata, output_path)` which creates `ExperimentMetadata` dataclass, serializes to JSON, and writes to file + +**File Path:** `//experiment.json` + +```json +{ + "metadata": { + "experiment_id": "r0_10x10", + "stage": "experiment_setup", + "timestamp": "2025-11-06T12:00:00Z" + }, + "experiment": { + "experiment_id": "r0_10x10", + "domain": "personal finance", + "domain_id": "personal_finance", + "configuration": { + "global_cfg": { + "domain": "personal finance", + "output_dir": "agentic_outputs" + }, + "debate_cfg": { + "max_round": 5 + }, + "agents": { + 
"scientist_a": { + "model_name": "gpt-5", + "seed": 8 + }, + "scientist_b": { + "model_name": "gemini-2.5-pro", + "seed": 88 + }, + "moderator": { + "model_name": "claude-opus-4-1-20250805", + "seed": 888 + } + }, + "area_generation": { + "num_areas": 10 + }, + "capability_generation": { + "num_capabilities_per_area": 5 + }, + "task_generation": { + "num_final_problems_per_capability": 3, + "buffer_param": 2, + "max_rounds": 3 + }, + "task_solver": { + "max_tasks": 0, + "max_rounds": 1 + }, + "exp_cfg": { + "exp_id": "r0_10x10" + } + } + } +} +``` + +**Schema (JSON representation of ExperimentMetadata dataclass):** +- `metadata`: Object containing pipeline metadata + - `experiment_id`: String (required, experiment identifier) + - `stage`: String (required, value: "experiment_setup") + - `timestamp`: String (required, ISO 8601 format) +- `experiment`: Object containing experiment information + - `experiment_id`: String (required, experiment identifier) + - `domain`: String (required, human-readable domain name) + - `domain_id`: String (required, slugified domain identifier) + - `configuration`: Object (required, all configuration used for this experiment) + - Contains all config sections: `global_cfg`, `debate_cfg`, `agents`, `area_generation`, `capability_generation`, `task_generation`, `task_solver`, `exp_cfg`, etc. 
+ - Structure matches the input configuration format exactly + +#### Output 2: `domain.json` + +**Stage Output:** Domain dataclass object + PipelineMetadata +**Save Function:** `save_domain_output(domain: Domain, metadata: PipelineMetadata, output_path: Path)` + +**Implementation:** +- Stage creates `Domain` dataclass object with domain information +- Stage creates `PipelineMetadata` dataclass object with metadata +- Pass both to `save_domain_output(domain, metadata, output_path)` which serializes to JSON and writes to file + +**File Path:** `//domain.json` + +```json +{ + "metadata": { + "domain": "personal finance", + "domain_id": "personal_finance", + "stage": "experiment_setup", + "timestamp": "2025-11-06T12:00:00Z" + }, + "domain": { + "name": "personal finance", + "domain_id": "personal_finance", + "description": "Personal finance domain covering budgeting, investing, retirement planning, etc." + } +} +``` + +**Schema (JSON representation of Domain dataclass):** +- `metadata`: Object containing pipeline metadata + - `domain`: String (required, human-readable domain name) + - `domain_id`: String (required, slugified domain identifier) + - `stage`: String (required, value: "experiment_setup") + - `timestamp`: String (required, ISO 8601 format) +- `domain`: Object containing domain information + - `name`: String (required, human-readable domain name) + - `domain_id`: String (required, slugified identifier, filesystem-safe) + - `description`: String (optional, domain description) + +--- + +## Stage 1: Area Generation + +### Input +- **Domain**: Domain object (from Stage 0) - Loaded from `domain.json` +- **Configuration**: Dict - Stage-specific configuration (e.g., `num_areas`) + +### Tag Handling +- **Input tag**: Not applicable (uses domain.json which has no tag) +- **Resume tag**: Optional - If provided, loads from `areas//areas.json` and continues incomplete area generation +- **New tag**: If no resume tag provided, generates new tag (format: 
`_YYYYMMDD_HHMMSS`) and creates `areas//areas.json` + +### Output: `areas.json` + +**Stage Output:** List[Area] dataclasses + PipelineMetadata +**Save Function:** `save_areas_output(areas: List[Area], metadata: PipelineMetadata, output_path: Path)` + +**Implementation:** +- Stage generates list of `Area` dataclass objects +- Stage creates `PipelineMetadata` dataclass object with metadata +- Pass both to `save_areas_output(areas, metadata, output_path)` which creates `AreaGenerationOutput` dataclass, serializes to JSON, and writes to file + +**File Path:** `//areas//areas.json` +```json +{ + "metadata": { + "domain": "personal finance", + "domain_id": "personal_finance", + "stage": "area_generation", + "tag": "_20251009_122040", + "timestamp": "2025-11-06T12:00:00Z" + }, + "areas": [ + { + "name": "Cash Flow & Budget Management", + "area_id": "area_000", + "description": "Design and monitor budgets using various methodologies..." + } + ] +} +``` + +**Schema (JSON representation of AreaGenerationOutput dataclass):** +- `metadata`: Object containing pipeline metadata + - `domain`: String (required, human-readable domain name) + - `domain_id`: String (required, slugified domain identifier) + - `stage`: String (required, value: "area_generation") + - `tag`: String (required, format `_YYYYMMDD_HHMMSS`, the tag used for this run's output) + - `timestamp`: String (required, ISO 8601 format) + - Note: No `input_tag` field (Stage 1 uses `domain.json` which has no tag) +- `areas`: Array of Area objects + - `name`: String (required, human-readable name, unique within domain) + - `area_id`: String (required, format `area_` + zero-padded 3-digit number, unique within experiment) + - `description`: String (required, detailed description) + +--- + +## Stage 2: Capability Generation + +### Input +- **Areas**: Array of Area objects (from Stage 1) - Loaded from `areas//areas.json` +- **Areas tag**: String - Tag from Stage 1 output (e.g., `_20251009_122040`) +- **Configuration**: Dict 
- Stage-specific configuration (e.g., `num_capabilities_per_area`) + +### Tag Handling +- **Input tag**: Required - `areas_tag` from Stage 1 output (e.g., `_20251009_122040`) + - Loads areas from `areas//areas.json` +- **Resume tag**: Optional - If provided, loads from `/capabilities//capabilities.json` for each area and continues incomplete capability generation +- **New tag**: If no resume tag provided, generates new tag (format: `_YYYYMMDD_HHMMSS`) and creates `/capabilities//capabilities.json` for each area + +### Output: `capabilities.json` (one per area) + +**Stage Output:** List[Capability] dataclasses + PipelineMetadata +**Save Function:** `save_capabilities_output(capabilities: List[Capability], metadata: PipelineMetadata, output_path: Path)` + +**Implementation:** +- Stage generates list of `Capability` dataclass objects for an area +- Stage creates `PipelineMetadata` dataclass object with metadata (includes area_id) +- Pass both to `save_capabilities_output(capabilities, metadata, output_path)` which creates `CapabilityGenerationOutput` dataclass, serializes to JSON, and writes to file + +**File Path:** `///capabilities//capabilities.json` +Where `` is a string in format `area_` + zero-padded 3-digit number (e.g., `area_000`, `area_001`) +```json +{ + "metadata": { + "domain": "personal finance", + "domain_id": "personal_finance", + "area": "Cash Flow & Budget Management", + "area_id": "area_000", + "stage": "capability_generation", + "input_tag": "_20251009_122040", + "tag": "_20251009_131252", + "timestamp": "2025-11-06T12:30:00Z" + }, + "capabilities": [ + { + "name": "budget_policy_and_structure", + "capability_id": "cap_000", + "description": "Define the strategic framework and methodology for budgeting...", + "area": "Cash Flow & Budget Management", + "area_id": "area_000" + } + ] +} +``` + +**Schema (JSON representation of CapabilityGenerationOutput dataclass):** +- `metadata`: Object containing pipeline metadata + - `domain`: String (required, 
human-readable domain name) + - `domain_id`: String (required, slugified domain identifier) + - `area`: String (required, human-readable area name, must match an area name from Stage 1) + - `area_id`: String (required, format `area_` + zero-padded 3-digit number, must match an area_id from Stage 1) + - `stage`: String (required, value: "capability_generation") + - `input_tag`: String (required, format `_YYYYMMDD_HHMMSS`, the areas tag from Stage 1 used as input) + - `tag`: String (required, format `_YYYYMMDD_HHMMSS`, the tag used for this run's output) + - `timestamp`: String (required, ISO 8601 format) +- `capabilities`: Array of Capability objects + - `name`: String (required, human-readable name, unique within area) + - `capability_id`: String (required, format `cap_` + zero-padded 3-digit number starting from 000, unique within area) + - `description`: String (required, detailed description) + - `area`: String (required, human-readable area name, must match parent area name) + - `area_id`: String (required, format `area_` + zero-padded 3-digit number, must match parent area_id) + +--- + +## Stage 3: Task Generation + +### Input +- **Capabilities**: Array of Capability objects (from Stage 2) - Loaded from `/capabilities//capabilities.json` +- **Capabilities tag**: String - Tag from Stage 2 output (e.g., `_20251009_131252`) +- **Configuration**: Dict - Stage-specific configuration (e.g., `num_final_problems_per_capability`) + +### Tag Handling +- **Input tag**: Required - `capabilities_tag` from Stage 2 output (e.g., `_20251009_131252`) + - Loads capabilities from `/capabilities//capabilities.json` for each area +- **Resume tag**: Optional - If provided, loads from `//tasks//tasks.json` for each capability and continues incomplete task generation +- **New tag**: If no resume tag provided, generates new tag (format: `_YYYYMMDD_HHMMSS`) and creates `//tasks//tasks.json` for each capability + +### Output: `tasks.json` (one per capability) + +**Stage Output:** 
Dict[str, Task] (mapping task_id to Task dataclass) + PipelineMetadata +**Save Function:** `save_tasks_output(tasks: Dict[str, Task], metadata: PipelineMetadata, output_path: Path)` + +**Implementation:** +- Stage generates dictionary mapping `task_id` strings to `Task` dataclass objects +- Stage creates `PipelineMetadata` dataclass object with metadata (includes area_id, capability_id) +- Pass both to `save_tasks_output(tasks, metadata, output_path)` which creates `TaskGenerationOutput` dataclass, serializes to JSON, and writes to file + +**File Path:** `////tasks//tasks.json` +Where `` is format `area_` + zero-padded 3-digit number, `` is format `cap_` + zero-padded 3-digit number (e.g., `area_000/cap_000/`) +```json +{ + "metadata": { + "domain": "personal finance", + "domain_id": "personal_finance", + "area": "Cash Flow & Budget Management", + "area_id": "area_000", + "capability": "budget_policy_and_structure", + "capability_id": "cap_000", + "stage": "task_generation", + "input_tag": "_20251009_131252", + "tag": "_20251014_114358", + "timestamp": "2025-11-06T13:00:00Z" + }, + "tasks": { + "task_000": { + "task": "You are advising a client who wants to set up a zero-based budget...", + "capability_id": "cap_000" + }, + "task_001": { + "task": "A family of four needs to restructure their budget...", + "capability_id": "cap_000" + } + } +} +``` + +**Schema (JSON representation of TaskGenerationOutput dataclass):** +- `metadata`: Object containing pipeline metadata + - `domain`: String (required, human-readable domain name) + - `domain_id`: String (required, slugified domain identifier) + - `area`: String (required, human-readable area name, must match an area name from Stage 1) + - `area_id`: String (required, format `area_` + zero-padded 3-digit number, must match an area_id from Stage 1) + - `capability`: String (required, human-readable capability name, must match a capability name from Stage 2) + - `capability_id`: String (required, format `cap_` + 
zero-padded 3-digit number, must match a capability_id from Stage 2) + - `stage`: String (required, value: "task_generation") + - `input_tag`: String (required, format `_YYYYMMDD_HHMMSS`, the capabilities tag from Stage 2 used as input) + - `tag`: String (required, format `_YYYYMMDD_HHMMSS`, the tag used for this run's output) + - `timestamp`: String (required, ISO 8601 format) +- `tasks`: Object mapping task_id to Task object + - `task_id`: String (required, format: `task_` + zero-padded 3-digit number, unique within capability) + - `task`: String (required, the task/problem text) + - `capability_id`: String (required, format `cap_` + zero-padded 3-digit number, must match parent capability_id) + +--- + +## Stage 4: Solution Generation + +### Input +- **Tasks**: Object mapping task_id to Task objects (from Stage 3) - Loaded from `//tasks//tasks.json` +- **Tasks tag**: String - Tag from Stage 3 output (e.g., `_20251014_114358`) +- **Configuration**: Dict - Stage-specific configuration (e.g., `max_rounds`) + +### Tag Handling +- **Input tag**: Required - `tasks_tag` from Stage 3 output (e.g., `_20251014_114358`) + - Loads tasks from `//tasks//tasks.json` for each capability +- **Resume tag**: Optional - If provided, checks for existing solutions in `//solutions//_solution.json` and continues incomplete solution generation +- **New tag**: If no resume tag provided, generates new tag (format: `_YYYYMMDD_HHMMSS`) and creates `//solutions//_solution.json` for each task + +### Output: `_solution.json` (one per task) + +**Stage Output:** TaskSolution dataclass + List[AgentSolution] dataclasses + PipelineMetadata +**Save Function:** `save_solution_output(task_solution: TaskSolution, all_solutions: List[AgentSolution], metadata: PipelineMetadata, output_path: Path)` + +**Implementation:** +- Stage generates `TaskSolution` dataclass object with solution information +- Stage generates list of `AgentSolution` dataclass objects +- Stage creates `PipelineMetadata` dataclass 
object with metadata (includes area_id, capability_id, task_id) +- Pass all to `save_solution_output(task_solution, all_solutions, metadata, output_path)` which creates `SolutionGenerationOutput` dataclass, serializes to JSON, and writes to file + +**File Path:** `////solutions//_solution.json` +Where `` is format `area_` + zero-padded 3-digit number, `` is format `cap_` + zero-padded 3-digit number, `` is format `task_` + zero-padded 3-digit number (e.g., `task_000_solution.json`) +```json +{ + "metadata": { + "domain": "personal finance", + "domain_id": "personal_finance", + "area": "Cash Flow & Budget Management", + "area_id": "area_000", + "capability_name": "budget_policy_and_structure", + "capability_id": "cap_000", + "task_id": "task_000", + "stage": "solution_generation", + "input_tag": "_20251014_114358", + "tag": "_20251016_182128", + "timestamp": "2025-11-06T13:30:00Z" + }, + "task_id": "task_000", + "capability_name": "budget_policy_and_structure", + "capability_id": "cap_000", + "area_name": "Cash Flow & Budget Management", + "area_id": "area_000", + "problem": "You are advising a client who wants to set up a zero-based budget...", + "solution": "The optimal approach is to use a zero-based budgeting methodology...", + "numerical_answer": "{\"budget_allocation\": {...}}", + "reasoning": "Both agents agreed on the zero-based approach because...", + "consensus_reached": true, + "total_rounds": 2, + "all_solutions": [ + { + "agent_id": "A", + "task_id": "task_000", + "thought": "I need to analyze the client's financial situation...", + "final_answer": "{\"recommendation\": {...}}", + "numerical_answer": "null", + "round_number": "0" + }, + { + "agent_id": "B", + "task_id": "task_000", + "thought": "The client's income and expenses suggest...", + "final_answer": "{\"recommendation\": {...}}", + "numerical_answer": "null", + "round_number": "0" + } + ] +} +``` + +**Schema (JSON representation of SolutionGenerationOutput dataclass):** +- `metadata`: Object 
containing pipeline metadata + - `domain`: String (required, human-readable domain name) + - `domain_id`: String (required, slugified domain identifier) + - `area`: String (required, human-readable area name, must match an area name from Stage 1) + - `area_id`: String (required, format `area_` + zero-padded 3-digit number, must match an area_id from Stage 1) + - `capability_name`: String (required, human-readable capability name, must match a capability name from Stage 2) + - `capability_id`: String (required, format `cap_` + zero-padded 3-digit number, must match a capability_id from Stage 2) + - `task_id`: String (required, must match a task_id from Stage 3) + - `stage`: String (required, value: "solution_generation") + - `input_tag`: String (required, format `_YYYYMMDD_HHMMSS`, the tasks tag from Stage 3 used as input) + - `tag`: String (required, format `_YYYYMMDD_HHMMSS`, the tag used for this run's output) + - `timestamp`: String (required, ISO 8601 format) +- `task_id`: String (required, must match metadata.task_id) +- `capability_name`: String (required, human-readable capability name, must match metadata.capability_name) +- `capability_id`: String (required, format `cap_` + zero-padded 3-digit number, must match metadata.capability_id) +- `area_name`: String (required, human-readable area name, must match metadata.area) +- `area_id`: String (required, format `area_` + zero-padded 3-digit number, must match metadata.area_id) +- `problem`: String (required, the task text from Stage 3) +- `solution`: String (required, the final consensus solution) +- `numerical_answer`: String (optional, JSON string with numerical results) +- `reasoning`: String (required, explanation of consensus or disagreement) +- `consensus_reached`: Boolean (required, whether agents reached consensus) +- `total_rounds`: Integer (required, number of debate rounds) +- `all_solutions`: Array of AgentSolution objects + - `agent_id`: String (required, "A" or "B") + - `task_id`: String 
(required, must match parent task_id) + - `thought`: String (required, agent's reasoning) + - `final_answer`: String (required, JSON string with agent's solution) + - `numerical_answer`: String (optional, JSON string or "null") + - `round_number`: String (required, round number as string) + +--- + +## Stage 5: Validation + +### Input +- **Tasks**: Object mapping task_id to Task objects (from Stage 3) - Loaded from `//tasks//tasks.json` +- **Tasks tag**: String - Tag from Stage 3 output (e.g., `_20251014_114358`) +- **Solutions**: Object mapping task_id to TaskSolution objects (from Stage 4) - Loaded from `//solutions//_solution.json` +- **Solutions tag**: String - Tag from Stage 4 output (e.g., `_20251016_182128`) +- **Configuration**: Dict - Validation criteria + +### Tag Handling +- **Input tags**: Required - Both `tasks_tag` (from Stage 3) and `solutions_tag` (from Stage 4) + - Loads tasks from `//tasks//tasks.json` + - Loads solutions from `//solutions//_solution.json` +- **Resume tag**: Optional - If provided, checks for existing validations in `//validation//_validation.json` and continues incomplete validation +- **New tag**: If no resume tag provided, generates new tag (format: `_YYYYMMDD_HHMMSS`) and creates `//validation//_validation.json` for each task + +### Output: `_validation.json` (one per task) + +**Stage Output:** ValidationResult dataclass + ValidationCriteria dataclass + PipelineMetadata +**Save Function:** `save_validation_output(validation_result: ValidationResult, criteria: ValidationCriteria, metadata: PipelineMetadata, output_path: Path)` + +**Implementation:** +- Stage generates `ValidationResult` dataclass object with validation information +- Stage generates `ValidationCriteria` dataclass object with criteria results +- Stage creates `PipelineMetadata` dataclass object with metadata (includes area_id, capability_id, task_id) +- Pass all to `save_validation_output(validation_result, criteria, metadata, output_path)` which creates 
`ValidationOutput` dataclass, serializes to JSON, and writes to file + +**File Path:** `////validation//_validation.json` +Where `` is format `area_` + zero-padded 3-digit number, `` is format `cap_` + zero-padded 3-digit number, `` is format `task_` + zero-padded 3-digit number (e.g., `task_000_validation.json`) +```json +{ + "metadata": { + "domain": "personal finance", + "domain_id": "personal_finance", + "area": "Cash Flow & Budget Management", + "area_id": "area_000", + "capability": "budget_policy_and_structure", + "capability_id": "cap_000", + "task_id": "task_000", + "stage": "validation", + "input_tags": { + "tasks_tag": "_20251014_114358", + "solutions_tag": "_20251016_182128" + }, + "tag": "_20251017_091500", + "timestamp": "2025-11-06T14:00:00Z" + }, + "task_id": "task_000", + "capability_name": "budget_policy_and_structure", + "capability_id": "cap_000", + "is_valid": true, + "validation_score": 0.95, + "criteria": { + "solution_completeness": true, + "solution_accuracy": true, + "reasoning_quality": true, + "consensus_quality": true + }, + "feedback": "Solution addresses all aspects of the task...", + "errors": [] +} +``` + +**Schema (JSON representation of ValidationOutput dataclass):** +- `metadata`: Object containing pipeline metadata + - `domain`: String (required, human-readable domain name) + - `domain_id`: String (required, slugified domain identifier) + - `area`: String (required, human-readable area name, must match an area name from Stage 1) + - `area_id`: String (required, format `area_` + zero-padded 3-digit number, must match an area_id from Stage 1) + - `capability`: String (required, human-readable capability name, must match a capability name from Stage 2) + - `capability_id`: String (required, format `cap_` + zero-padded 3-digit number, must match a capability_id from Stage 2) + - `task_id`: String (required, format `task_` + zero-padded 3-digit number, must match a task_id from Stage 3) + - `stage`: String (required, value: 
"validation") + - `input_tags`: Object (required, contains the input tags used) + - `tasks_tag`: String (required, format `_YYYYMMDD_HHMMSS`, the tasks tag from Stage 3 used as input) + - `solutions_tag`: String (required, format `_YYYYMMDD_HHMMSS`, the solutions tag from Stage 4 used as input) + - `tag`: String (required, format `_YYYYMMDD_HHMMSS`, the tag used for this run's output) + - `timestamp`: String (required, ISO 8601 format) +- `task_id`: String (required, must match metadata.task_id) +- `capability_name`: String (required, human-readable capability name, must match metadata.capability) +- `capability_id`: String (required, format `cap_` + zero-padded 3-digit number, must match metadata.capability_id) +- `is_valid`: Boolean (required, overall validation status) +- `validation_score`: Float (required, 0.0 to 1.0) +- `criteria`: Object with boolean criteria (ValidationCriteria dataclass) + - `solution_completeness`: Boolean (required) + - `solution_accuracy`: Boolean (required) + - `reasoning_quality`: Boolean (required) + - `consensus_quality`: Boolean (required) +- `feedback`: String (required, detailed feedback) +- `errors`: Array of strings (required, list of errors if any) + +--- + +## ID Assignment Rules + +All IDs are string identifiers with explicit prefixes and sequential numbering: + +- **Area IDs**: Format `area_` + zero-padded 3-digit number (e.g., `area_000`, `area_001`) + - Assigned sequentially starting from `area_000` when areas are generated + - Unique within an experiment + +- **Capability IDs**: Format `cap_` + zero-padded 3-digit number (e.g., `cap_000`, `cap_001`) + - Assigned sequentially starting from `cap_000` within each area when capabilities are generated + - Unique within an area (but can repeat across areas, e.g., `area_000/cap_000/` and `area_001/cap_000/`) + +- **Task IDs**: Format `task_` + zero-padded 3-digit number (e.g., `task_000`, `task_001`) + - Assigned sequentially starting from `task_000` within each capability when 
tasks are generated + - Unique within a capability + +**ID Properties:** +- String type with explicit prefixes (`area_`, `cap_`, `task_`) +- Sequential assignment (000, 001, 002, ...) +- Zero-padded 3-digit numbers ensure proper sorting +- Stable once assigned (don't change if items are reordered) +- Human-readable names are stored alongside IDs in JSON files From 185e216472710c72d1035be34d96b0308729e397 Mon Sep 17 00:00:00 2001 From: kohankhaki Date: Tue, 25 Nov 2025 03:13:26 -0500 Subject: [PATCH 2/7] updated Pipeline Schemas Doc. --- src/schemas/PIPELINE_SCHEMAS.md | 904 +++++++++++++++----------------- 1 file changed, 428 insertions(+), 476 deletions(-) diff --git a/src/schemas/PIPELINE_SCHEMAS.md b/src/schemas/PIPELINE_SCHEMAS.md index fefeb09..7bef4f7 100644 --- a/src/schemas/PIPELINE_SCHEMAS.md +++ b/src/schemas/PIPELINE_SCHEMAS.md @@ -2,21 +2,10 @@ This document defines the standardized input and output formats for each stage of the ACE pipeline. These schemas ensure consistency across different implementations and enable interoperability between pipeline stages. -## Implementation Approach - -**Pipeline Pattern:** -Each stage follows a consistent pattern: -1. **Stage Implementation**: Produces dataclass objects (or lists of dataclasses) + metadata -2. **Save Function**: Takes dataclass objects + metadata → saves to JSON file - -**Functions (to be provided):** -- **Save functions**: `save__output(data, metadata, output_path)` - Handle JSON serialization, file writing, directory creation -- **Load functions**: `load__output(file_path) -> ` - Load dataclass objects from JSON files - -Dataclasses provide type safety, validation, and clear structure. JSON is the serialization format. - ## Pipeline Stages +The ACE pipeline consists of multiple stages, where each stage consumes the output from the previous stage: + 0. **Experiment Setup** - Initialize experiment and create domain metadata 1. **Area Generation** - Generate domain areas 2. 
**Capability Generation** - Generate capabilities for each area @@ -26,34 +15,94 @@ Dataclasses provide type safety, validation, and clear structure. JSON is the se **Note:** Experiment configuration must remain consistent throughout the pipeline. Once set during experiment setup, it should not be changed to avoid inconsistencies. +## Implementation Approach + +**Pipeline Pattern:** +Each stage follows a consistent pattern: +1. **Consumes Previous Stage Output**: Each stage (except Stage 0) loads data from the previous stage's output files using provided load functions +2. **Stage Implementation**: Produces dataclass objects (or lists of dataclasses) + metadata +3. **Save Function**: Takes dataclass objects + metadata → saves to JSON file using provided save functions + +**Important:** All stage implementations must follow this pattern to ensure the pipeline is clean, consistent, and maintainable. This enables interoperability between different implementations, resumability of failed runs, and clear traceability through the pipeline. + +**Note:** The dataclasses, save functions (`save_<stage>_output(data, metadata, output_path)`), and load functions (`load_<stage>_output(file_path) -> <Dataclass>`) for each stage will be provided and must be used. Do not implement custom serialization or data structures - use the standardized schemas to ensure consistency across the pipeline. Dataclasses provide type safety, validation, and clear structure. JSON is the serialization format.
+ +**Iteration Note:** Some stages operate on subsets (one area, capability, or task at a time) and require an outer orchestrator/loop script to iterate over all items: +- **Stage 2 (Capability Generation)**: Operates on one area at a time - orchestrator loops over all areas from Stage 1 +- **Stage 3 (Task Generation)**: Operates on one capability at a time - orchestrator loops over all capabilities from Stage 2 +- **Stage 4 (Solution Generation)**: Operates on one task at a time - orchestrator loops over all tasks from Stage 3 +- **Stage 5 (Validation)**: Operates on one task at a time - orchestrator loops over all solutions from Stage 4 + +The stage implementation itself handles a single item, and the orchestrator manages the iteration across all items. + +--- + +## Naming Conventions + +All identifiers and tags in the pipeline follow standardized formats: + +### Tags +- **Format**: `_YYYYMMDD_HHMMSS` (e.g., `_20251009_122040`) +- **Usage**: Used for versioning outputs in Stages 1-5 +- **Generation**: Automatically generated when a new run is created (timestamp-based) + +### Domain IDs +- **Format**: `domain_` + zero-padded 3-digit number (e.g., `domain_000`) +- **Assignment**: Sequential starting from `domain_000` +- **Scope**: Unique within an experiment (typically only one domain per experiment) + +### Area IDs +- **Format**: `area_` + zero-padded 3-digit number (e.g., `area_000`, `area_001`) +- **Assignment**: Sequential starting from `area_000` when areas are generated +- **Scope**: Unique within an experiment + +### Capability IDs +- **Format**: `cap_` + zero-padded 3-digit number (e.g., `cap_000`, `cap_001`) +- **Assignment**: Sequential starting from `cap_000` within each area when capabilities are generated +- **Scope**: Unique within an area (but can repeat across areas, e.g., `area_000/cap_000/` and `area_001/cap_000/`) + +### Task IDs +- **Format**: `task_` + zero-padded 3-digit number (e.g., `task_000`, `task_001`) +- **Assignment**: Sequential 
starting from `task_000` within each capability when tasks are generated +- **Scope**: Unique within a capability + + --- ## Directory Structure -All outputs are stored in the following directory structure, organized hierarchically by area and capability for easy resumability, with versioning support: +All outputs are stored in the following flat directory structure, with each stage having its own top-level directory and tags for different generation runs: ``` / / experiment.json # Experiment metadata (all configuration) - domain.json # Domain metadata (contains domain_id) + domain/ + domain.json # Domain metadata (contains domain_id) areas/ - / - areas.json # All areas for this experiment run - / - capabilities/ - / - capabilities.json # All capabilities for this area run - / - tasks/ - / - tasks.json # All tasks for this capability run - solutions/ - / - _solution.json # Individual solution files (e.g., task_000_solution.json) - validation/ - / - _validation.json # Validation results per task (e.g., task_000_validation.json) + / # Tag from area generation (e.g., _20251009_122040) + areas.json # All areas for this area generation run (output from Stage 1) + capabilities/ + / # Tag from capability generation (e.g., _20251009_131252) + / # One directory per area (e.g., area_000, area_001) + capabilities.json # All capabilities for this area in this generation run + tasks/ + / # Tag from task generation (e.g., _20251014_114358) + / # One directory per area (e.g., area_000, area_001) + / # One directory per capability (e.g., cap_000, cap_001) + tasks.json # All tasks for this capability in this generation run + solutions/ + / # Tag from solution generation (e.g., _20251016_182128) + / # One directory per area (e.g., area_000, area_001) + / # One directory per capability (e.g., cap_000, cap_001) + / # One directory per task (e.g., task_000, task_001) + solution.json # Solution for this task + validation/ + / # Tag from validation run (e.g., _20251017_091500) + / # One 
directory per area (e.g., area_000, area_001) + / # One directory per capability (e.g., cap_000, cap_001) + / # One directory per task (e.g., task_000, task_001) + validation.json # Validation result for this task ``` **Example:** @@ -61,80 +110,70 @@ All outputs are stored in the following directory structure, organized hierarchi agentic_outputs/ r0_10x10/ experiment.json # Experiment configuration - domain.json + domain/ + domain.json # Domain metadata areas/ - _20251009_122040/ - areas.json - _20251010_143022/ - areas.json - area_000/ # area_id = "area_000" (first area) - capabilities/ - _20251009_131252/ - capabilities.json - cap_000/ # capability_id = "cap_000" (first capability in area_000) - tasks/ - _20251014_114358/ - tasks.json - solutions/ - _20251016_182128/ - task_000_solution.json - task_001_solution.json - task_002_solution.json - validation/ - _20251017_091500/ - task_000_validation.json - task_001_validation.json - task_002_validation.json - cap_001/ # capability_id = "cap_001" (second capability in area_000) - tasks/ - _20251014_114358/ - tasks.json - area_001/ # area_id = "area_001" (second area) - capabilities/ - _20251009_131252/ - capabilities.json - cap_000/ # capability_id = "cap_000" (first capability in area_001) - tasks/ - _20251014_114358/ - tasks.json + _20251009_122040/ # Tag from first area generation + areas.json # All areas from this generation + _20251010_143022/ # Tag from second area generation (different set of areas) + areas.json # All areas from this generation + capabilities/ + _20251009_131252/ # Tag from first capability generation + area_000/ + capabilities.json # Capabilities for area_000 + area_001/ + capabilities.json # Capabilities for area_001 + _20251011_091500/ # Tag from second capability generation + area_000/ + capabilities.json # Capabilities for area_000 + tasks/ + _20251014_114358/ # Tag from first task generation + area_000/ + cap_000/ + tasks.json # Tasks for area_000/cap_000 + cap_001/ + tasks.json # Tasks 
for area_000/cap_001 + area_001/ + cap_000/ + tasks.json # Tasks for area_001/cap_000 + _20251015_120000/ # Tag from second task generation + area_000/ + cap_000/ + tasks.json # Tasks for area_000/cap_000 + solutions/ + _20251016_182128/ # Tag from solution generation + area_000/ + cap_000/ + task_000/ + solution.json + task_001/ + solution.json + area_001/ + cap_000/ + task_000/ + solution.json + validation/ + _20251017_091500/ # Tag from validation run + area_000/ + cap_000/ + task_000/ + validation.json + task_001/ + validation.json + area_001/ + cap_000/ + task_000/ + validation.json ``` -**Directory Naming Rules:** -- ``: Base output directory (e.g., `agentic_outputs`) -- ``: Experiment identifier (e.g., `r0_10x10`) -- ``: Timestamp tag in format `_YYYYMMDD_HHMMSS` (e.g., `_20251009_122040`) - - Generated automatically when a stage is run - - Allows multiple versions/runs of the same stage - - Each stage has its own tag (independent versioning) -- ``: String identifier in format `area_` + zero-padded 3-digit number (e.g., `area_000`, `area_001`) - - Format: `area_` prefix + zero-padded 3-digit number (000, 001, 002, ...) - - Assigned sequentially starting from 000 when areas are generated - - Example: First area → `area_000`, Second area → `area_001`, etc. - - Unique within an experiment - - Used in directory paths for clean, explicit paths - - Human-readable name stored in `areas.json` -- ``: String identifier in format `cap_` + zero-padded 3-digit number (e.g., `cap_000`, `cap_001`) - - Format: `cap_` prefix + zero-padded 3-digit number (000, 001, 002, ...) - - Assigned sequentially starting from 000 within each area when capabilities are generated - - Example: First capability in area_000 → `cap_000`, Second capability → `cap_001`, etc. 
- - Unique within an area (but can repeat across areas, e.g., `area_000/cap_000/` and `area_001/cap_000/`) - - Used in directory paths for clean, explicit paths - - Human-readable name stored in `capabilities.json` -- ``: String identifier in format `task_` + zero-padded 3-digit number (e.g., `task_000`, `task_001`) - - Format: `task_` prefix + zero-padded 3-digit number (000, 001, 002, ...) - - Assigned sequentially starting from 000 within each capability when tasks are generated - - Example: First task in cap_000 → `task_000`, Second task → `task_001`, etc. - - Unique within a capability - - **File Naming:** - Experiment: `experiment.json` (no versioning, one file per experiment, contains all configuration) - Domain: `domain.json` (no versioning, one file per experiment) -- Areas: `areas.json` (versioned by tag: `areas//areas.json`) -- Capabilities: `capabilities.json` (versioned by tag: `/capabilities//capabilities.json`) -- Tasks: `tasks.json` (versioned by tag: `//tasks//tasks.json`) -- Solutions: `_solution.json` (versioned by tag: `//solutions//_solution.json`, e.g., `task_000_solution.json`) -- Validation: `_validation.json` (versioned by tag: `//validation//_validation.json`, e.g., `task_000_validation.json`) +- Areas: `areas.json` (versioned by tag: `areas//areas.json`) +- Capabilities: `capabilities.json` (versioned by tag: `capabilities///capabilities.json`) +- Tasks: `tasks.json` (versioned by tag: `tasks////tasks.json`) +- Solutions: `solution.json` (versioned by tag: `solutions/////solution.json`) +- Validation: `validation.json` (versioned by tag: `validation/////validation.json`) **Resumability Benefits:** - Each area has its own directory - easy to see which areas are processed @@ -145,14 +184,14 @@ agentic_outputs/ - Can check latest tag to determine most recent run **Versioning Strategy:** -- Each stage generates a new tag when run (format: `_YYYYMMDD_HHMMSS`) +- Each stage generates a new tag when run (see Tags in Naming Conventions section) - 
Tags are independent per stage (areas can have different tag than capabilities) - **Input tags**: Each stage requires tag(s) from previous stage(s) to load input data - Stage 1 (Areas): No input tag (uses domain.json) - Stage 2 (Capabilities): Requires `areas_tag` from Stage 1 - Stage 3 (Tasks): Requires `capabilities_tag` from Stage 2 - Stage 4 (Solutions): Requires `tasks_tag` from Stage 3 - - Stage 5 (Validation): Requires both `tasks_tag` (Stage 3) and `solutions_tag` (Stage 4) + - Stage 5 (Validation): Requires `solutions_tag` from Stage 4 (task information is included in solution files) - **Resume tags**: Optional - If provided, stage loads existing output and continues incomplete generation - Checks for existing files with resume tag - Identifies which items are incomplete (e.g., missing capabilities, tasks, solutions) @@ -162,23 +201,142 @@ agentic_outputs/ --- +## Dataclasses + +All dataclasses used across pipeline stages are defined below. Stage implementations must use these standardized dataclasses. + +**Note:** All ID and tag formats (domain_id, area_id, capability_id, task_id, tags) are defined in the [Naming Conventions](#naming-conventions) section. Individual field descriptions below do not repeat these format definitions. + +### PipelineMetadata + +All pipeline outputs include a `metadata` object (represented by the `PipelineMetadata` dataclass) that provides pipeline execution context and traceability. 
+ +**Required Fields:** +- `experiment_id`: String (required, experiment identifier) +- `output_base_dir`: String (required, base output directory for all pipeline outputs) +- `timestamp`: String (required, ISO 8601 format, e.g., "2025-11-06T12:00:00Z") +- `input_stage_tag`: String (optional, tag of the input data used from previous stage) - Present when stage uses input from previous stage, null for Stage 0 +- `output_stage_tag`: String (optional, tag for this output) - Present for versioned stages (Stages 1-5), null for Stage 0 (not versioned) +- `resume`: Boolean (required, indicates if this run was resumed from a previous checkpoint) + +**Optional Fields:** +- Additional optional fields may be added as needed for pipeline-specific metadata + +**Note:** +- Stage-specific identifiers (domain_id, area_id, capability_id, task_id) are stored in the actual data objects (Domain, Area, Capability, Task), NOT in PipelineMetadata +- PipelineMetadata focuses on pipeline execution context, not the content being processed + +### Experiment + +**Fields:** +- `experiment_id`: String (required, experiment identifier) +- `domain`: String (required, human-readable domain name) +- `domain_id`: String (required) +- `pipeline_type`: String (optional, e.g., "agentic", "diverse_task") - identifies the pipeline variant +- `configuration`: Dict[str, Any] (required, complete configuration used for this experiment - structure varies by pipeline type) + +### Domain + +**Fields:** +- `name`: String (required, human-readable domain name) +- `domain_id`: String (required) +- `description`: String (optional, domain description) + +### Area + +**Fields:** +- `name`: String (required, human-readable area name) +- `area_id`: String (required) +- `description`: String (optional, area description) +- `domain`: String (required, domain name) +- `domain_id`: String (required) +- `generation_metadata`: Dict (optional, nested dictionary containing process-specific information) + - This field can 
contain any generation-specific data (e.g., generation method, parameters, intermediate steps) + - Structure is flexible and depends on the generation method + +### Capability + +**Fields:** +- `name`: String (required, capability name) +- `capability_id`: String (required) +- `description`: String (optional, capability description) +- `area`: String (required, area name) +- `area_id`: String (required) +- `domain`: String (required, domain name) +- `domain_id`: String (required) +- `generation_metadata`: Dict (optional, nested dictionary containing process-specific information) + - This field can contain any generation-specific data (e.g., generation method, parameters, intermediate steps) + - Structure is flexible and depends on the generation method + +### Task + +**Fields:** +- `task_id`: String (required, unique within capability) +- `task`: String (required, the task/problem text) +- `capability_id`: String (required) +- `capability`: String (required, capability name) +- `area`: String (required, area name) +- `area_id`: String (required) +- `domain`: String (required, domain name) +- `domain_id`: String (required) + +### TaskSolution + +**Fields:** +- `task_id`: String (required) +- `task`: String (required, the task/problem text from Stage 3) +- `capability`: String (required, capability name) +- `capability_id`: String (required) +- `area`: String (required, area name) +- `area_id`: String (required) +- `domain`: String (required, domain name) +- `domain_id`: String (required) +- `solution`: String (required, the final solution) +- `reasoning`: String (required, explanation of the solution) +- `numerical_answer`: String (optional, JSON string with numerical results) +- `generation_metadata`: Dict (optional, nested dictionary containing process-specific information) + - This field can contain any generation-specific data (e.g., debate rounds, agent interactions, pipeline type) + - Structure is flexible and depends on the generation method (agentic, 
single-agent, etc.) + +### ValidationResult + +**Fields:** +- `task_id`: String (required) +- `task`: String (required, the task/problem text from Stage 3) +- `capability`: String (required, capability name) +- `capability_id`: String (required) +- `area`: String (required, area name) +- `area_id`: String (required) +- `domain`: String (required, domain name) +- `domain_id`: String (required) +- `verification`: Boolean (required, overall validation status - whether the solution is verified/valid) +- `feedback`: String (required, detailed feedback on the validation) +- `score`: Float (optional, validation score, typically 0.0 to 1.0) +- `generation_metadata`: Dict (optional, nested dictionary containing process-specific information) + - This field can contain any validation-specific data (e.g., validation method, criteria details, error details) + - Structure is flexible and depends on the validation method + +--- + ## Stage 0: Experiment Setup ### Input -All inputs come from the configuration file. Important fields: +All inputs come from a configuration YAML file (e.g., `src/cfg/agentic_config.yaml`). Important fields include: - **Experiment ID**: String - The experiment identifier (e.g., "r0_10x10") - **Domain Name**: String - The domain name (e.g., "personal finance", "mathematics") - **Description**: String (optional) - Domain description -- **Configuration**: Dict - Complete experiment configuration (all config sections: `global_cfg`, `debate_cfg`, `agents`, `area_generation`, `capability_generation`, `task_generation`, `task_solver`, `exp_cfg`, etc.) +- **Output Base Directory**: String - Base output directory for all pipeline outputs (e.g., `global_cfg.output_dir` in agentic pipeline) + +**Note:** The `experiment_id` and `output_base_dir` from the config YAML file are consistent across all stages. All stage-specific configurations (e.g., `num_areas`, `num_capabilities_per_area`, `num_tasks_per_capability`) also come from this same config YAML file. 
### Tag Handling - **No input tag required** (first stage) -- **No resume tag** - Always creates new files (overwrites if exists) +- **No resume tag** - Not applicable (single domain JSON, always creates new files) ### Outputs This stage creates two files: -1. `experiment.json` - Experiment metadata and complete configuration +1. `experiment.json` - Experiment metadata 2. `domain.json` - Domain metadata #### Output 1: `experiment.json` @@ -186,215 +344,131 @@ This stage creates two files: **Stage Output:** Experiment dataclass + PipelineMetadata **Save Function:** `save_experiment_output(experiment: Experiment, metadata: PipelineMetadata, output_path: Path)` -**Implementation:** -- Stage creates `Experiment` dataclass object with experiment information and configuration -- Stage creates `PipelineMetadata` dataclass object with metadata -- Pass both to `save_experiment_output(experiment, metadata, output_path)` which creates `ExperimentMetadata` dataclass, serializes to JSON, and writes to file - **File Path:** `//experiment.json` ```json { "metadata": { "experiment_id": "r0_10x10", - "stage": "experiment_setup", - "timestamp": "2025-11-06T12:00:00Z" + "output_base_dir": "agentic_outputs", + "timestamp": "2025-11-06T12:00:00Z", + "input_stage_tag": null, + "output_stage_tag": null, + "resume": false }, "experiment": { "experiment_id": "r0_10x10", "domain": "personal finance", - "domain_id": "personal_finance", + "domain_id": "domain_000", + "pipeline_type": "agentic", "configuration": { - "global_cfg": { - "domain": "personal finance", - "output_dir": "agentic_outputs" - }, - "debate_cfg": { - "max_round": 5 - }, - "agents": { - "scientist_a": { - "model_name": "gpt-5", - "seed": 8 - }, - "scientist_b": { - "model_name": "gemini-2.5-pro", - "seed": 88 - }, - "moderator": { - "model_name": "claude-opus-4-1-20250805", - "seed": 888 - } - }, - "area_generation": { - "num_areas": 10 - }, - "capability_generation": { - "num_capabilities_per_area": 5 - }, - 
"task_generation": { - "num_final_problems_per_capability": 3, - "buffer_param": 2, - "max_rounds": 3 - }, - "task_solver": { - "max_tasks": 0, - "max_rounds": 1 - }, - "exp_cfg": { - "exp_id": "r0_10x10" - } + ... } } } ``` -**Schema (JSON representation of ExperimentMetadata dataclass):** -- `metadata`: Object containing pipeline metadata - - `experiment_id`: String (required, experiment identifier) - - `stage`: String (required, value: "experiment_setup") - - `timestamp`: String (required, ISO 8601 format) -- `experiment`: Object containing experiment information - - `experiment_id`: String (required, experiment identifier) - - `domain`: String (required, human-readable domain name) - - `domain_id`: String (required, slugified domain identifier) - - `configuration`: Object (required, all configuration used for this experiment) - - Contains all config sections: `global_cfg`, `debate_cfg`, `agents`, `area_generation`, `capability_generation`, `task_generation`, `task_solver`, `exp_cfg`, etc. - - Structure matches the input configuration format exactly +**Schema:** See `Experiment` and `PipelineMetadata` dataclasses in the Dataclasses section above. 
#### Output 2: `domain.json` **Stage Output:** Domain dataclass object + PipelineMetadata **Save Function:** `save_domain_output(domain: Domain, metadata: PipelineMetadata, output_path: Path)` -**Implementation:** -- Stage creates `Domain` dataclass object with domain information -- Stage creates `PipelineMetadata` dataclass object with metadata -- Pass both to `save_domain_output(domain, metadata, output_path)` which serializes to JSON and writes to file - -**File Path:** `//domain.json` +**File Path:** `//domain/domain.json` ```json { "metadata": { - "domain": "personal finance", - "domain_id": "personal_finance", - "stage": "experiment_setup", - "timestamp": "2025-11-06T12:00:00Z" + "experiment_id": "r0_10x10", + "output_base_dir": "agentic_outputs", + "timestamp": "2025-11-06T12:00:00Z", + "input_stage_tag": null, + "output_stage_tag": null, + "resume": false }, "domain": { "name": "personal finance", - "domain_id": "personal_finance", + "domain_id": "domain_000", "description": "Personal finance domain covering budgeting, investing, retirement planning, etc." 
} } ``` -**Schema (JSON representation of Domain dataclass):** -- `metadata`: Object containing pipeline metadata - - `domain`: String (required, human-readable domain name) - - `domain_id`: String (required, slugified domain identifier) - - `stage`: String (required, value: "experiment_setup") - - `timestamp`: String (required, ISO 8601 format) -- `domain`: Object containing domain information - - `name`: String (required, human-readable domain name) - - `domain_id`: String (required, slugified identifier, filesystem-safe) - - `description`: String (optional, domain description) - --- ## Stage 1: Area Generation ### Input -- **Domain**: Domain object (from Stage 0) - Loaded from `domain.json` -- **Configuration**: Dict - Stage-specific configuration (e.g., `num_areas`) +- **Domain**: Domain object (from Stage 0) - Loaded from `domain/domain.json` +- **Configuration**: Dict - Stage-specific configuration from config YAML file (e.g., `num_areas`) ### Tag Handling -- **Input tag**: Not applicable (uses domain.json which has no tag) -- **Resume tag**: Optional - If provided, loads from `areas//areas.json` and continues incomplete area generation -- **New tag**: If no resume tag provided, generates new tag (format: `_YYYYMMDD_HHMMSS`) and creates `areas//areas.json` +- **Input tag**: Not applicable (uses `domain/domain.json` from Stage 0, which has no tag) +- **Resume tag**: Not applicable (single `areas.json` file with all areas, always creates new files) +- **New tag**: Generates new tag and creates `areas//areas.json` ### Output: `areas.json` **Stage Output:** List[Area] dataclasses + PipelineMetadata **Save Function:** `save_areas_output(areas: List[Area], metadata: PipelineMetadata, output_path: Path)` -**Implementation:** -- Stage generates list of `Area` dataclass objects -- Stage creates `PipelineMetadata` dataclass object with metadata -- Pass both to `save_areas_output(areas, metadata, output_path)` which creates `AreaGenerationOutput` dataclass, serializes 
to JSON, and writes to file - **File Path:** `//areas//areas.json` ```json { "metadata": { - "domain": "personal finance", - "domain_id": "personal_finance", - "stage": "area_generation", - "tag": "_20251009_122040", - "timestamp": "2025-11-06T12:00:00Z" + "experiment_id": "r0_10x10", + "output_base_dir": "agentic_outputs", + "timestamp": "2025-11-06T12:00:00Z", + "input_stage_tag": null, + "output_stage_tag": "_20251009_122040", + "resume": false }, "areas": [ { "name": "Cash Flow & Budget Management", "area_id": "area_000", - "description": "Design and monitor budgets using various methodologies..." + "description": "Design and monitor budgets using various methodologies...", + "domain": "personal finance", + "domain_id": "domain_000" } ] } ``` -**Schema (JSON representation of AreaGenerationOutput dataclass):** -- `metadata`: Object containing pipeline metadata - - `domain`: String (required, human-readable domain name) - - `domain_id`: String (required, slugified domain identifier) - - `stage`: String (required, value: "area_generation") - - `tag`: String (required, format `_YYYYMMDD_HHMMSS`, the tag used for this run's output) - - `timestamp`: String (required, ISO 8601 format) - - Note: No `input_tag` field (Stage 1 uses `domain.json` which has no tag) -- `areas`: Array of Area objects - - `name`: String (required, human-readable name, unique within domain) - - `area_id`: String (required, format `area_` + zero-padded 3-digit number, unique within experiment) - - `description`: String (required, detailed description) - --- ## Stage 2: Capability Generation ### Input -- **Areas**: Array of Area objects (from Stage 1) - Loaded from `areas//areas.json` - **Areas tag**: String - Tag from Stage 1 output (e.g., `_20251009_122040`) -- **Configuration**: Dict - Stage-specific configuration (e.g., `num_capabilities_per_area`) + - Loads areas from `areas//areas.json` +- **Configuration**: Dict - Stage-specific configuration from config YAML file (e.g., 
`num_capabilities_per_area`) ### Tag Handling -- **Input tag**: Required - `areas_tag` from Stage 1 output (e.g., `_20251009_122040`) - - Loads areas from `areas//areas.json` -- **Resume tag**: Optional - If provided, loads from `/capabilities//capabilities.json` for each area and continues incomplete capability generation -- **New tag**: If no resume tag provided, generates new tag (format: `_YYYYMMDD_HHMMSS`) and creates `/capabilities//capabilities.json` for each area +- **Resume tag**: Optional - If provided, goes to `capabilities//` directory + - For each area_id, checks if `capabilities///capabilities.json` exists + - If file exists, capabilities for that area were already generated successfully, so skip it + - If file doesn't exist, creates `/` subdirectory and generates capabilities for that area +- **New tag**: If no resume tag provided, generates new tag (cap_tag) for this capability generation run + - For each area, creates `capabilities///capabilities.json` ### Output: `capabilities.json` (one per area) **Stage Output:** List[Capability] dataclasses + PipelineMetadata **Save Function:** `save_capabilities_output(capabilities: List[Capability], metadata: PipelineMetadata, output_path: Path)` -**Implementation:** -- Stage generates list of `Capability` dataclass objects for an area -- Stage creates `PipelineMetadata` dataclass object with metadata (includes area_id) -- Pass both to `save_capabilities_output(capabilities, metadata, output_path)` which creates `CapabilityGenerationOutput` dataclass, serializes to JSON, and writes to file +**File Path:** `//capabilities///capabilities.json` -**File Path:** `///capabilities//capabilities.json` -Where `` is a string in format `area_` + zero-padded 3-digit number (e.g., `area_000`, `area_001`) ```json { "metadata": { - "domain": "personal finance", - "domain_id": "personal_finance", - "area": "Cash Flow & Budget Management", - "area_id": "area_000", - "stage": "capability_generation", - "input_tag": 
"_20251009_122040", - "tag": "_20251009_131252", - "timestamp": "2025-11-06T12:30:00Z" + "experiment_id": "r0_10x10", + "output_base_dir": "agentic_outputs", + "timestamp": "2025-11-06T12:30:00Z", + "input_stage_tag": "_20251009_122040", + "output_stage_tag": "_20251009_131252", + "resume": false }, "capabilities": [ { @@ -402,321 +476,199 @@ Where `` is a string in format `area_` + zero-padded 3-digit number (e. "capability_id": "cap_000", "description": "Define the strategic framework and methodology for budgeting...", "area": "Cash Flow & Budget Management", - "area_id": "area_000" + "area_id": "area_000", + "domain": "personal finance", + "domain_id": "domain_000" } ] } ``` -**Schema (JSON representation of CapabilityGenerationOutput dataclass):** -- `metadata`: Object containing pipeline metadata - - `domain`: String (required, human-readable domain name) - - `domain_id`: String (required, slugified domain identifier) - - `area`: String (required, human-readable area name, must match an area name from Stage 1) - - `area_id`: String (required, format `area_` + zero-padded 3-digit number, must match an area_id from Stage 1) - - `stage`: String (required, value: "capability_generation") - - `input_tag`: String (required, format `_YYYYMMDD_HHMMSS`, the areas tag from Stage 1 used as input) - - `tag`: String (required, format `_YYYYMMDD_HHMMSS`, the tag used for this run's output) - - `timestamp`: String (required, ISO 8601 format) -- `capabilities`: Array of Capability objects - - `name`: String (required, human-readable name, unique within area) - - `capability_id`: String (required, format `cap_` + zero-padded 3-digit number starting from 000, unique within area) - - `description`: String (required, detailed description) - - `area`: String (required, human-readable area name, must match parent area name) - - `area_id`: String (required, format `area_` + zero-padded 3-digit number, must match parent area_id) - --- ## Stage 3: Task Generation ### Input -- 
**Capabilities**: Array of Capability objects (from Stage 2) - Loaded from `/capabilities//capabilities.json` - **Capabilities tag**: String - Tag from Stage 2 output (e.g., `_20251009_131252`) -- **Configuration**: Dict - Stage-specific configuration (e.g., `num_final_problems_per_capability`) + - Loads capabilities from `capabilities///capabilities.json` for each area +- **Configuration**: Dict - Stage-specific configuration from config YAML file (e.g., `num_final_problems_per_capability`) ### Tag Handling -- **Input tag**: Required - `capabilities_tag` from Stage 2 output (e.g., `_20251009_131252`) - - Loads capabilities from `/capabilities//capabilities.json` for each area -- **Resume tag**: Optional - If provided, loads from `//tasks//tasks.json` for each capability and continues incomplete task generation -- **New tag**: If no resume tag provided, generates new tag (format: `_YYYYMMDD_HHMMSS`) and creates `//tasks//tasks.json` for each capability +- **Resume tag**: Optional - If provided, goes to `tasks//` directory + - For each `` and ``, checks if `tasks////tasks.json` exists + - If file exists, tasks for that capability were already generated successfully, so skip it + - If file doesn't exist, creates `//` subdirectories and generates tasks for that capability +- **New tag**: If no resume tag provided, generates new tag (task_tag) for this task generation run + - For each capability, creates `tasks////tasks.json` ### Output: `tasks.json` (one per capability) -**Stage Output:** Dict[str, Task] (mapping task_id to Task dataclass) + PipelineMetadata -**Save Function:** `save_tasks_output(tasks: Dict[str, Task], metadata: PipelineMetadata, output_path: Path)` +**Stage Output:** List[Task] dataclasses + PipelineMetadata +**Save Function:** `save_tasks_output(tasks: List[Task], metadata: PipelineMetadata, output_path: Path)` -**Implementation:** -- Stage generates dictionary mapping `task_id` strings to `Task` dataclass objects -- Stage creates 
`PipelineMetadata` dataclass object with metadata (includes area_id, capability_id) -- Pass both to `save_tasks_output(tasks, metadata, output_path)` which creates `TaskGenerationOutput` dataclass, serializes to JSON, and writes to file +**File Path:** `//tasks////tasks.json` -**File Path:** `////tasks//tasks.json` -Where `` is format `area_` + zero-padded 3-digit number, `` is format `cap_` + zero-padded 3-digit number (e.g., `area_000/cap_000/`) ```json { "metadata": { - "domain": "personal finance", - "domain_id": "personal_finance", - "area": "Cash Flow & Budget Management", - "area_id": "area_000", - "capability": "budget_policy_and_structure", - "capability_id": "cap_000", - "stage": "task_generation", - "input_tag": "_20251009_131252", - "tag": "_20251014_114358", - "timestamp": "2025-11-06T13:00:00Z" + "experiment_id": "r0_10x10", + "output_base_dir": "agentic_outputs", + "timestamp": "2025-11-06T13:00:00Z", + "input_stage_tag": "_20251009_131252", + "output_stage_tag": "_20251014_114358", + "resume": false }, - "tasks": { - "task_000": { + "tasks": [ + { + "task_id": "task_000", "task": "You are advising a client who wants to set up a zero-based budget...", - "capability_id": "cap_000" + "capability_id": "cap_000", + "capability": "budget_policy_and_structure", + "area": "Cash Flow & Budget Management", + "area_id": "area_000", + "domain": "personal finance", + "domain_id": "domain_000" }, - "task_001": { + { + "task_id": "task_001", "task": "A family of four needs to restructure their budget...", - "capability_id": "cap_000" + "capability_id": "cap_000", + "capability": "budget_policy_and_structure", + "area": "Cash Flow & Budget Management", + "area_id": "area_000", + "domain": "personal finance", + "domain_id": "domain_000" } - } + ] } ``` -**Schema (JSON representation of TaskGenerationOutput dataclass):** -- `metadata`: Object containing pipeline metadata - - `domain`: String (required, human-readable domain name) - - `domain_id`: String (required, 
slugified domain identifier) - - `area`: String (required, human-readable area name, must match an area name from Stage 1) - - `area_id`: String (required, format `area_` + zero-padded 3-digit number, must match an area_id from Stage 1) - - `capability`: String (required, human-readable capability name, must match a capability name from Stage 2) - - `capability_id`: String (required, format `cap_` + zero-padded 3-digit number, must match a capability_id from Stage 2) - - `stage`: String (required, value: "task_generation") - - `input_tag`: String (required, format `_YYYYMMDD_HHMMSS`, the capabilities tag from Stage 2 used as input) - - `tag`: String (required, format `_YYYYMMDD_HHMMSS`, the tag used for this run's output) - - `timestamp`: String (required, ISO 8601 format) -- `tasks`: Object mapping task_id to Task object - - `task_id`: String (required, format: `task_` + zero-padded 3-digit number, unique within capability) - - `task`: String (required, the task/problem text) - - `capability_id`: String (required, format `cap_` + zero-padded 3-digit number, must match parent capability_id) - --- ## Stage 4: Solution Generation ### Input -- **Tasks**: Object mapping task_id to Task objects (from Stage 3) - Loaded from `//tasks//tasks.json` - **Tasks tag**: String - Tag from Stage 3 output (e.g., `_20251014_114358`) -- **Configuration**: Dict - Stage-specific configuration (e.g., `max_rounds`) + - For each area and capability, loads tasks from `tasks////tasks.json` +- **Configuration**: Dict - Stage-specific configuration from config YAML file (e.g., `max_rounds`) ### Tag Handling -- **Input tag**: Required - `tasks_tag` from Stage 3 output (e.g., `_20251014_114358`) - - Loads tasks from `//tasks//tasks.json` for each capability -- **Resume tag**: Optional - If provided, checks for existing solutions in `//solutions//_solution.json` and continues incomplete solution generation -- **New tag**: If no resume tag provided, generates new tag (format: `_YYYYMMDD_HHMMSS`) 
and creates `//solutions//_solution.json` for each task +- **Resume tag**: Optional - If provided, goes to `solutions//` directory + - For each area_id, capability_id, and task_id combination, checks if `solutions/////solution.json` exists + - If file exists, solution for that task was already generated successfully, so skip it + - If file doesn't exist, creates `///` subdirectories and generates solution for that task +- **New tag**: If no resume tag provided, generates new tag (solution_tag) for this solution generation run + - For each task, creates `solutions/////solution.json` -### Output: `_solution.json` (one per task) +### Output: `solution.json` (one per task) -**Stage Output:** TaskSolution dataclass + List[AgentSolution] dataclasses + PipelineMetadata -**Save Function:** `save_solution_output(task_solution: TaskSolution, all_solutions: List[AgentSolution], metadata: PipelineMetadata, output_path: Path)` +**Stage Output:** TaskSolution dataclass + PipelineMetadata +**Save Function:** `save_solution_output(task_solution: TaskSolution, metadata: PipelineMetadata, output_path: Path)` -**Implementation:** -- Stage generates `TaskSolution` dataclass object with solution information -- Stage generates list of `AgentSolution` dataclass objects -- Stage creates `PipelineMetadata` dataclass object with metadata (includes area_id, capability_id, task_id) -- Pass all to `save_solution_output(task_solution, all_solutions, metadata, output_path)` which creates `SolutionGenerationOutput` dataclass, serializes to JSON, and writes to file +**File Path:** `//solutions/////solution.json` -**File Path:** `////solutions//_solution.json` -Where `` is format `area_` + zero-padded 3-digit number, `` is format `cap_` + zero-padded 3-digit number, `` is format `task_` + zero-padded 3-digit number (e.g., `task_000_solution.json`) ```json { "metadata": { - "domain": "personal finance", - "domain_id": "personal_finance", - "area": "Cash Flow & Budget Management", - "area_id": 
"area_000", - "capability_name": "budget_policy_and_structure", - "capability_id": "cap_000", - "task_id": "task_000", - "stage": "solution_generation", - "input_tag": "_20251014_114358", - "tag": "_20251016_182128", - "timestamp": "2025-11-06T13:30:00Z" + "experiment_id": "r0_10x10", + "output_base_dir": "agentic_outputs", + "timestamp": "2025-11-06T13:30:00Z", + "input_stage_tag": "_20251014_114358", + "output_stage_tag": "_20251016_182128", + "resume": false }, "task_id": "task_000", - "capability_name": "budget_policy_and_structure", + "task": "You are advising a client who wants to set up a zero-based budget...", + "capability": "budget_policy_and_structure", "capability_id": "cap_000", - "area_name": "Cash Flow & Budget Management", + "area": "Cash Flow & Budget Management", "area_id": "area_000", - "problem": "You are advising a client who wants to set up a zero-based budget...", + "domain": "personal finance", + "domain_id": "domain_000", "solution": "The optimal approach is to use a zero-based budgeting methodology...", - "numerical_answer": "{\"budget_allocation\": {...}}", "reasoning": "Both agents agreed on the zero-based approach because...", - "consensus_reached": true, - "total_rounds": 2, - "all_solutions": [ - { - "agent_id": "A", - "task_id": "task_000", - "thought": "I need to analyze the client's financial situation...", - "final_answer": "{\"recommendation\": {...}}", - "numerical_answer": "null", - "round_number": "0" - }, - { - "agent_id": "B", - "task_id": "task_000", - "thought": "The client's income and expenses suggest...", - "final_answer": "{\"recommendation\": {...}}", - "numerical_answer": "null", - "round_number": "0" - } - ] + "numerical_answer": "{\"budget_allocation\": {...}}", + "generation_metadata": { + "pipeline_type": "agentic", + "consensus_reached": true, + "total_rounds": 2, + "agents": [ + { + "agent_id": "A", + "thought": "I need to analyze the client's financial situation...", + "final_answer": "{\"recommendation\": 
{...}}", + "round_number": 0 + }, + { + "agent_id": "B", + "thought": "The client's income and expenses suggest...", + "final_answer": "{\"recommendation\": {...}}", + "round_number": 0 + } + ] + } } ``` -**Schema (JSON representation of SolutionGenerationOutput dataclass):** -- `metadata`: Object containing pipeline metadata - - `domain`: String (required, human-readable domain name) - - `domain_id`: String (required, slugified domain identifier) - - `area`: String (required, human-readable area name, must match an area name from Stage 1) - - `area_id`: String (required, format `area_` + zero-padded 3-digit number, must match an area_id from Stage 1) - - `capability_name`: String (required, human-readable capability name, must match a capability name from Stage 2) - - `capability_id`: String (required, format `cap_` + zero-padded 3-digit number, must match a capability_id from Stage 2) - - `task_id`: String (required, must match a task_id from Stage 3) - - `stage`: String (required, value: "solution_generation") - - `input_tag`: String (required, format `_YYYYMMDD_HHMMSS`, the tasks tag from Stage 3 used as input) - - `tag`: String (required, format `_YYYYMMDD_HHMMSS`, the tag used for this run's output) - - `timestamp`: String (required, ISO 8601 format) -- `task_id`: String (required, must match metadata.task_id) -- `capability_name`: String (required, human-readable capability name, must match metadata.capability_name) -- `capability_id`: String (required, format `cap_` + zero-padded 3-digit number, must match metadata.capability_id) -- `area_name`: String (required, human-readable area name, must match metadata.area) -- `area_id`: String (required, format `area_` + zero-padded 3-digit number, must match metadata.area_id) -- `problem`: String (required, the task text from Stage 3) -- `solution`: String (required, the final consensus solution) -- `numerical_answer`: String (optional, JSON string with numerical results) -- `reasoning`: String (required, 
explanation of consensus or disagreement) -- `consensus_reached`: Boolean (required, whether agents reached consensus) -- `total_rounds`: Integer (required, number of debate rounds) -- `all_solutions`: Array of AgentSolution objects - - `agent_id`: String (required, "A" or "B") - - `task_id`: String (required, must match parent task_id) - - `thought`: String (required, agent's reasoning) - - `final_answer`: String (required, JSON string with agent's solution) - - `numerical_answer`: String (optional, JSON string or "null") - - `round_number`: String (required, round number as string) - --- ## Stage 5: Validation ### Input -- **Tasks**: Object mapping task_id to Task objects (from Stage 3) - Loaded from `//tasks//tasks.json` -- **Tasks tag**: String - Tag from Stage 3 output (e.g., `_20251014_114358`) -- **Solutions**: Object mapping task_id to TaskSolution objects (from Stage 4) - Loaded from `//solutions//_solution.json` - **Solutions tag**: String - Tag from Stage 4 output (e.g., `_20251016_182128`) -- **Configuration**: Dict - Validation criteria + - For each area, capability, and task, loads solutions from `solutions/////solution.json` + - Task information is included in the solution files, so no separate tasks tag is needed +- **Configuration**: Dict - Stage-specific configuration from config YAML file (e.g., validation criteria) ### Tag Handling -- **Input tags**: Required - Both `tasks_tag` (from Stage 3) and `solutions_tag` (from Stage 4) - - Loads tasks from `//tasks//tasks.json` - - Loads solutions from `//solutions//_solution.json` -- **Resume tag**: Optional - If provided, checks for existing validations in `//validation//_validation.json` and continues incomplete validation -- **New tag**: If no resume tag provided, generates new tag (format: `_YYYYMMDD_HHMMSS`) and creates `//validation//_validation.json` for each task +- **Input tag**: Required - `solutions_tag` from Stage 4 + - For each area, capability, and task, loads solutions from 
`solutions/////solution.json` + - Task information is included in the solution files +- **Resume tag**: Optional - If provided, goes to `validation//` directory + - For each `///solution.json` in `solutions/`, checks if `validation/////validation.json` exists + - If file exists, validation for that task was already completed successfully, so skip it + - If file doesn't exist, creates `///` subdirectories and generates validation for that task +- **New tag**: If no resume tag provided, generates new tag (validation_tag) for this validation run + - For each task, creates `validation/////validation.json` -### Output: `_validation.json` (one per task) +### Output: `validation.json` (one per task) -**Stage Output:** ValidationResult dataclass + ValidationCriteria dataclass + PipelineMetadata -**Save Function:** `save_validation_output(validation_result: ValidationResult, criteria: ValidationCriteria, metadata: PipelineMetadata, output_path: Path)` +**Stage Output:** ValidationResult dataclass + PipelineMetadata +**Save Function:** `save_validation_output(validation_result: ValidationResult, metadata: PipelineMetadata, output_path: Path)` -**Implementation:** -- Stage generates `ValidationResult` dataclass object with validation information -- Stage generates `ValidationCriteria` dataclass object with criteria results -- Stage creates `PipelineMetadata` dataclass object with metadata (includes area_id, capability_id, task_id) -- Pass all to `save_validation_output(validation_result, criteria, metadata, output_path)` which creates `ValidationOutput` dataclass, serializes to JSON, and writes to file +**File Path:** `//validation/////validation.json` -**File Path:** `////validation//_validation.json` -Where `` is format `area_` + zero-padded 3-digit number, `` is format `cap_` + zero-padded 3-digit number, `` is format `task_` + zero-padded 3-digit number (e.g., `task_000_validation.json`) ```json { "metadata": { - "domain": "personal finance", - "domain_id": 
"personal_finance", - "area": "Cash Flow & Budget Management", - "area_id": "area_000", - "capability": "budget_policy_and_structure", - "capability_id": "cap_000", - "task_id": "task_000", - "stage": "validation", - "input_tags": { - "tasks_tag": "_20251014_114358", - "solutions_tag": "_20251016_182128" - }, - "tag": "_20251017_091500", - "timestamp": "2025-11-06T14:00:00Z" + "experiment_id": "r0_10x10", + "output_base_dir": "agentic_outputs", + "timestamp": "2025-11-06T14:00:00Z", + "input_stage_tag": "_20251016_182128", + "output_stage_tag": "_20251017_091500", + "resume": false }, "task_id": "task_000", - "capability_name": "budget_policy_and_structure", + "task": "You are advising a client who wants to set up a zero-based budget...", + "capability": "budget_policy_and_structure", "capability_id": "cap_000", - "is_valid": true, - "validation_score": 0.95, - "criteria": { - "solution_completeness": true, - "solution_accuracy": true, - "reasoning_quality": true, - "consensus_quality": true - }, + "area": "Cash Flow & Budget Management", + "area_id": "area_000", + "domain": "personal finance", + "domain_id": "domain_000", + "verification": true, "feedback": "Solution addresses all aspects of the task...", - "errors": [] + "score": 0.95, + "generation_metadata": { + "validation_method": "llm_based", + "criteria": { + "solution_completeness": true, + "solution_accuracy": true, + "reasoning_quality": true + } + } } ``` -**Schema (JSON representation of ValidationOutput dataclass):** -- `metadata`: Object containing pipeline metadata - - `domain`: String (required, human-readable domain name) - - `domain_id`: String (required, slugified domain identifier) - - `area`: String (required, human-readable area name, must match an area name from Stage 1) - - `area_id`: String (required, format `area_` + zero-padded 3-digit number, must match an area_id from Stage 1) - - `capability`: String (required, human-readable capability name, must match a capability name from Stage 2) 
- - `capability_id`: String (required, format `cap_` + zero-padded 3-digit number, must match a capability_id from Stage 2) - - `task_id`: String (required, format `task_` + zero-padded 3-digit number, must match a task_id from Stage 3) - - `stage`: String (required, value: "validation") - - `input_tags`: Object (required, contains the input tags used) - - `tasks_tag`: String (required, format `_YYYYMMDD_HHMMSS`, the tasks tag from Stage 3 used as input) - - `solutions_tag`: String (required, format `_YYYYMMDD_HHMMSS`, the solutions tag from Stage 4 used as input) - - `tag`: String (required, format `_YYYYMMDD_HHMMSS`, the tag used for this run's output) - - `timestamp`: String (required, ISO 8601 format) -- `task_id`: String (required, must match metadata.task_id) -- `capability_name`: String (required, human-readable capability name, must match metadata.capability) -- `capability_id`: String (required, format `cap_` + zero-padded 3-digit number, must match metadata.capability_id) -- `is_valid`: Boolean (required, overall validation status) -- `validation_score`: Float (required, 0.0 to 1.0) -- `criteria`: Object with boolean criteria (ValidationCriteria dataclass) - - `solution_completeness`: Boolean (required) - - `solution_accuracy`: Boolean (required) - - `reasoning_quality`: Boolean (required) - - `consensus_quality`: Boolean (required) -- `feedback`: String (required, detailed feedback) -- `errors`: Array of strings (required, list of errors if any) --- - -## ID Assignment Rules - -All IDs are string identifiers with explicit prefixes and sequential numbering: - -- **Area IDs**: Format `area_` + zero-padded 3-digit number (e.g., `area_000`, `area_001`) - - Assigned sequentially starting from `area_000` when areas are generated - - Unique within an experiment - -- **Capability IDs**: Format `cap_` + zero-padded 3-digit number (e.g., `cap_000`, `cap_001`) - - Assigned sequentially starting from `cap_000` within each area when capabilities are generated - - 
Unique within an area (but can repeat across areas, e.g., `area_000/cap_000/` and `area_001/cap_000/`) - -- **Task IDs**: Format `task_` + zero-padded 3-digit number (e.g., `task_000`, `task_001`) - - Assigned sequentially starting from `task_000` within each capability when tasks are generated - - Unique within a capability - -**ID Properties:** -- String type with explicit prefixes (`area_`, `cap_`, `task_`) -- Sequential assignment (000, 001, 002, ...) -- Zero-padded 3-digit numbers ensure proper sorting -- Stable once assigned (don't change if items are reordered) -- Human-readable names are stored alongside IDs in JSON files From 2d383059a12d8a68fdeb845abb074bb843c766a2 Mon Sep 17 00:00:00 2001 From: kohankhaki Date: Tue, 25 Nov 2025 03:31:44 -0500 Subject: [PATCH 3/7] Added dataclasses, load, and save functions + README. --- src/schemas/README.md | 86 +++++++++ src/schemas/__init__.py | 64 +++++++ src/schemas/area_schemas.py | 42 +++++ src/schemas/capability_schemas.py | 48 ++++++ src/schemas/experiment_schemas.py | 71 ++++++++ src/schemas/io_utils.py | 278 ++++++++++++++++++++++++++++++ src/schemas/metadata_schemas.py | 48 ++++++ src/schemas/solution_schemas.py | 60 +++++++ src/schemas/task_schemas.py | 30 ++++ src/schemas/validation_schemas.py | 60 +++++++ 10 files changed, 787 insertions(+) create mode 100644 src/schemas/README.md create mode 100644 src/schemas/__init__.py create mode 100644 src/schemas/area_schemas.py create mode 100644 src/schemas/capability_schemas.py create mode 100644 src/schemas/experiment_schemas.py create mode 100644 src/schemas/io_utils.py create mode 100644 src/schemas/metadata_schemas.py create mode 100644 src/schemas/solution_schemas.py create mode 100644 src/schemas/task_schemas.py create mode 100644 src/schemas/validation_schemas.py diff --git a/src/schemas/README.md b/src/schemas/README.md new file mode 100644 index 0000000..73da492 --- /dev/null +++ b/src/schemas/README.md @@ -0,0 +1,86 @@ +# ACE Pipeline Schemas + +This 
directory contains standardized schemas for all ACE pipeline stages, ensuring consistent data formats across different implementations. + +## Structure + +- **[`PIPELINE_SCHEMAS.md`](PIPELINE_SCHEMAS.md)** - Complete documentation of input/output formats for each stage +- **Python Dataclasses** - Type-safe data structures for each stage: + - [`experiment_schemas.py`](experiment_schemas.py) - Experiment and Domain (Stage 0) + - [`metadata_schemas.py`](metadata_schemas.py) - Common metadata (PipelineMetadata) + - [`area_schemas.py`](area_schemas.py) - Area generation (Stage 1) + - [`capability_schemas.py`](capability_schemas.py) - Capability generation (Stage 2) + - [`task_schemas.py`](task_schemas.py) - Task generation (Stage 3) + - [`solution_schemas.py`](solution_schemas.py) - Solution generation (Stage 4) + - [`validation_schemas.py`](validation_schemas.py) - Validation (Stage 5) +- **I/O Utilities** - Save and load functions: + - [`io_utils.py`](io_utils.py) - Functions to save/load all stage outputs (save/load functions for all 7 stage outputs) + +## Usage + +### Using Python Dataclasses + +```python +from src.schemas import ( + Experiment, + Domain, + PipelineMetadata, + Area, + Capability, + Task, + TaskSolution, + ValidationResult, +) + +# Create area +area = Area( + name="Cash Flow & Budget Management", + area_id="area_000", + description="Design and monitor budgets...", + domain="personal finance", + domain_id="domain_000", + # generation_metadata is optional +) + +# Convert to dict for JSON serialization +data = area.to_dict() + +# Load from dict +area = Area.from_dict(data) +``` + +### Using Save/Load Functions + +```python +from pathlib import Path +from src.schemas import ( + save_areas_output, + load_areas_output, + PipelineMetadata, + Area, +) + +# Save areas +areas = [Area(...), Area(...)] +metadata = PipelineMetadata( + experiment_id="r0_10x10", + output_base_dir="agentic_outputs", + timestamp="2025-11-06T12:00:00Z", + 
output_stage_tag="_20251009_122040" +) +save_areas_output(areas, metadata, Path("output/areas.json")) + +# Load areas +areas, metadata = load_areas_output(Path("output/areas.json")) +``` + +## Pipeline Stages + +0. **Experiment Setup** → `Experiment`, `Domain` +1. **Area Generation** → `Area` +2. **Capability Generation** → `Capability` +3. **Task Generation** → `Task` +4. **Solution Generation** → `TaskSolution` +5. **Validation** → `ValidationResult` + +See [`PIPELINE_SCHEMAS.md`](PIPELINE_SCHEMAS.md) for detailed specifications. diff --git a/src/schemas/__init__.py b/src/schemas/__init__.py new file mode 100644 index 0000000..4b006fa --- /dev/null +++ b/src/schemas/__init__.py @@ -0,0 +1,64 @@ +"""Standardized schemas for ACE pipeline stages. + +This module provides standardized data structures for all pipeline stages, +ensuring consistent input/output formats regardless of internal implementation. +""" + +from src.schemas.area_schemas import Area +from src.schemas.capability_schemas import Capability +from src.schemas.experiment_schemas import Domain, Experiment +from src.schemas.io_utils import ( + load_areas_output, + load_capabilities_output, + load_domain_output, + load_experiment_output, + load_solution_output, + load_tasks_output, + load_validation_output, + save_areas_output, + save_capabilities_output, + save_domain_output, + save_experiment_output, + save_solution_output, + save_tasks_output, + save_validation_output, +) +from src.schemas.metadata_schemas import PipelineMetadata +from src.schemas.solution_schemas import TaskSolution +from src.schemas.task_schemas import Task +from src.schemas.validation_schemas import ValidationResult + + +__all__ = [ + # Metadata + "PipelineMetadata", + # Experiment schemas (Stage 0) + "Experiment", + "Domain", + # Area schemas + "Area", + # Capability schemas + "Capability", + # Task schemas + "Task", + # Solution schemas + "TaskSolution", + # Validation schemas + "ValidationResult", + # I/O functions - Save + 
"save_experiment_output", + "save_domain_output", + "save_areas_output", + "save_capabilities_output", + "save_tasks_output", + "save_solution_output", + "save_validation_output", + # I/O functions - Load + "load_experiment_output", + "load_domain_output", + "load_areas_output", + "load_capabilities_output", + "load_tasks_output", + "load_solution_output", + "load_validation_output", +] diff --git a/src/schemas/area_schemas.py b/src/schemas/area_schemas.py new file mode 100644 index 0000000..b149436 --- /dev/null +++ b/src/schemas/area_schemas.py @@ -0,0 +1,42 @@ +"""Schemas for area generation stage.""" + +from dataclasses import dataclass, field +from typing import Dict, Optional + + +@dataclass +class Area: + """Represents a domain area.""" + + name: str + area_id: str + description: Optional[str] = None + domain: str = "" + domain_id: str = "" + generation_metadata: Optional[Dict] = field(default_factory=dict) + + def to_dict(self): + """Convert to dictionary.""" + result = { + "name": self.name, + "area_id": self.area_id, + "domain": self.domain, + "domain_id": self.domain_id, + } + if self.description is not None: + result["description"] = self.description + if self.generation_metadata: + result["generation_metadata"] = self.generation_metadata + return result + + @classmethod + def from_dict(cls, data: dict): + """Create from dictionary.""" + return cls( + name=data["name"], + area_id=data["area_id"], + description=data.get("description"), + domain=data.get("domain", ""), + domain_id=data.get("domain_id", ""), + generation_metadata=data.get("generation_metadata", {}), + ) diff --git a/src/schemas/capability_schemas.py b/src/schemas/capability_schemas.py new file mode 100644 index 0000000..5b1e9d8 --- /dev/null +++ b/src/schemas/capability_schemas.py @@ -0,0 +1,48 @@ +"""Schemas for capability generation stage.""" + +from dataclasses import dataclass, field +from typing import Dict, Optional + + +@dataclass +class Capability: + """Represents a capability 
within an area.""" + + name: str + capability_id: str + description: Optional[str] = None + area: str = "" + area_id: str = "" + domain: str = "" + domain_id: str = "" + generation_metadata: Optional[Dict] = field(default_factory=dict) + + def to_dict(self): + """Convert to dictionary.""" + result = { + "name": self.name, + "capability_id": self.capability_id, + "area": self.area, + "area_id": self.area_id, + "domain": self.domain, + "domain_id": self.domain_id, + } + if self.description is not None: + result["description"] = self.description + if self.generation_metadata: + result["generation_metadata"] = self.generation_metadata + return result + + @classmethod + def from_dict(cls, data: dict): + """Create from dictionary.""" + return cls( + name=data["name"], + capability_id=data["capability_id"], + description=data.get("description"), + area=data.get("area", ""), + area_id=data.get("area_id", ""), + domain=data.get("domain", ""), + domain_id=data.get("domain_id", ""), + generation_metadata=data.get("generation_metadata", {}), + ) diff --git a/src/schemas/experiment_schemas.py b/src/schemas/experiment_schemas.py new file mode 100644 index 0000000..5d76518 --- /dev/null +++ b/src/schemas/experiment_schemas.py @@ -0,0 +1,71 @@ +"""Schemas for experiment setup stage (Stage 0).""" + +from dataclasses import dataclass +from typing import Any, Dict, Optional + + +@dataclass +class Experiment: + """Represents experiment metadata and configuration.""" + + experiment_id: str + domain: str + domain_id: str + pipeline_type: Optional[str] = None + configuration: Dict[str, Any] = None + + def __post_init__(self): + """Initialize configuration if not provided.""" + if self.configuration is None: + self.configuration = {} + + def to_dict(self): + """Convert to dictionary.""" + result = { + "experiment_id": self.experiment_id, + "domain": self.domain, + "domain_id": self.domain_id, + "configuration": self.configuration, + } + if self.pipeline_type is not None: + 
result["pipeline_type"] = self.pipeline_type + return result + + @classmethod + def from_dict(cls, data: dict): + """Create from dictionary.""" + return cls( + experiment_id=data["experiment_id"], + domain=data["domain"], + domain_id=data["domain_id"], + pipeline_type=data.get("pipeline_type"), + configuration=data.get("configuration", {}), + ) + + +@dataclass +class Domain: + """Represents a domain.""" + + name: str + domain_id: str + description: Optional[str] = None + + def to_dict(self): + """Convert to dictionary.""" + result = { + "name": self.name, + "domain_id": self.domain_id, + } + if self.description is not None: + result["description"] = self.description + return result + + @classmethod + def from_dict(cls, data: dict): + """Create from dictionary.""" + return cls( + name=data["name"], + domain_id=data["domain_id"], + description=data.get("description"), + ) diff --git a/src/schemas/io_utils.py b/src/schemas/io_utils.py new file mode 100644 index 0000000..335757a --- /dev/null +++ b/src/schemas/io_utils.py @@ -0,0 +1,278 @@ +"""I/O utilities for saving and loading pipeline stage outputs.""" + +import json +from pathlib import Path +from typing import List, Tuple + +from src.schemas.area_schemas import Area +from src.schemas.capability_schemas import Capability +from src.schemas.experiment_schemas import Domain, Experiment +from src.schemas.metadata_schemas import PipelineMetadata +from src.schemas.solution_schemas import TaskSolution +from src.schemas.task_schemas import Task +from src.schemas.validation_schemas import ValidationResult + + +def save_experiment_output( + experiment: Experiment, metadata: PipelineMetadata, output_path: Path +) -> None: + """Save experiment output to JSON file. 
def _write_json(data: dict, output_path: Path) -> None:
    """Serialize *data* as pretty-printed UTF-8 JSON at *output_path*.

    Parent directories are created on demand so callers can pass paths
    inside not-yet-existing tag directories.
    """
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2, ensure_ascii=False)


def _read_json(file_path: Path) -> dict:
    """Read and return the JSON document stored at *file_path*."""
    with open(file_path, "r", encoding="utf-8") as f:
        return json.load(f)


def save_domain_output(
    domain: Domain, metadata: PipelineMetadata, output_path: Path
) -> None:
    """Save domain output to JSON file.

    Args:
        domain: Domain dataclass
        metadata: PipelineMetadata dataclass
        output_path: Path to save the JSON file
    """
    _write_json(
        {
            "metadata": metadata.to_dict(),
            "domain": domain.to_dict(),
        },
        output_path,
    )


def save_areas_output(
    areas: List[Area], metadata: PipelineMetadata, output_path: Path
) -> None:
    """Save areas output to JSON file.

    Args:
        areas: List of Area dataclasses
        metadata: PipelineMetadata dataclass
        output_path: Path to save the JSON file
    """
    _write_json(
        {
            "metadata": metadata.to_dict(),
            "areas": [area.to_dict() for area in areas],
        },
        output_path,
    )


def save_capabilities_output(
    capabilities: List[Capability], metadata: PipelineMetadata, output_path: Path
) -> None:
    """Save capabilities output to JSON file.

    Args:
        capabilities: List of Capability dataclasses
        metadata: PipelineMetadata dataclass
        output_path: Path to save the JSON file
    """
    _write_json(
        {
            "metadata": metadata.to_dict(),
            "capabilities": [cap.to_dict() for cap in capabilities],
        },
        output_path,
    )


def save_tasks_output(
    tasks: List[Task], metadata: PipelineMetadata, output_path: Path
) -> None:
    """Save tasks output to JSON file.

    Args:
        tasks: List of Task dataclasses
        metadata: PipelineMetadata dataclass
        output_path: Path to save the JSON file
    """
    _write_json(
        {
            "metadata": metadata.to_dict(),
            "tasks": [task.to_dict() for task in tasks],
        },
        output_path,
    )


def save_solution_output(
    task_solution: TaskSolution, metadata: PipelineMetadata, output_path: Path
) -> None:
    """Save solution output to JSON file.

    Note: the TaskSolution fields are flattened into the top level of the
    JSON document (alongside "metadata") rather than nested under a key,
    matching the solution.json schema.

    Args:
        task_solution: TaskSolution dataclass
        metadata: PipelineMetadata dataclass
        output_path: Path to save the JSON file
    """
    _write_json(
        {
            "metadata": metadata.to_dict(),
            **task_solution.to_dict(),
        },
        output_path,
    )


def save_validation_output(
    validation_result: ValidationResult, metadata: PipelineMetadata, output_path: Path
) -> None:
    """Save validation output to JSON file.

    Note: the ValidationResult fields are flattened into the top level of
    the JSON document (alongside "metadata") rather than nested under a
    key, matching the validation.json schema.

    Args:
        validation_result: ValidationResult dataclass
        metadata: PipelineMetadata dataclass
        output_path: Path to save the JSON file
    """
    _write_json(
        {
            "metadata": metadata.to_dict(),
            **validation_result.to_dict(),
        },
        output_path,
    )


# Load functions


def load_experiment_output(file_path: Path) -> Tuple[Experiment, PipelineMetadata]:
    """Load experiment output from JSON file.

    Args:
        file_path: Path to the JSON file

    Returns:
        Tuple of (Experiment, PipelineMetadata)
    """
    data = _read_json(file_path)
    return (
        Experiment.from_dict(data["experiment"]),
        PipelineMetadata.from_dict(data["metadata"]),
    )


def load_domain_output(file_path: Path) -> Tuple[Domain, PipelineMetadata]:
    """Load domain output from JSON file.

    Args:
        file_path: Path to the JSON file

    Returns:
        Tuple of (Domain, PipelineMetadata)
    """
    data = _read_json(file_path)
    return (
        Domain.from_dict(data["domain"]),
        PipelineMetadata.from_dict(data["metadata"]),
    )


def load_areas_output(file_path: Path) -> Tuple[List[Area], PipelineMetadata]:
    """Load areas output from JSON file.

    Args:
        file_path: Path to the JSON file

    Returns:
        Tuple of (List[Area], PipelineMetadata)
    """
    data = _read_json(file_path)
    areas = [Area.from_dict(area_data) for area_data in data["areas"]]
    return areas, PipelineMetadata.from_dict(data["metadata"])
+ + Args: + file_path: Path to the JSON file + + Returns + ------- + Tuple of (List[Capability], PipelineMetadata) + """ + with open(file_path, "r", encoding="utf-8") as f: + data = json.load(f) + metadata = PipelineMetadata.from_dict(data["metadata"]) + capabilities = [Capability.from_dict(cap_data) for cap_data in data["capabilities"]] + return capabilities, metadata + + +def load_tasks_output(file_path: Path) -> Tuple[List[Task], PipelineMetadata]: + """Load tasks output from JSON file. + + Args: + file_path: Path to the JSON file + + Returns + ------- + Tuple of (List[Task], PipelineMetadata) + """ + with open(file_path, "r", encoding="utf-8") as f: + data = json.load(f) + metadata = PipelineMetadata.from_dict(data["metadata"]) + tasks = [Task.from_dict(task_data) for task_data in data["tasks"]] + return tasks, metadata + + +def load_solution_output(file_path: Path) -> Tuple[TaskSolution, PipelineMetadata]: + """Load solution output from JSON file. + + Args: + file_path: Path to the JSON file + + Returns + ------- + Tuple of (TaskSolution, PipelineMetadata) + """ + with open(file_path, "r", encoding="utf-8") as f: + data = json.load(f) + metadata = PipelineMetadata.from_dict(data["metadata"]) + # Solution files have flattened structure + # (metadata + all task_solution fields) + solution_data = {k: v for k, v in data.items() if k != "metadata"} + task_solution = TaskSolution.from_dict(solution_data) + return task_solution, metadata + + +def load_validation_output( + file_path: Path, +) -> Tuple[ValidationResult, PipelineMetadata]: + """Load validation output from JSON file. 
+ + Args: + file_path: Path to the JSON file + + Returns + ------- + Tuple of (ValidationResult, PipelineMetadata) + """ + with open(file_path, "r", encoding="utf-8") as f: + data = json.load(f) + metadata = PipelineMetadata.from_dict(data["metadata"]) + # Validation files have flattened structure + # (metadata + all validation_result fields) + validation_data = {k: v for k, v in data.items() if k != "metadata"} + validation_result = ValidationResult.from_dict(validation_data) + return validation_result, metadata diff --git a/src/schemas/metadata_schemas.py b/src/schemas/metadata_schemas.py new file mode 100644 index 0000000..c671552 --- /dev/null +++ b/src/schemas/metadata_schemas.py @@ -0,0 +1,48 @@ +"""Metadata schemas for pipeline stages.""" + +from dataclasses import dataclass +from datetime import datetime +from typing import Optional + + +@dataclass +class PipelineMetadata: + """Standard metadata for all pipeline stage outputs.""" + + experiment_id: str + output_base_dir: str + timestamp: str + input_stage_tag: Optional[str] = None + output_stage_tag: Optional[str] = None + resume: bool = False + + def __post_init__(self): + """Set default timestamp if not provided.""" + if not self.timestamp: + self.timestamp = datetime.utcnow().isoformat() + "Z" + + def to_dict(self): + """Convert to dictionary.""" + result = { + "experiment_id": self.experiment_id, + "output_base_dir": self.output_base_dir, + "timestamp": self.timestamp, + "resume": self.resume, + } + if self.input_stage_tag is not None: + result["input_stage_tag"] = self.input_stage_tag + if self.output_stage_tag is not None: + result["output_stage_tag"] = self.output_stage_tag + return result + + @classmethod + def from_dict(cls, data: dict): + """Create from dictionary.""" + return cls( + experiment_id=data["experiment_id"], + output_base_dir=data["output_base_dir"], + timestamp=data["timestamp"], + input_stage_tag=data.get("input_stage_tag"), + output_stage_tag=data.get("output_stage_tag"), + 
resume=data.get("resume", False), + ) diff --git a/src/schemas/solution_schemas.py b/src/schemas/solution_schemas.py new file mode 100644 index 0000000..e484cc1 --- /dev/null +++ b/src/schemas/solution_schemas.py @@ -0,0 +1,60 @@ +"""Schemas for solution generation stage.""" + +from dataclasses import dataclass, field +from typing import Dict, Optional + + +@dataclass +class TaskSolution: + """Represents the complete solution for a task.""" + + task_id: str + task: str + capability: str + capability_id: str + area: str + area_id: str + domain: str + domain_id: str + solution: str + reasoning: str + numerical_answer: Optional[str] = None + generation_metadata: Optional[Dict] = field(default_factory=dict) + + def to_dict(self): + """Convert to dictionary.""" + result = { + "task_id": self.task_id, + "task": self.task, + "capability": self.capability, + "capability_id": self.capability_id, + "area": self.area, + "area_id": self.area_id, + "domain": self.domain, + "domain_id": self.domain_id, + "solution": self.solution, + "reasoning": self.reasoning, + } + if self.numerical_answer is not None: + result["numerical_answer"] = self.numerical_answer + if self.generation_metadata: + result["generation_metadata"] = self.generation_metadata + return result + + @classmethod + def from_dict(cls, data: dict): + """Create from dictionary.""" + return cls( + task_id=data["task_id"], + task=data["task"], + capability=data["capability"], + capability_id=data["capability_id"], + area=data["area"], + area_id=data["area_id"], + domain=data["domain"], + domain_id=data["domain_id"], + solution=data["solution"], + reasoning=data["reasoning"], + numerical_answer=data.get("numerical_answer"), + generation_metadata=data.get("generation_metadata", {}), + ) diff --git a/src/schemas/task_schemas.py b/src/schemas/task_schemas.py new file mode 100644 index 0000000..882a6ab --- /dev/null +++ b/src/schemas/task_schemas.py @@ -0,0 +1,30 @@ +"""Schemas for task generation stage.""" + +from 
dataclasses import dataclass + + +@dataclass +class Task: + """Represents a task for a capability.""" + + task_id: str + task: str + capability_id: str + capability: str + area: str + area_id: str + domain: str + domain_id: str + + def to_dict(self): + """Convert to dictionary.""" + return { + "task_id": self.task_id, + "task": self.task, + "capability_id": self.capability_id, + "capability": self.capability, + "area": self.area, + "area_id": self.area_id, + "domain": self.domain, + "domain_id": self.domain_id, + } diff --git a/src/schemas/validation_schemas.py b/src/schemas/validation_schemas.py new file mode 100644 index 0000000..8d3b521 --- /dev/null +++ b/src/schemas/validation_schemas.py @@ -0,0 +1,60 @@ +"""Schemas for validation stage.""" + +from dataclasses import dataclass, field +from typing import Dict, Optional + + +@dataclass +class ValidationResult: + """Validation result for a single task.""" + + task_id: str + task: str + capability: str + capability_id: str + area: str + area_id: str + domain: str + domain_id: str + verification: bool + feedback: str + score: Optional[float] = None + generation_metadata: Optional[Dict] = field(default_factory=dict) + + def to_dict(self): + """Convert to dictionary.""" + result = { + "task_id": self.task_id, + "task": self.task, + "capability": self.capability, + "capability_id": self.capability_id, + "area": self.area, + "area_id": self.area_id, + "domain": self.domain, + "domain_id": self.domain_id, + "verification": self.verification, + "feedback": self.feedback, + } + if self.score is not None: + result["score"] = self.score + if self.generation_metadata: + result["generation_metadata"] = self.generation_metadata + return result + + @classmethod + def from_dict(cls, data: dict): + """Create from dictionary.""" + return cls( + task_id=data["task_id"], + task=data["task"], + capability=data["capability"], + capability_id=data["capability_id"], + area=data["area"], + area_id=data["area_id"], + 
domain=data["domain"], + domain_id=data["domain_id"], + verification=data["verification"], + feedback=data["feedback"], + score=data.get("score"), + generation_metadata=data.get("generation_metadata", {}), + ) From af0a03a86919ab6dc26501cfa85b0a5077212188 Mon Sep 17 00:00:00 2001 From: kohankhaki Date: Mon, 1 Dec 2025 14:49:53 -0500 Subject: [PATCH 4/7] Refactor schemas: split Domain/Experiment into separate files, rename I/O functions, use dataclass objects for hierarchical relationships, and improve documentation --- src/schemas/PIPELINE_SCHEMAS.md | 16 ++++---- src/schemas/README.md | 17 ++++---- src/schemas/__init__.py | 59 ++++++++++++++------------- src/schemas/area_schemas.py | 26 ++++++++---- src/schemas/capability_schemas.py | 44 ++++++++++++++------ src/schemas/domain_schemas.py | 35 ++++++++++++++++ src/schemas/experiment_schemas.py | 42 +++---------------- src/schemas/io_utils.py | 33 ++++++++------- src/schemas/metadata_schemas.py | 39 +++++++++++++++--- src/schemas/solution_schemas.py | 68 ++++++++++++++++++++++--------- src/schemas/task_schemas.py | 68 ++++++++++++++++++++++++------- src/schemas/validation_schemas.py | 68 ++++++++++++++++++++++--------- 12 files changed, 340 insertions(+), 175 deletions(-) create mode 100644 src/schemas/domain_schemas.py diff --git a/src/schemas/PIPELINE_SCHEMAS.md b/src/schemas/PIPELINE_SCHEMAS.md index 7bef4f7..d1dd4d8 100644 --- a/src/schemas/PIPELINE_SCHEMAS.md +++ b/src/schemas/PIPELINE_SCHEMAS.md @@ -25,7 +25,7 @@ Each stage follows a consistent pattern: **Important:** All stage implementations must follow this pattern to ensure the pipeline is clean, consistent, and maintainable. This enables interoperability between different implementations, resumability of failed runs, and clear traceability through the pipeline. -**Note:** The dataclasses, save functions (`save__output(data, metadata, output_path)`), and load functions (`load__output(file_path) -> `) for each stage will be provided and must be used. 
Do not implement custom serialization or data structures - use the standardized schemas to ensure consistency across the pipeline. Dataclasses provide type safety, validation, and clear structure. JSON is the serialization format. +**Note:** The dataclasses, save functions (`save_(data, metadata, output_path)`), and load functions (`load_(file_path) -> `) for each stage will be provided and must be used. Do not implement custom serialization or data structures - use the standardized schemas to ensure consistency across the pipeline. Dataclasses provide type safety, validation, and clear structure. JSON is the serialization format. **Iteration Note:** Some stages operate on subsets (one area, capability, or task at a time) and require an outer orchestrator/loop script to iterate over all items: - **Stage 2 (Capability Generation)**: Operates on one area at a time - orchestrator loops over all areas from Stage 1 @@ -342,7 +342,7 @@ This stage creates two files: #### Output 1: `experiment.json` **Stage Output:** Experiment dataclass + PipelineMetadata -**Save Function:** `save_experiment_output(experiment: Experiment, metadata: PipelineMetadata, output_path: Path)` +**Save Function:** `save_experiment(experiment: Experiment, metadata: PipelineMetadata, output_path: Path)` **File Path:** `//experiment.json` @@ -373,7 +373,7 @@ This stage creates two files: #### Output 2: `domain.json` **Stage Output:** Domain dataclass object + PipelineMetadata -**Save Function:** `save_domain_output(domain: Domain, metadata: PipelineMetadata, output_path: Path)` +**Save Function:** `save_domain(domain: Domain, metadata: PipelineMetadata, output_path: Path)` **File Path:** `//domain/domain.json` @@ -411,7 +411,7 @@ This stage creates two files: ### Output: `areas.json` **Stage Output:** List[Area] dataclasses + PipelineMetadata -**Save Function:** `save_areas_output(areas: List[Area], metadata: PipelineMetadata, output_path: Path)` +**Save Function:** `save_areas(areas: List[Area], 
metadata: PipelineMetadata, output_path: Path)` **File Path:** `//areas//areas.json` ```json @@ -456,7 +456,7 @@ This stage creates two files: ### Output: `capabilities.json` (one per area) **Stage Output:** List[Capability] dataclasses + PipelineMetadata -**Save Function:** `save_capabilities_output(capabilities: List[Capability], metadata: PipelineMetadata, output_path: Path)` +**Save Function:** `save_capabilities(capabilities: List[Capability], metadata: PipelineMetadata, output_path: Path)` **File Path:** `//capabilities///capabilities.json` @@ -504,7 +504,7 @@ This stage creates two files: ### Output: `tasks.json` (one per capability) **Stage Output:** List[Task] dataclasses + PipelineMetadata -**Save Function:** `save_tasks_output(tasks: List[Task], metadata: PipelineMetadata, output_path: Path)` +**Save Function:** `save_tasks(tasks: List[Task], metadata: PipelineMetadata, output_path: Path)` **File Path:** `//tasks////tasks.json` @@ -563,7 +563,7 @@ This stage creates two files: ### Output: `solution.json` (one per task) **Stage Output:** TaskSolution dataclass + PipelineMetadata -**Save Function:** `save_solution_output(task_solution: TaskSolution, metadata: PipelineMetadata, output_path: Path)` +**Save Function:** `save_solution(task_solution: TaskSolution, metadata: PipelineMetadata, output_path: Path)` **File Path:** `//solutions/////solution.json` @@ -634,7 +634,7 @@ This stage creates two files: ### Output: `validation.json` (one per task) **Stage Output:** ValidationResult dataclass + PipelineMetadata -**Save Function:** `save_validation_output(validation_result: ValidationResult, metadata: PipelineMetadata, output_path: Path)` +**Save Function:** `save_validation(validation_result: ValidationResult, metadata: PipelineMetadata, output_path: Path)` **File Path:** `//validation/////validation.json` diff --git a/src/schemas/README.md b/src/schemas/README.md index 73da492..a2bc443 100644 --- a/src/schemas/README.md +++ b/src/schemas/README.md @@ -6,7 
+6,8 @@ This directory contains standardized schemas for all ACE pipeline stages, ensuri - **[`PIPELINE_SCHEMAS.md`](PIPELINE_SCHEMAS.md)** - Complete documentation of input/output formats for each stage - **Python Dataclasses** - Type-safe data structures for each stage: - - [`experiment_schemas.py`](experiment_schemas.py) - Experiment and Domain (Stage 0) + - [`experiment_schemas.py`](experiment_schemas.py) - Experiment (Stage 0) + - [`domain_schemas.py`](domain_schemas.py) - Domain (Stage 0) - [`metadata_schemas.py`](metadata_schemas.py) - Common metadata (PipelineMetadata) - [`area_schemas.py`](area_schemas.py) - Area generation (Stage 1) - [`capability_schemas.py`](capability_schemas.py) - Capability generation (Stage 2) @@ -22,8 +23,8 @@ This directory contains standardized schemas for all ACE pipeline stages, ensuri ```python from src.schemas import ( - Experiment, Domain, + Experiment, PipelineMetadata, Area, Capability, @@ -33,12 +34,12 @@ from src.schemas import ( ) # Create area +domain = Domain(name="Personal Finance", domain_id="domain_000") area = Area( name="Cash Flow & Budget Management", area_id="area_000", description="Design and monitor budgets...", - domain="personal finance", - domain_id="domain_000", + domain=domain, # generation_metadata is optional ) @@ -54,8 +55,8 @@ area = Area.from_dict(data) ```python from pathlib import Path from src.schemas import ( - save_areas_output, - load_areas_output, + save_areas, + load_areas, PipelineMetadata, Area, ) @@ -68,10 +69,10 @@ metadata = PipelineMetadata( timestamp="2025-11-06T12:00:00Z", output_stage_tag="_20251009_122040" ) -save_areas_output(areas, metadata, Path("output/areas.json")) +save_areas(areas, metadata, Path("output/areas.json")) # Load areas -areas, metadata = load_areas_output(Path("output/areas.json")) +areas, metadata = load_areas(Path("output/areas.json")) ``` ## Pipeline Stages diff --git a/src/schemas/__init__.py b/src/schemas/__init__.py index 4b006fa..29e46fc 100644 --- 
a/src/schemas/__init__.py +++ b/src/schemas/__init__.py @@ -6,22 +6,23 @@ from src.schemas.area_schemas import Area from src.schemas.capability_schemas import Capability -from src.schemas.experiment_schemas import Domain, Experiment +from src.schemas.domain_schemas import Domain +from src.schemas.experiment_schemas import Experiment from src.schemas.io_utils import ( - load_areas_output, - load_capabilities_output, - load_domain_output, - load_experiment_output, - load_solution_output, - load_tasks_output, - load_validation_output, - save_areas_output, - save_capabilities_output, - save_domain_output, - save_experiment_output, - save_solution_output, - save_tasks_output, - save_validation_output, + load_areas, + load_capabilities, + load_domain, + load_experiment, + load_solution, + load_tasks, + load_validation, + save_areas, + save_capabilities, + save_domain, + save_experiment, + save_solution, + save_tasks, + save_validation, ) from src.schemas.metadata_schemas import PipelineMetadata from src.schemas.solution_schemas import TaskSolution @@ -46,19 +47,19 @@ # Validation schemas "ValidationResult", # I/O functions - Save - "save_experiment_output", - "save_domain_output", - "save_areas_output", - "save_capabilities_output", - "save_tasks_output", - "save_solution_output", - "save_validation_output", + "save_experiment", + "save_domain", + "save_areas", + "save_capabilities", + "save_tasks", + "save_solution", + "save_validation", # I/O functions - Load - "load_experiment_output", - "load_domain_output", - "load_areas_output", - "load_capabilities_output", - "load_tasks_output", - "load_solution_output", - "load_validation_output", + "load_experiment", + "load_domain", + "load_areas", + "load_capabilities", + "load_tasks", + "load_solution", + "load_validation", ] diff --git a/src/schemas/area_schemas.py b/src/schemas/area_schemas.py index b149436..cd212b8 100644 --- a/src/schemas/area_schemas.py +++ b/src/schemas/area_schemas.py @@ -1,8 +1,14 @@ -"""Schemas for 
area generation stage.""" +"""Schemas for area generation stage (Stage 1). + +Defines Area dataclass representing a domain area. Areas are high-level categories +within a domain (e.g., "Budgeting" within "Personal Finance"). +""" from dataclasses import dataclass, field from typing import Dict, Optional +from src.schemas.domain_schemas import Domain + @dataclass class Area: @@ -11,8 +17,7 @@ class Area: name: str area_id: str description: Optional[str] = None - domain: str = "" - domain_id: str = "" + domain: Optional[Domain] = None generation_metadata: Optional[Dict] = field(default_factory=dict) def to_dict(self): @@ -20,9 +25,10 @@ def to_dict(self): result = { "name": self.name, "area_id": self.area_id, - "domain": self.domain, - "domain_id": self.domain_id, } + if self.domain is not None: + result["domain"] = self.domain.name + result["domain_id"] = self.domain.domain_id if self.description is not None: result["description"] = self.description if self.generation_metadata: @@ -32,11 +38,17 @@ def to_dict(self): @classmethod def from_dict(cls, data: dict): """Create from dictionary.""" + domain = None + if "domain" in data and "domain_id" in data: + domain = Domain( + name=data["domain"], + domain_id=data["domain_id"], + description=None, + ) return cls( name=data["name"], area_id=data["area_id"], description=data.get("description"), - domain=data.get("domain", ""), - domain_id=data.get("domain_id", ""), + domain=domain, generation_metadata=data.get("generation_metadata", {}), ) diff --git a/src/schemas/capability_schemas.py b/src/schemas/capability_schemas.py index 5b1e9d8..ea4c92e 100644 --- a/src/schemas/capability_schemas.py +++ b/src/schemas/capability_schemas.py @@ -1,8 +1,15 @@ -"""Schemas for capability generation stage.""" +"""Schemas for capability generation stage (Stage 2). + +Defines Capability dataclass representing a capability within an area. Capabilities +are specific skills or abilities (e.g., "Budget Creation" within "Budgeting" area). 
+""" from dataclasses import dataclass, field from typing import Dict, Optional +from src.schemas.area_schemas import Area +from src.schemas.domain_schemas import Domain + @dataclass class Capability: @@ -11,10 +18,7 @@ class Capability: name: str capability_id: str description: Optional[str] = None - area: str = "" - area_id: str = "" - domain: str = "" - domain_id: str = "" + area: Optional[Area] = None generation_metadata: Optional[Dict] = field(default_factory=dict) def to_dict(self): @@ -22,11 +26,13 @@ def to_dict(self): result = { "name": self.name, "capability_id": self.capability_id, - "area": self.area, - "area_id": self.area_id, - "domain": self.domain, - "domain_id": self.domain_id, } + if self.area is not None: + result["area"] = self.area.name + result["area_id"] = self.area.area_id + if self.area.domain is not None: + result["domain"] = self.area.domain.name + result["domain_id"] = self.area.domain.domain_id if self.description is not None: result["description"] = self.description if self.generation_metadata: @@ -36,13 +42,25 @@ def to_dict(self): @classmethod def from_dict(cls, data: dict): """Create from dictionary.""" + area = None + if "area" in data and "area_id" in data: + domain = None + if "domain" in data and "domain_id" in data: + domain = Domain( + name=data["domain"], + domain_id=data["domain_id"], + description=None, + ) + area = Area( + name=data["area"], + area_id=data["area_id"], + description=None, + domain=domain, + ) return cls( name=data["name"], capability_id=data["capability_id"], description=data.get("description"), - area=data.get("area", ""), - area_id=data.get("area_id", ""), - domain=data.get("domain", ""), - domain_id=data.get("domain_id", ""), + area=area, generation_metadata=data.get("generation_metadata", {}), ) diff --git a/src/schemas/domain_schemas.py b/src/schemas/domain_schemas.py new file mode 100644 index 0000000..fc1ee18 --- /dev/null +++ b/src/schemas/domain_schemas.py @@ -0,0 +1,35 @@ +"""Schemas for domain 
(Stage 0). + +Defines Domain dataclass representing the domain being evaluated in the experiment. +""" + +from dataclasses import dataclass +from typing import Optional + + +@dataclass +class Domain: + """Represents a domain.""" + + name: str + domain_id: str + description: Optional[str] = None + + def to_dict(self): + """Convert to dictionary.""" + result = { + "name": self.name, + "domain_id": self.domain_id, + } + if self.description is not None: + result["description"] = self.description + return result + + @classmethod + def from_dict(cls, data: dict): + """Create from dictionary.""" + return cls( + name=data["name"], + domain_id=data["domain_id"], + description=data.get("description"), + ) diff --git a/src/schemas/experiment_schemas.py b/src/schemas/experiment_schemas.py index 5d76518..fc2cfb7 100644 --- a/src/schemas/experiment_schemas.py +++ b/src/schemas/experiment_schemas.py @@ -1,6 +1,9 @@ -"""Schemas for experiment setup stage (Stage 0).""" +"""Schemas for experiment setup stage (Stage 0). -from dataclasses import dataclass +Defines Experiment dataclass containing experiment configuration and metadata. 
+""" + +from dataclasses import dataclass, field from typing import Any, Dict, Optional @@ -12,12 +15,7 @@ class Experiment: domain: str domain_id: str pipeline_type: Optional[str] = None - configuration: Dict[str, Any] = None - - def __post_init__(self): - """Initialize configuration if not provided.""" - if self.configuration is None: - self.configuration = {} + configuration: Dict[str, Any] = field(default_factory=dict) def to_dict(self): """Convert to dictionary.""" @@ -41,31 +39,3 @@ def from_dict(cls, data: dict): pipeline_type=data.get("pipeline_type"), configuration=data.get("configuration", {}), ) - - -@dataclass -class Domain: - """Represents a domain.""" - - name: str - domain_id: str - description: Optional[str] = None - - def to_dict(self): - """Convert to dictionary.""" - result = { - "name": self.name, - "domain_id": self.domain_id, - } - if self.description is not None: - result["description"] = self.description - return result - - @classmethod - def from_dict(cls, data: dict): - """Create from dictionary.""" - return cls( - name=data["name"], - domain_id=data["domain_id"], - description=data.get("description"), - ) diff --git a/src/schemas/io_utils.py b/src/schemas/io_utils.py index 335757a..082b50b 100644 --- a/src/schemas/io_utils.py +++ b/src/schemas/io_utils.py @@ -6,14 +6,15 @@ from src.schemas.area_schemas import Area from src.schemas.capability_schemas import Capability -from src.schemas.experiment_schemas import Domain, Experiment +from src.schemas.domain_schemas import Domain +from src.schemas.experiment_schemas import Experiment from src.schemas.metadata_schemas import PipelineMetadata from src.schemas.solution_schemas import TaskSolution from src.schemas.task_schemas import Task from src.schemas.validation_schemas import ValidationResult -def save_experiment_output( +def save_experiment( experiment: Experiment, metadata: PipelineMetadata, output_path: Path ) -> None: """Save experiment output to JSON file. 
@@ -32,9 +33,7 @@ def save_experiment_output( json.dump(data, f, indent=2, ensure_ascii=False) -def save_domain_output( - domain: Domain, metadata: PipelineMetadata, output_path: Path -) -> None: +def save_domain(domain: Domain, metadata: PipelineMetadata, output_path: Path) -> None: """Save domain output to JSON file. Args: @@ -51,7 +50,7 @@ def save_domain_output( json.dump(data, f, indent=2, ensure_ascii=False) -def save_areas_output( +def save_areas( areas: List[Area], metadata: PipelineMetadata, output_path: Path ) -> None: """Save areas output to JSON file. @@ -70,7 +69,7 @@ def save_areas_output( json.dump(data, f, indent=2, ensure_ascii=False) -def save_capabilities_output( +def save_capabilities( capabilities: List[Capability], metadata: PipelineMetadata, output_path: Path ) -> None: """Save capabilities output to JSON file. @@ -89,7 +88,7 @@ def save_capabilities_output( json.dump(data, f, indent=2, ensure_ascii=False) -def save_tasks_output( +def save_tasks( tasks: List[Task], metadata: PipelineMetadata, output_path: Path ) -> None: """Save tasks output to JSON file. @@ -108,7 +107,7 @@ def save_tasks_output( json.dump(data, f, indent=2, ensure_ascii=False) -def save_solution_output( +def save_solution( task_solution: TaskSolution, metadata: PipelineMetadata, output_path: Path ) -> None: """Save solution output to JSON file. @@ -127,7 +126,7 @@ def save_solution_output( json.dump(data, f, indent=2, ensure_ascii=False) -def save_validation_output( +def save_validation( validation_result: ValidationResult, metadata: PipelineMetadata, output_path: Path ) -> None: """Save validation output to JSON file. @@ -149,7 +148,7 @@ def save_validation_output( # Load functions -def load_experiment_output(file_path: Path) -> Tuple[Experiment, PipelineMetadata]: +def load_experiment(file_path: Path) -> Tuple[Experiment, PipelineMetadata]: """Load experiment output from JSON file. 
Args: @@ -166,7 +165,7 @@ def load_experiment_output(file_path: Path) -> Tuple[Experiment, PipelineMetadat return experiment, metadata -def load_domain_output(file_path: Path) -> Tuple[Domain, PipelineMetadata]: +def load_domain(file_path: Path) -> Tuple[Domain, PipelineMetadata]: """Load domain output from JSON file. Args: @@ -183,7 +182,7 @@ def load_domain_output(file_path: Path) -> Tuple[Domain, PipelineMetadata]: return domain, metadata -def load_areas_output(file_path: Path) -> Tuple[List[Area], PipelineMetadata]: +def load_areas(file_path: Path) -> Tuple[List[Area], PipelineMetadata]: """Load areas output from JSON file. Args: @@ -200,7 +199,7 @@ def load_areas_output(file_path: Path) -> Tuple[List[Area], PipelineMetadata]: return areas, metadata -def load_capabilities_output( +def load_capabilities( file_path: Path, ) -> Tuple[List[Capability], PipelineMetadata]: """Load capabilities output from JSON file. @@ -219,7 +218,7 @@ def load_capabilities_output( return capabilities, metadata -def load_tasks_output(file_path: Path) -> Tuple[List[Task], PipelineMetadata]: +def load_tasks(file_path: Path) -> Tuple[List[Task], PipelineMetadata]: """Load tasks output from JSON file. Args: @@ -236,7 +235,7 @@ def load_tasks_output(file_path: Path) -> Tuple[List[Task], PipelineMetadata]: return tasks, metadata -def load_solution_output(file_path: Path) -> Tuple[TaskSolution, PipelineMetadata]: +def load_solution(file_path: Path) -> Tuple[TaskSolution, PipelineMetadata]: """Load solution output from JSON file. Args: @@ -256,7 +255,7 @@ def load_solution_output(file_path: Path) -> Tuple[TaskSolution, PipelineMetadat return task_solution, metadata -def load_validation_output( +def load_validation( file_path: Path, ) -> Tuple[ValidationResult, PipelineMetadata]: """Load validation output from JSON file. 
diff --git a/src/schemas/metadata_schemas.py b/src/schemas/metadata_schemas.py index c671552..343e6b7 100644 --- a/src/schemas/metadata_schemas.py +++ b/src/schemas/metadata_schemas.py @@ -1,4 +1,13 @@ -"""Metadata schemas for pipeline stages.""" +"""Metadata schemas for pipeline stages. + +This module defines PipelineMetadata, which provides execution context and traceability +for all pipeline stage outputs. It tracks experiment ID, timestamps, input/output +version tags, and resume state. Used by all save/load functions and serialized in +JSON output files. + +Note: PipelineMetadata tracks execution context, not content (content identifiers are +in the data objects themselves). +""" from dataclasses import dataclass from datetime import datetime @@ -7,7 +16,24 @@ @dataclass class PipelineMetadata: - """Standard metadata for all pipeline stage outputs.""" + """Standard metadata for all pipeline stage outputs. + + Provides execution context, traceability, and resumability for pipeline stages. + Included with every stage output to track which experiment produced it, when it was + generated, which input version was used, and whether the run was resumed. + + Attributes + ---------- + experiment_id: Unique identifier for the experiment. + output_base_dir: Base directory path where all pipeline outputs are stored. + timestamp: ISO 8601 formatted timestamp (e.g., "2025-11-06T12:00:00Z"). + Auto-generated if not provided. + input_stage_tag: Optional tag for the input version from previous stage + (e.g., "_20251009_122040"). None for Stage 0. + output_stage_tag: Optional tag for this output version + (e.g., "_20251009_131252"). None for Stage 0. + resume: Boolean indicating if this run was resumed from a checkpoint. + """ experiment_id: str output_base_dir: str @@ -17,12 +43,15 @@ class PipelineMetadata: resume: bool = False def __post_init__(self): - """Set default timestamp if not provided.""" + """Set default timestamp if not provided. 
+ + Automatically generates a UTC timestamp in ISO 8601 format if not set. + """ if not self.timestamp: self.timestamp = datetime.utcnow().isoformat() + "Z" def to_dict(self): - """Convert to dictionary.""" + """Convert metadata to dictionary for JSON serialization.""" result = { "experiment_id": self.experiment_id, "output_base_dir": self.output_base_dir, @@ -37,7 +66,7 @@ def to_dict(self): @classmethod def from_dict(cls, data: dict): - """Create from dictionary.""" + """Create PipelineMetadata from dictionary (e.g., loaded from JSON).""" return cls( experiment_id=data["experiment_id"], output_base_dir=data["output_base_dir"], diff --git a/src/schemas/solution_schemas.py b/src/schemas/solution_schemas.py index e484cc1..7c84810 100644 --- a/src/schemas/solution_schemas.py +++ b/src/schemas/solution_schemas.py @@ -1,8 +1,17 @@ -"""Schemas for solution generation stage.""" +"""Schemas for solution generation stage (Stage 4). + +Defines TaskSolution dataclass representing a complete solution for a task, including +the solution text, reasoning, and optional numerical answer. 
+""" from dataclasses import dataclass, field from typing import Dict, Optional +from src.schemas.area_schemas import Area +from src.schemas.capability_schemas import Capability +from src.schemas.domain_schemas import Domain +from src.schemas.task_schemas import Task + @dataclass class TaskSolution: @@ -10,31 +19,29 @@ class TaskSolution: task_id: str task: str - capability: str - capability_id: str - area: str - area_id: str - domain: str - domain_id: str solution: str reasoning: str numerical_answer: Optional[str] = None generation_metadata: Optional[Dict] = field(default_factory=dict) + task_obj: Optional[Task] = None # Full task object with hierarchy def to_dict(self): """Convert to dictionary.""" result = { "task_id": self.task_id, "task": self.task, - "capability": self.capability, - "capability_id": self.capability_id, - "area": self.area, - "area_id": self.area_id, - "domain": self.domain, - "domain_id": self.domain_id, "solution": self.solution, "reasoning": self.reasoning, } + if self.task_obj is not None and self.task_obj.capability is not None: + result["capability_id"] = self.task_obj.capability.capability_id + result["capability"] = self.task_obj.capability.name + if self.task_obj.capability.area is not None: + result["area"] = self.task_obj.capability.area.name + result["area_id"] = self.task_obj.capability.area.area_id + if self.task_obj.capability.area.domain is not None: + result["domain"] = self.task_obj.capability.area.domain.name + result["domain_id"] = self.task_obj.capability.area.domain.domain_id if self.numerical_answer is not None: result["numerical_answer"] = self.numerical_answer if self.generation_metadata: @@ -44,17 +51,40 @@ def to_dict(self): @classmethod def from_dict(cls, data: dict): """Create from dictionary.""" + task_obj = None + if "capability" in data and "capability_id" in data: + area = None + if "area" in data and "area_id" in data: + domain = None + if "domain" in data and "domain_id" in data: + domain = Domain( + 
name=data["domain"], + domain_id=data["domain_id"], + description=None, + ) + area = Area( + name=data["area"], + area_id=data["area_id"], + description=None, + domain=domain, + ) + capability = Capability( + name=data["capability"], + capability_id=data["capability_id"], + description=None, + area=area, + ) + task_obj = Task( + task_id=data["task_id"], + task=data["task"], + capability=capability, + ) return cls( task_id=data["task_id"], task=data["task"], - capability=data["capability"], - capability_id=data["capability_id"], - area=data["area"], - area_id=data["area_id"], - domain=data["domain"], - domain_id=data["domain_id"], solution=data["solution"], reasoning=data["reasoning"], numerical_answer=data.get("numerical_answer"), generation_metadata=data.get("generation_metadata", {}), + task_obj=task_obj, ) diff --git a/src/schemas/task_schemas.py b/src/schemas/task_schemas.py index 882a6ab..4ab20f6 100644 --- a/src/schemas/task_schemas.py +++ b/src/schemas/task_schemas.py @@ -1,6 +1,15 @@ -"""Schemas for task generation stage.""" +"""Schemas for task generation stage (Stage 3). + +Defines Task dataclass representing a specific task for a capability. Tasks are +concrete evaluation items that test a capability (e.g., "Create a monthly budget"). 
+""" from dataclasses import dataclass +from typing import Optional + +from src.schemas.area_schemas import Area +from src.schemas.capability_schemas import Capability +from src.schemas.domain_schemas import Domain @dataclass @@ -9,22 +18,53 @@ class Task: task_id: str task: str - capability_id: str - capability: str - area: str - area_id: str - domain: str - domain_id: str + capability: Optional[Capability] = None def to_dict(self): """Convert to dictionary.""" - return { + result = { "task_id": self.task_id, "task": self.task, - "capability_id": self.capability_id, - "capability": self.capability, - "area": self.area, - "area_id": self.area_id, - "domain": self.domain, - "domain_id": self.domain_id, } + if self.capability is not None: + result["capability_id"] = self.capability.capability_id + result["capability"] = self.capability.name + if self.capability.area is not None: + result["area"] = self.capability.area.name + result["area_id"] = self.capability.area.area_id + if self.capability.area.domain is not None: + result["domain"] = self.capability.area.domain.name + result["domain_id"] = self.capability.area.domain.domain_id + return result + + @classmethod + def from_dict(cls, data: dict): + """Create from dictionary.""" + capability = None + if "capability" in data and "capability_id" in data: + area = None + if "area" in data and "area_id" in data: + domain = None + if "domain" in data and "domain_id" in data: + domain = Domain( + name=data["domain"], + domain_id=data["domain_id"], + description=None, + ) + area = Area( + name=data["area"], + area_id=data["area_id"], + description=None, + domain=domain, + ) + capability = Capability( + name=data["capability"], + capability_id=data["capability_id"], + description=None, + area=area, + ) + return cls( + task_id=data["task_id"], + task=data["task"], + capability=capability, + ) diff --git a/src/schemas/validation_schemas.py b/src/schemas/validation_schemas.py index 8d3b521..e53a5c7 100644 --- 
a/src/schemas/validation_schemas.py +++ b/src/schemas/validation_schemas.py @@ -1,8 +1,17 @@ -"""Schemas for validation stage.""" +"""Schemas for validation stage (Stage 5). + +Defines ValidationResult dataclass representing the validation outcome for a task, +including verification status, feedback, and optional score. +""" from dataclasses import dataclass, field from typing import Dict, Optional +from src.schemas.area_schemas import Area +from src.schemas.capability_schemas import Capability +from src.schemas.domain_schemas import Domain +from src.schemas.task_schemas import Task + @dataclass class ValidationResult: @@ -10,31 +19,29 @@ class ValidationResult: task_id: str task: str - capability: str - capability_id: str - area: str - area_id: str - domain: str - domain_id: str verification: bool feedback: str score: Optional[float] = None generation_metadata: Optional[Dict] = field(default_factory=dict) + task_obj: Optional[Task] = None # Full task object with hierarchy def to_dict(self): """Convert to dictionary.""" result = { "task_id": self.task_id, "task": self.task, - "capability": self.capability, - "capability_id": self.capability_id, - "area": self.area, - "area_id": self.area_id, - "domain": self.domain, - "domain_id": self.domain_id, "verification": self.verification, "feedback": self.feedback, } + if self.task_obj is not None and self.task_obj.capability is not None: + result["capability_id"] = self.task_obj.capability.capability_id + result["capability"] = self.task_obj.capability.name + if self.task_obj.capability.area is not None: + result["area"] = self.task_obj.capability.area.name + result["area_id"] = self.task_obj.capability.area.area_id + if self.task_obj.capability.area.domain is not None: + result["domain"] = self.task_obj.capability.area.domain.name + result["domain_id"] = self.task_obj.capability.area.domain.domain_id if self.score is not None: result["score"] = self.score if self.generation_metadata: @@ -44,17 +51,40 @@ def 
to_dict(self): @classmethod def from_dict(cls, data: dict): """Create from dictionary.""" + task_obj = None + if "capability" in data and "capability_id" in data: + area = None + if "area" in data and "area_id" in data: + domain = None + if "domain" in data and "domain_id" in data: + domain = Domain( + name=data["domain"], + domain_id=data["domain_id"], + description=None, + ) + area = Area( + name=data["area"], + area_id=data["area_id"], + description=None, + domain=domain, + ) + capability = Capability( + name=data["capability"], + capability_id=data["capability_id"], + description=None, + area=area, + ) + task_obj = Task( + task_id=data["task_id"], + task=data["task"], + capability=capability, + ) return cls( task_id=data["task_id"], task=data["task"], - capability=data["capability"], - capability_id=data["capability_id"], - area=data["area"], - area_id=data["area_id"], - domain=data["domain"], - domain_id=data["domain_id"], verification=data["verification"], feedback=data["feedback"], score=data.get("score"), generation_metadata=data.get("generation_metadata", {}), + task_obj=task_obj, ) From 95b0cb352704f4f216fc97b9eff1b9aec32b6e02 Mon Sep 17 00:00:00 2001 From: kohankhaki Date: Mon, 1 Dec 2025 15:07:02 -0500 Subject: [PATCH 5/7] updated class descriptions. 
--- src/schemas/area_schemas.py | 2 +- src/schemas/capability_schemas.py | 2 +- src/schemas/domain_schemas.py | 2 +- src/schemas/experiment_schemas.py | 2 +- src/schemas/solution_schemas.py | 2 +- src/schemas/task_schemas.py | 2 +- src/schemas/validation_schemas.py | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/schemas/area_schemas.py b/src/schemas/area_schemas.py index cd212b8..442bb34 100644 --- a/src/schemas/area_schemas.py +++ b/src/schemas/area_schemas.py @@ -12,7 +12,7 @@ @dataclass class Area: - """Represents a domain area.""" + """Dataclass for domain area.""" name: str area_id: str diff --git a/src/schemas/capability_schemas.py b/src/schemas/capability_schemas.py index ea4c92e..1d7f804 100644 --- a/src/schemas/capability_schemas.py +++ b/src/schemas/capability_schemas.py @@ -13,7 +13,7 @@ @dataclass class Capability: - """Represents a capability within an area.""" + """Dataclass for capability.""" name: str capability_id: str diff --git a/src/schemas/domain_schemas.py b/src/schemas/domain_schemas.py index fc1ee18..c840959 100644 --- a/src/schemas/domain_schemas.py +++ b/src/schemas/domain_schemas.py @@ -9,7 +9,7 @@ @dataclass class Domain: - """Represents a domain.""" + """Dataclass for domain.""" name: str domain_id: str diff --git a/src/schemas/experiment_schemas.py b/src/schemas/experiment_schemas.py index fc2cfb7..de92538 100644 --- a/src/schemas/experiment_schemas.py +++ b/src/schemas/experiment_schemas.py @@ -9,7 +9,7 @@ @dataclass class Experiment: - """Represents experiment metadata and configuration.""" + """Dataclass for experiment metadata and configuration.""" experiment_id: str domain: str diff --git a/src/schemas/solution_schemas.py b/src/schemas/solution_schemas.py index 7c84810..94f0699 100644 --- a/src/schemas/solution_schemas.py +++ b/src/schemas/solution_schemas.py @@ -15,7 +15,7 @@ @dataclass class TaskSolution: - """Represents the complete solution for a task.""" + """Dataclass for task solution.""" task_id: 
str task: str diff --git a/src/schemas/task_schemas.py b/src/schemas/task_schemas.py index 4ab20f6..8e5fcd5 100644 --- a/src/schemas/task_schemas.py +++ b/src/schemas/task_schemas.py @@ -14,7 +14,7 @@ @dataclass class Task: - """Represents a task for a capability.""" + """Dataclass for task.""" task_id: str task: str diff --git a/src/schemas/validation_schemas.py b/src/schemas/validation_schemas.py index e53a5c7..07bdafd 100644 --- a/src/schemas/validation_schemas.py +++ b/src/schemas/validation_schemas.py @@ -15,7 +15,7 @@ @dataclass class ValidationResult: - """Validation result for a single task.""" + """Dataclass for validation result.""" task_id: str task: str From ec5e71597e6a06fd3a1eb50f989875a519a1c84d Mon Sep 17 00:00:00 2001 From: kohankhaki Date: Mon, 1 Dec 2025 15:16:51 -0500 Subject: [PATCH 6/7] updated pipeline schema doc to include changes. --- src/schemas/PIPELINE_SCHEMAS.md | 39 ++++++++++++------------------- src/schemas/area_schemas.py | 2 +- src/schemas/capability_schemas.py | 2 +- src/schemas/domain_schemas.py | 2 +- src/schemas/solution_schemas.py | 4 ++-- src/schemas/task_schemas.py | 4 ++-- src/schemas/validation_schemas.py | 4 ++-- 7 files changed, 24 insertions(+), 33 deletions(-) diff --git a/src/schemas/PIPELINE_SCHEMAS.md b/src/schemas/PIPELINE_SCHEMAS.md index d1dd4d8..e7ca883 100644 --- a/src/schemas/PIPELINE_SCHEMAS.md +++ b/src/schemas/PIPELINE_SCHEMAS.md @@ -248,73 +248,64 @@ All pipeline outputs include a `metadata` object (represented by the `PipelineMe - `name`: String (required, human-readable area name) - `area_id`: String (required) - `description`: String (optional, area description) -- `domain`: String (required, domain name) -- `domain_id`: String (required) +- `domain`: Optional[Domain] (optional, Domain dataclass object) - `generation_metadata`: Dict (optional, nested dictionary containing process-specific information) - This field can contain any generation-specific data (e.g., generation method, parameters, 
intermediate steps) - Structure is flexible and depends on the generation method +**Note:** When serialized to JSON, the `domain` object is flattened to `domain` (string) and `domain_id` (string) fields. + ### Capability **Fields:** - `name`: String (required, capability name) - `capability_id`: String (required) - `description`: String (optional, capability description) -- `area`: String (required, area name) -- `area_id`: String (required) -- `domain`: String (required, domain name) -- `domain_id`: String (required) +- `area`: Optional[Area] (optional, Area dataclass object) - `generation_metadata`: Dict (optional, nested dictionary containing process-specific information) - This field can contain any generation-specific data (e.g., generation method, parameters, intermediate steps) - Structure is flexible and depends on the generation method +**Note:** When serialized to JSON, the `area` object is flattened to `area` (string), `area_id` (string), `domain` (string), and `domain_id` (string) fields. + ### Task **Fields:** - `task_id`: String (required, unique within capability) - `task`: String (required, the task/problem text) -- `capability_id`: String (required) -- `capability`: String (required, capability name) -- `area`: String (required, area name) -- `area_id`: String (required) -- `domain`: String (required, domain name) -- `domain_id`: String (required) +- `capability`: Optional[Capability] (optional, Capability dataclass object) + +**Note:** When serialized to JSON, the `capability` object is flattened to `capability` (string), `capability_id` (string), `area` (string), `area_id` (string), `domain` (string), and `domain_id` (string) fields. 
### TaskSolution **Fields:** - `task_id`: String (required) - `task`: String (required, the task/problem text from Stage 3) -- `capability`: String (required, capability name) -- `capability_id`: String (required) -- `area`: String (required, area name) -- `area_id`: String (required) -- `domain`: String (required, domain name) -- `domain_id`: String (required) - `solution`: String (required, the final solution) - `reasoning`: String (required, explanation of the solution) - `numerical_answer`: String (optional, JSON string with numerical results) - `generation_metadata`: Dict (optional, nested dictionary containing process-specific information) - This field can contain any generation-specific data (e.g., debate rounds, agent interactions, pipeline type) - Structure is flexible and depends on the generation method (agentic, single-agent, etc.) +- `task_obj`: Optional[Task] (optional, Task dataclass object with full hierarchy) + +**Note:** When serialized to JSON, the `task_obj` object is flattened to `capability` (string), `capability_id` (string), `area` (string), `area_id` (string), `domain` (string), and `domain_id` (string) fields. 
### ValidationResult **Fields:** - `task_id`: String (required) - `task`: String (required, the task/problem text from Stage 3) -- `capability`: String (required, capability name) -- `capability_id`: String (required) -- `area`: String (required, area name) -- `area_id`: String (required) -- `domain`: String (required, domain name) -- `domain_id`: String (required) - `verification`: Boolean (required, overall validation status - whether the solution is verified/valid) - `feedback`: String (required, detailed feedback on the validation) - `score`: Float (optional, validation score, typically 0.0 to 1.0) - `generation_metadata`: Dict (optional, nested dictionary containing process-specific information) - This field can contain any validation-specific data (e.g., validation method, criteria details, error details) - Structure is flexible and depends on the validation method +- `task_obj`: Optional[Task] (optional, Task dataclass object with full hierarchy) + +**Note:** When serialized to JSON, the `task_obj` object is flattened to `capability` (string), `capability_id` (string), `area` (string), `area_id` (string), `domain` (string), and `domain_id` (string) fields. --- diff --git a/src/schemas/area_schemas.py b/src/schemas/area_schemas.py index 442bb34..acd1658 100644 --- a/src/schemas/area_schemas.py +++ b/src/schemas/area_schemas.py @@ -1,6 +1,6 @@ """Schemas for area generation stage (Stage 1). -Defines Area dataclass representing a domain area. Areas are high-level categories +Defines Area dataclass for domain area. Areas are high-level categories within a domain (e.g., "Budgeting" within "Personal Finance"). """ diff --git a/src/schemas/capability_schemas.py b/src/schemas/capability_schemas.py index 1d7f804..e300829 100644 --- a/src/schemas/capability_schemas.py +++ b/src/schemas/capability_schemas.py @@ -1,6 +1,6 @@ """Schemas for capability generation stage (Stage 2). -Defines Capability dataclass representing a capability within an area. 
Capabilities +Defines Capability dataclass for capability within an area. Capabilities are specific skills or abilities (e.g., "Budget Creation" within "Budgeting" area). """ diff --git a/src/schemas/domain_schemas.py b/src/schemas/domain_schemas.py index c840959..999990a 100644 --- a/src/schemas/domain_schemas.py +++ b/src/schemas/domain_schemas.py @@ -1,6 +1,6 @@ """Schemas for domain (Stage 0). -Defines Domain dataclass representing the domain being evaluated in the experiment. +Defines Domain dataclass for domain. """ from dataclasses import dataclass diff --git a/src/schemas/solution_schemas.py b/src/schemas/solution_schemas.py index 94f0699..e9e3862 100644 --- a/src/schemas/solution_schemas.py +++ b/src/schemas/solution_schemas.py @@ -1,7 +1,7 @@ """Schemas for solution generation stage (Stage 4). -Defines TaskSolution dataclass representing a complete solution for a task, including -the solution text, reasoning, and optional numerical answer. +Defines TaskSolution dataclass for task solution, including solution text, +reasoning, and optional numerical answer. """ from dataclasses import dataclass, field diff --git a/src/schemas/task_schemas.py b/src/schemas/task_schemas.py index 8e5fcd5..ac241c3 100644 --- a/src/schemas/task_schemas.py +++ b/src/schemas/task_schemas.py @@ -1,7 +1,7 @@ """Schemas for task generation stage (Stage 3). -Defines Task dataclass representing a specific task for a capability. Tasks are -concrete evaluation items that test a capability (e.g., "Create a monthly budget"). +Defines Task dataclass for task. Tasks are concrete evaluation items +that test a capability (e.g., "Create a monthly budget"). """ from dataclasses import dataclass diff --git a/src/schemas/validation_schemas.py b/src/schemas/validation_schemas.py index 07bdafd..52c33be 100644 --- a/src/schemas/validation_schemas.py +++ b/src/schemas/validation_schemas.py @@ -1,7 +1,7 @@ """Schemas for validation stage (Stage 5). 
-Defines ValidationResult dataclass representing the validation outcome for a task, -including verification status, feedback, and optional score. +Defines ValidationResult dataclass for validation result, including +verification status, feedback, and optional score. """ from dataclasses import dataclass, field From 0daa522dcf79bdd0c39b31bf054d3b9d8e5add1b Mon Sep 17 00:00:00 2001 From: kohankhaki Date: Thu, 4 Dec 2025 16:10:21 -0500 Subject: [PATCH 7/7] Make Area and Capability description fields required instead of optional. Add file references to all dataclass sections in PIPELINE_SCHEMAS.md documentation. --- src/schemas/PIPELINE_SCHEMAS.md | 40 +++++++++++------ src/schemas/area_schemas.py | 28 ++++++------ src/schemas/capability_schemas.py | 48 +++++++++------------ src/schemas/solution_schemas.py | 71 ++++++++++++++----------------- src/schemas/task_schemas.py | 65 ++++++++++++---------------- src/schemas/validation_schemas.py | 71 ++++++++++++++----------------- 6 files changed, 153 insertions(+), 170 deletions(-) diff --git a/src/schemas/PIPELINE_SCHEMAS.md b/src/schemas/PIPELINE_SCHEMAS.md index e7ca883..d2b8b86 100644 --- a/src/schemas/PIPELINE_SCHEMAS.md +++ b/src/schemas/PIPELINE_SCHEMAS.md @@ -209,6 +209,8 @@ All dataclasses used across pipeline stages are defined below. Stage implementat ### PipelineMetadata +**File:** [`metadata_schemas.py`](metadata_schemas.py) + All pipeline outputs include a `metadata` object (represented by the `PipelineMetadata` dataclass) that provides pipeline execution context and traceability. 
**Required Fields:** @@ -228,6 +230,8 @@ All pipeline outputs include a `metadata` object (represented by the `PipelineMe ### Experiment +**File:** [`experiment_schemas.py`](experiment_schemas.py) + **Fields:** - `experiment_id`: String (required, experiment identifier) - `domain`: String (required, human-readable domain name) @@ -237,6 +241,8 @@ All pipeline outputs include a `metadata` object (represented by the `PipelineMe ### Domain +**File:** [`domain_schemas.py`](domain_schemas.py) + **Fields:** - `name`: String (required, human-readable domain name) - `domain_id`: String (required) @@ -244,11 +250,13 @@ All pipeline outputs include a `metadata` object (represented by the `PipelineMe ### Area +**File:** [`area_schemas.py`](area_schemas.py) + **Fields:** - `name`: String (required, human-readable area name) - `area_id`: String (required) -- `description`: String (optional, area description) -- `domain`: Optional[Domain] (optional, Domain dataclass object) +- `domain`: Domain (required, Domain dataclass object) +- `description`: String (required, area description) - `generation_metadata`: Dict (optional, nested dictionary containing process-specific information) - This field can contain any generation-specific data (e.g., generation method, parameters, intermediate steps) - Structure is flexible and depends on the generation method @@ -257,11 +265,13 @@ All pipeline outputs include a `metadata` object (represented by the `PipelineMe ### Capability +**File:** [`capability_schemas.py`](capability_schemas.py) + **Fields:** - `name`: String (required, capability name) - `capability_id`: String (required) -- `description`: String (optional, capability description) -- `area`: Optional[Area] (optional, Area dataclass object) +- `area`: Area (required, Area dataclass object) +- `description`: String (required, capability description) - `generation_metadata`: Dict (optional, nested dictionary containing process-specific information) - This field can contain any 
generation-specific data (e.g., generation method, parameters, intermediate steps) - Structure is flexible and depends on the generation method @@ -270,40 +280,46 @@ All pipeline outputs include a `metadata` object (represented by the `PipelineMe ### Task +**File:** [`task_schemas.py`](task_schemas.py) + **Fields:** - `task_id`: String (required, unique within capability) - `task`: String (required, the task/problem text) -- `capability`: Optional[Capability] (optional, Capability dataclass object) +- `capability`: Capability (required, Capability dataclass object) **Note:** When serialized to JSON, the `capability` object is flattened to `capability` (string), `capability_id` (string), `area` (string), `area_id` (string), `domain` (string), and `domain_id` (string) fields. ### TaskSolution +**File:** [`solution_schemas.py`](solution_schemas.py) + **Fields:** - `task_id`: String (required) - `task`: String (required, the task/problem text from Stage 3) - `solution`: String (required, the final solution) - `reasoning`: String (required, explanation of the solution) +- `task_obj`: Task (required, Task dataclass object with full hierarchy) - `numerical_answer`: String (optional, JSON string with numerical results) - `generation_metadata`: Dict (optional, nested dictionary containing process-specific information) - This field can contain any generation-specific data (e.g., debate rounds, agent interactions, pipeline type) - Structure is flexible and depends on the generation method (agentic, single-agent, etc.) -- `task_obj`: Optional[Task] (optional, Task dataclass object with full hierarchy) **Note:** When serialized to JSON, the `task_obj` object is flattened to `capability` (string), `capability_id` (string), `area` (string), `area_id` (string), `domain` (string), and `domain_id` (string) fields. 
### ValidationResult +**File:** [`validation_schemas.py`](validation_schemas.py) + **Fields:** - `task_id`: String (required) - `task`: String (required, the task/problem text from Stage 3) - `verification`: Boolean (required, overall validation status - whether the solution is verified/valid) - `feedback`: String (required, detailed feedback on the validation) +- `task_obj`: Task (required, Task dataclass object with full hierarchy) - `score`: Float (optional, validation score, typically 0.0 to 1.0) - `generation_metadata`: Dict (optional, nested dictionary containing process-specific information) - This field can contain any validation-specific data (e.g., validation method, criteria details, error details) - Structure is flexible and depends on the validation method -- `task_obj`: Optional[Task] (optional, Task dataclass object with full hierarchy) **Note:** When serialized to JSON, the `task_obj` object is flattened to `capability` (string), `capability_id` (string), `area` (string), `area_id` (string), `domain` (string), and `domain_id` (string) fields. 
@@ -333,7 +349,7 @@ This stage creates two files: #### Output 1: `experiment.json` **Stage Output:** Experiment dataclass + PipelineMetadata -**Save Function:** `save_experiment(experiment: Experiment, metadata: PipelineMetadata, output_path: Path)` +**Save Function:** `save_experiment(experiment: Experiment, metadata: PipelineMetadata, output_path: Path)` (see [`io_utils.py`](io_utils.py)) **File Path:** `//experiment.json` @@ -364,7 +380,7 @@ This stage creates two files: #### Output 2: `domain.json` **Stage Output:** Domain dataclass object + PipelineMetadata -**Save Function:** `save_domain(domain: Domain, metadata: PipelineMetadata, output_path: Path)` +**Save Function:** `save_domain(domain: Domain, metadata: PipelineMetadata, output_path: Path)` (see [`io_utils.py`](io_utils.py)) **File Path:** `//domain/domain.json` @@ -402,7 +418,7 @@ This stage creates two files: ### Output: `areas.json` **Stage Output:** List[Area] dataclasses + PipelineMetadata -**Save Function:** `save_areas(areas: List[Area], metadata: PipelineMetadata, output_path: Path)` +**Save Function:** `save_areas(areas: List[Area], metadata: PipelineMetadata, output_path: Path)` (see [`io_utils.py`](io_utils.py)) **File Path:** `//areas//areas.json` ```json @@ -447,7 +463,7 @@ This stage creates two files: ### Output: `capabilities.json` (one per area) **Stage Output:** List[Capability] dataclasses + PipelineMetadata -**Save Function:** `save_capabilities(capabilities: List[Capability], metadata: PipelineMetadata, output_path: Path)` +**Save Function:** `save_capabilities(capabilities: List[Capability], metadata: PipelineMetadata, output_path: Path)` (see [`io_utils.py`](io_utils.py)) **File Path:** `//capabilities///capabilities.json` @@ -495,7 +511,7 @@ This stage creates two files: ### Output: `tasks.json` (one per capability) **Stage Output:** List[Task] dataclasses + PipelineMetadata -**Save Function:** `save_tasks(tasks: List[Task], metadata: PipelineMetadata, output_path: Path)` +**Save 
Function:** `save_tasks(tasks: List[Task], metadata: PipelineMetadata, output_path: Path)` (see [`io_utils.py`](io_utils.py)) **File Path:** `//tasks////tasks.json` diff --git a/src/schemas/area_schemas.py b/src/schemas/area_schemas.py index acd1658..311eb8a 100644 --- a/src/schemas/area_schemas.py +++ b/src/schemas/area_schemas.py @@ -1,7 +1,7 @@ """Schemas for area generation stage (Stage 1). Defines Area dataclass for domain area. Areas are high-level categories -within a domain (e.g., "Budgeting" within "Personal Finance"). +within a domain. """ from dataclasses import dataclass, field @@ -16,8 +16,8 @@ class Area: name: str area_id: str - description: Optional[str] = None - domain: Optional[Domain] = None + domain: Domain + description: str generation_metadata: Optional[Dict] = field(default_factory=dict) def to_dict(self): @@ -25,12 +25,10 @@ def to_dict(self): result = { "name": self.name, "area_id": self.area_id, + "domain": self.domain.name, + "domain_id": self.domain.domain_id, + "description": self.description, } - if self.domain is not None: - result["domain"] = self.domain.name - result["domain_id"] = self.domain.domain_id - if self.description is not None: - result["description"] = self.description if self.generation_metadata: result["generation_metadata"] = self.generation_metadata return result @@ -38,17 +36,15 @@ def to_dict(self): @classmethod def from_dict(cls, data: dict): """Create from dictionary.""" - domain = None - if "domain" in data and "domain_id" in data: - domain = Domain( - name=data["domain"], - domain_id=data["domain_id"], - description=None, - ) + domain = Domain( + name=data["domain"], + domain_id=data["domain_id"], + description=data.get("domain_description"), + ) return cls( name=data["name"], area_id=data["area_id"], - description=data.get("description"), domain=domain, + description=data["description"], generation_metadata=data.get("generation_metadata", {}), ) diff --git a/src/schemas/capability_schemas.py 
b/src/schemas/capability_schemas.py index e300829..8cfc74c 100644 --- a/src/schemas/capability_schemas.py +++ b/src/schemas/capability_schemas.py @@ -1,7 +1,7 @@ """Schemas for capability generation stage (Stage 2). Defines Capability dataclass for capability within an area. Capabilities -are specific skills or abilities (e.g., "Budget Creation" within "Budgeting" area). +are specific skills or abilities. """ from dataclasses import dataclass, field @@ -17,8 +17,8 @@ class Capability: name: str capability_id: str - description: Optional[str] = None - area: Optional[Area] = None + area: Area + description: str generation_metadata: Optional[Dict] = field(default_factory=dict) def to_dict(self): @@ -26,15 +26,13 @@ def to_dict(self): result = { "name": self.name, "capability_id": self.capability_id, + "area": self.area.name, + "area_id": self.area.area_id, + "area_description": self.area.description, + "domain": self.area.domain.name, + "domain_id": self.area.domain.domain_id, + "description": self.description, } - if self.area is not None: - result["area"] = self.area.name - result["area_id"] = self.area.area_id - if self.area.domain is not None: - result["domain"] = self.area.domain.name - result["domain_id"] = self.area.domain.domain_id - if self.description is not None: - result["description"] = self.description if self.generation_metadata: result["generation_metadata"] = self.generation_metadata return result @@ -42,25 +40,21 @@ def to_dict(self): @classmethod def from_dict(cls, data: dict): """Create from dictionary.""" - area = None - if "area" in data and "area_id" in data: - domain = None - if "domain" in data and "domain_id" in data: - domain = Domain( - name=data["domain"], - domain_id=data["domain_id"], - description=None, - ) - area = Area( - name=data["area"], - area_id=data["area_id"], - description=None, - domain=domain, - ) + domain = Domain( + name=data["domain"], + domain_id=data["domain_id"], + description=data.get("domain_description"), + ) + area 
= Area( + name=data["area"], + area_id=data["area_id"], + domain=domain, + description=data["area_description"], + ) return cls( name=data["name"], capability_id=data["capability_id"], - description=data.get("description"), area=area, + description=data["description"], generation_metadata=data.get("generation_metadata", {}), ) diff --git a/src/schemas/solution_schemas.py b/src/schemas/solution_schemas.py index e9e3862..e4547a7 100644 --- a/src/schemas/solution_schemas.py +++ b/src/schemas/solution_schemas.py @@ -21,9 +21,9 @@ class TaskSolution: task: str solution: str reasoning: str + task_obj: Task numerical_answer: Optional[str] = None generation_metadata: Optional[Dict] = field(default_factory=dict) - task_obj: Optional[Task] = None # Full task object with hierarchy def to_dict(self): """Convert to dictionary.""" @@ -32,16 +32,15 @@ def to_dict(self): "task": self.task, "solution": self.solution, "reasoning": self.reasoning, + "capability_id": self.task_obj.capability.capability_id, + "capability": self.task_obj.capability.name, + "capability_description": self.task_obj.capability.description, + "area": self.task_obj.capability.area.name, + "area_id": self.task_obj.capability.area.area_id, + "area_description": self.task_obj.capability.area.description, + "domain": self.task_obj.capability.area.domain.name, + "domain_id": self.task_obj.capability.area.domain.domain_id, } - if self.task_obj is not None and self.task_obj.capability is not None: - result["capability_id"] = self.task_obj.capability.capability_id - result["capability"] = self.task_obj.capability.name - if self.task_obj.capability.area is not None: - result["area"] = self.task_obj.capability.area.name - result["area_id"] = self.task_obj.capability.area.area_id - if self.task_obj.capability.area.domain is not None: - result["domain"] = self.task_obj.capability.area.domain.name - result["domain_id"] = self.task_obj.capability.area.domain.domain_id if self.numerical_answer is not None: 
result["numerical_answer"] = self.numerical_answer if self.generation_metadata: @@ -51,40 +50,34 @@ def to_dict(self): @classmethod def from_dict(cls, data: dict): """Create from dictionary.""" - task_obj = None - if "capability" in data and "capability_id" in data: - area = None - if "area" in data and "area_id" in data: - domain = None - if "domain" in data and "domain_id" in data: - domain = Domain( - name=data["domain"], - domain_id=data["domain_id"], - description=None, - ) - area = Area( - name=data["area"], - area_id=data["area_id"], - description=None, - domain=domain, - ) - capability = Capability( - name=data["capability"], - capability_id=data["capability_id"], - description=None, - area=area, - ) - task_obj = Task( - task_id=data["task_id"], - task=data["task"], - capability=capability, - ) + domain = Domain( + name=data["domain"], + domain_id=data["domain_id"], + description=data.get("domain_description"), + ) + area = Area( + name=data["area"], + area_id=data["area_id"], + domain=domain, + description=data["area_description"], + ) + capability = Capability( + name=data["capability"], + capability_id=data["capability_id"], + area=area, + description=data["capability_description"], + ) + task_obj = Task( + task_id=data["task_id"], + task=data["task"], + capability=capability, + ) return cls( task_id=data["task_id"], task=data["task"], solution=data["solution"], reasoning=data["reasoning"], + task_obj=task_obj, numerical_answer=data.get("numerical_answer"), generation_metadata=data.get("generation_metadata", {}), - task_obj=task_obj, ) diff --git a/src/schemas/task_schemas.py b/src/schemas/task_schemas.py index ac241c3..cbf8865 100644 --- a/src/schemas/task_schemas.py +++ b/src/schemas/task_schemas.py @@ -1,11 +1,10 @@ """Schemas for task generation stage (Stage 3). Defines Task dataclass for task. Tasks are concrete evaluation items -that test a capability (e.g., "Create a monthly budget"). +that test a capability. 
""" from dataclasses import dataclass -from typing import Optional from src.schemas.area_schemas import Area from src.schemas.capability_schemas import Capability @@ -18,51 +17,43 @@ class Task: task_id: str task: str - capability: Optional[Capability] = None + capability: Capability def to_dict(self): """Convert to dictionary.""" - result = { + return { "task_id": self.task_id, "task": self.task, + "capability_id": self.capability.capability_id, + "capability": self.capability.name, + "capability_description": self.capability.description, + "area": self.capability.area.name, + "area_id": self.capability.area.area_id, + "area_description": self.capability.area.description, + "domain": self.capability.area.domain.name, + "domain_id": self.capability.area.domain.domain_id, } - if self.capability is not None: - result["capability_id"] = self.capability.capability_id - result["capability"] = self.capability.name - if self.capability.area is not None: - result["area"] = self.capability.area.name - result["area_id"] = self.capability.area.area_id - if self.capability.area.domain is not None: - result["domain"] = self.capability.area.domain.name - result["domain_id"] = self.capability.area.domain.domain_id - return result @classmethod def from_dict(cls, data: dict): """Create from dictionary.""" - capability = None - if "capability" in data and "capability_id" in data: - area = None - if "area" in data and "area_id" in data: - domain = None - if "domain" in data and "domain_id" in data: - domain = Domain( - name=data["domain"], - domain_id=data["domain_id"], - description=None, - ) - area = Area( - name=data["area"], - area_id=data["area_id"], - description=None, - domain=domain, - ) - capability = Capability( - name=data["capability"], - capability_id=data["capability_id"], - description=None, - area=area, - ) + domain = Domain( + name=data["domain"], + domain_id=data["domain_id"], + description=data.get("domain_description"), + ) + area = Area( + name=data["area"], + 
area_id=data["area_id"], + domain=domain, + description=data["area_description"], + ) + capability = Capability( + name=data["capability"], + capability_id=data["capability_id"], + area=area, + description=data["capability_description"], + ) return cls( task_id=data["task_id"], task=data["task"], diff --git a/src/schemas/validation_schemas.py b/src/schemas/validation_schemas.py index 52c33be..02ec8ee 100644 --- a/src/schemas/validation_schemas.py +++ b/src/schemas/validation_schemas.py @@ -21,9 +21,9 @@ class ValidationResult: task: str verification: bool feedback: str + task_obj: Task score: Optional[float] = None generation_metadata: Optional[Dict] = field(default_factory=dict) - task_obj: Optional[Task] = None # Full task object with hierarchy def to_dict(self): """Convert to dictionary.""" @@ -32,16 +32,15 @@ def to_dict(self): "task": self.task, "verification": self.verification, "feedback": self.feedback, + "capability_id": self.task_obj.capability.capability_id, + "capability": self.task_obj.capability.name, + "capability_description": self.task_obj.capability.description, + "area": self.task_obj.capability.area.name, + "area_id": self.task_obj.capability.area.area_id, + "area_description": self.task_obj.capability.area.description, + "domain": self.task_obj.capability.area.domain.name, + "domain_id": self.task_obj.capability.area.domain.domain_id, } - if self.task_obj is not None and self.task_obj.capability is not None: - result["capability_id"] = self.task_obj.capability.capability_id - result["capability"] = self.task_obj.capability.name - if self.task_obj.capability.area is not None: - result["area"] = self.task_obj.capability.area.name - result["area_id"] = self.task_obj.capability.area.area_id - if self.task_obj.capability.area.domain is not None: - result["domain"] = self.task_obj.capability.area.domain.name - result["domain_id"] = self.task_obj.capability.area.domain.domain_id if self.score is not None: result["score"] = self.score if 
self.generation_metadata: @@ -51,40 +50,34 @@ def to_dict(self): @classmethod def from_dict(cls, data: dict): """Create from dictionary.""" - task_obj = None - if "capability" in data and "capability_id" in data: - area = None - if "area" in data and "area_id" in data: - domain = None - if "domain" in data and "domain_id" in data: - domain = Domain( - name=data["domain"], - domain_id=data["domain_id"], - description=None, - ) - area = Area( - name=data["area"], - area_id=data["area_id"], - description=None, - domain=domain, - ) - capability = Capability( - name=data["capability"], - capability_id=data["capability_id"], - description=None, - area=area, - ) - task_obj = Task( - task_id=data["task_id"], - task=data["task"], - capability=capability, - ) + domain = Domain( + name=data["domain"], + domain_id=data["domain_id"], + description=data.get("domain_description"), + ) + area = Area( + name=data["area"], + area_id=data["area_id"], + domain=domain, + description=data["area_description"], + ) + capability = Capability( + name=data["capability"], + capability_id=data["capability_id"], + area=area, + description=data["capability_description"], + ) + task_obj = Task( + task_id=data["task_id"], + task=data["task"], + capability=capability, + ) return cls( task_id=data["task_id"], task=data["task"], verification=data["verification"], feedback=data["feedback"], + task_obj=task_obj, score=data.get("score"), generation_metadata=data.get("generation_metadata", {}), - task_obj=task_obj, )