VectorInstitute
diff --git a/experimental/diverse_task_config.yaml b/experimental/diverse_task_config.yaml
Lines changed: 89 additions & 0 deletions
diff --git a/experimental/diverse_task_dataclasses.py b/experimental/diverse_task_dataclasses.py
Lines changed: 77 additions & 0 deletions
@@ -0,0 +1,89 @@
+# Configuration for Diverse Task Generator
+
+# Model settings
+model:
+  name: gpt-4o  # OpenAI model to use
+  temperature: 1.0  # Temperature for all steps
+  max_tokens: 8192  # Max tokens for all steps
+
+# Task generation settings
+generation:
+  tasks_per_blueprint: 3  # Number of tasks to generate per blueprint
+  min_subtopics: 3  # Suggested minimum number of sub-topics
+  max_subtopics: 8  # Suggested maximum number of sub-topics
+
+# Output settings
+output:
+  base_dir: diverse_task_outputs
+  save_intermediate_steps: true  # Save each step's output
+  pretty_print_json: true  # Indent JSON files
+
+# Input settings
+input:
+  capability_json_path: capability.json  # Default capability JSON file path
+
+# Bloom's Taxonomy definitions
+# Source: Revised Bloom's Taxonomy (Anderson & Krathwohl, 2001)
+blooms_taxonomy:
+  Remember:
+    description: "Retrieving relevant knowledge from long-term memory. Involves recognizing and recalling facts, terms, basic concepts, or answers."
+    keywords: ["define", "list", "identify", "recall", "name", "state"]
+
+  Understand:
+    description: "Constructing meaning from instructional messages. Involves interpreting, exemplifying, classifying, summarizing, inferring, comparing, and explaining."
+    keywords: ["explain", "describe", "interpret", "summarize", "compare", "contrast"]
+
+  Apply:
+    description: "Carrying out or using a procedure in a given situation. Involves executing or implementing a method, technique, or process."
+    keywords: ["apply", "use", "implement", "execute", "solve", "demonstrate"]
+
+  Analyze:
+    description: "Breaking material into constituent parts and determining how parts relate to one another and to an overall structure. Involves differentiating, organizing, and attributing."
+    keywords: ["analyze", "differentiate", "organize", "distinguish", "examine", "compare"]
+
+  Evaluate:
+    description: "Making judgments based on criteria and standards. Involves checking for internal consistency or logical fallacies, and critiquing based on external criteria."
+    keywords: ["evaluate", "judge", "critique", "assess", "justify", "argue"]
+
+  Create:
+    description: "Putting elements together to form a novel, coherent whole or make an original product. Involves generating, planning, and producing."
+    keywords: ["create", "design", "construct", "develop", "formulate", "generate"]
+
+# Difficulty level definitions
+difficulty_levels:
+  easy:
+    description: "Basic, straightforward problems requiring minimal steps and fundamental knowledge."
+    characteristics:
+      - "Single concept application"
+      - "Direct recall or simple calculation"
+      - "Clear and unambiguous"
+      - "Minimal prerequisite knowledge"
+
+  medium:
+    description: "Moderate complexity requiring multiple steps, integration of concepts, or non-trivial reasoning."
+    characteristics:
+      - "Multiple concept integration"
+      - "Multi-step solution required"
+      - "Some prerequisite knowledge needed"
+      - "May involve edge cases"
+
+  hard:
+    description: "Complex, challenging problems requiring deep understanding, multiple concepts, edge cases, or sophisticated reasoning."
+    characteristics:
+      - "Complex multi-concept integration"
+      - "Multiple challenging steps"
+      - "Deep domain knowledge required"
+      - "Edge cases and exceptions"
+      - "May require insight or creative approach"
+
+# Verification criteria
+verification:
+  pass_threshold: 0.8  # Minimum pass rate required to be considered successful
+  strict_mode: false  # If true, all alignment criteria must pass
+
+# Example capability for quick testing
+example_capability:
+  name: "compound_interest_calculations"
+  description: "The ability to calculate compound interest for various scenarios, including different compounding frequencies (annually, semi-annually, quarterly, monthly), different time periods, and understanding how changes in principal, rate, or time affect the final amount."
+  domain: "personal_finance"
+  area: "investing_and_savings"
@@ -0,0 +1,77 @@
+"""Dataclasses for the diverse task generation pipeline."""
+
+from dataclasses import dataclass, field
+from typing import Dict, List, Optional
+
+
+@dataclass
+class Capability:
+    """Represents a capability to be tested."""
+
+    name: str  # Name of the capability
+    description: str  # Description of the capability
+    domain: str  # presumably the broad subject domain (TODO confirm against callers)
+    area: Optional[str] = None  # Optional; defaults to None
+    example_tasks: List[Dict] = field(default_factory=list)  # new empty list per instance
+
+
+@dataclass
+class SubTopic:
+    """Represents a sub-topic within a capability."""
+
+    name: str  # Name of the sub-topic
+    description: Optional[str] = None  # Optional; defaults to None
+
+
+@dataclass
+class Combination:
+    """Represents a valid (content, difficulty, reasoning) combination."""
+
+    content: str  # presumably a sub-topic name (TODO confirm)
+    difficulty: str  # presumably a difficulty-level key such as "easy" (TODO confirm)
+    reasoning: str  # presumably a reasoning-level name (TODO confirm)
+    rationale: Optional[str] = None  # Optional; defaults to None
+
+
+@dataclass
+class Blueprint:
+    """Represents a task blueprint for a specific combination."""
+
+    combination_id: int  # presumably identifies the source Combination (TODO confirm)
+    subtopic: str  # stored as a plain string, not a SubTopic instance
+    difficulty: str  # presumably a difficulty-level key such as "easy" (TODO confirm)
+    reasoning: str  # presumably a reasoning-level name (TODO confirm)
+    blueprint: str  # presumably the blueprint text itself (TODO confirm)
+    key_characteristics: List[str] = field(default_factory=list)  # new empty list per instance
+    example_question_outline: Optional[str] = None  # Optional; defaults to None
+    rationale: Optional[str] = None  # Optional; defaults to None
+
+
+@dataclass
+class Task:
+    """Represents a generated multiple-choice task."""
+
+    task_id: str  # String identifier of the task
+    blueprint_id: int  # presumably refers to the originating Blueprint (TODO confirm)
+    subtopic: str  # stored as a plain string, not a SubTopic instance
+    difficulty: str  # presumably a difficulty-level key such as "easy" (TODO confirm)
+    reasoning: str  # presumably a reasoning-level name (TODO confirm)
+    question: str  # Question text
+    choices: Dict[str, str]  # presumably option label -> option text (TODO confirm)
+    correct_answer: str  # presumably a key of `choices` (TODO confirm)
+    explanation: Optional[str] = None  # Optional; defaults to None
+    alignment_notes: Optional[str] = None  # Optional; defaults to None
+
+
+@dataclass
+class VerificationResult:
+    """Represents the verification result for a task."""
+
+    task_id: str  # presumably matches Task.task_id (TODO confirm)
+    subtopic_aligned: bool  # Required flag (no default)
+    difficulty_aligned: bool  # Required flag (no default)
+    reasoning_aligned: bool  # Required flag (no default)
+    choices_appropriate: bool  # Required flag (no default)
+    overall_aligned: bool  # how this relates to the flags above is not defined here
+    feedback: str  # Required (no default)
+    suggested_improvements: Optional[str] = None  # Optional; defaults to None