Skip to content

Commit fe30db7

Browse files
committed
refactor(zero_shot_evaluation): restructure module layout and add new components
- Move core modules to top-level directory (checkpoint, schema, query_generator, response_collector, zero_shot_pipeline)
- Add pairwise_analyzer to openjudge/analyzer
- Add rubric_generator to openjudge/generator
- Update imports and exports in __init__.py files
- Simplify core/__init__.py to re-export from parent module
1 parent 31ca6ad commit fe30db7

File tree

17 files changed

+1784
-434
lines changed

17 files changed

+1784
-434
lines changed

cookbooks/zero_shot_evaluation/__init__.py

Lines changed: 28 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,43 +1,47 @@
11
# -*- coding: utf-8 -*-
2-
"""Zero-Shot Evaluation module for comparing models and agent pipelines.
2+
"""Core modules for zero-shot evaluation.
33
4-
Usage:
5-
# CLI
6-
python -m cookbooks.zero_shot_evaluation --config config.yaml
4+
This package contains the core components for the zero-shot evaluation pipeline:
5+
- ZeroShotPipeline: End-to-end evaluation pipeline
6+
- QueryGenerator: Test query generation
7+
- ResponseCollector: Response collection from endpoints
78
8-
# Python
9-
from cookbooks.zero_shot_evaluation import ZeroShotEvaluator
10-
evaluator = ZeroShotEvaluator.from_config("config.yaml")
11-
result = await evaluator.evaluate()
9+
Note: RubricGenerator has been moved to openjudge.generator module for better reusability.
10+
Note: Checkpoint management is integrated into ZeroShotPipeline.
1211
"""
1312

14-
from cookbooks.zero_shot_evaluation.core.config import load_config
15-
from cookbooks.zero_shot_evaluation.core.evaluator import EvaluationResult, ZeroShotEvaluator
16-
from cookbooks.zero_shot_evaluation.core.query_generator import QueryGenerator
17-
from cookbooks.zero_shot_evaluation.core.response_collector import ResponseCollector
18-
from cookbooks.zero_shot_evaluation.core.rubric_generator import RubricGenerator
19-
from cookbooks.zero_shot_evaluation.core.schema import (
13+
from cookbooks.zero_shot_evaluation.query_generator import QueryGenerator
14+
from cookbooks.zero_shot_evaluation.response_collector import ResponseCollector
15+
from cookbooks.zero_shot_evaluation.schema import (
2016
EvaluationConfig,
17+
GeneratedQuery,
2118
OpenAIEndpoint,
2219
QueryGenerationConfig,
2320
TaskConfig,
2421
ZeroShotConfig,
22+
load_config,
23+
)
24+
from cookbooks.zero_shot_evaluation.zero_shot_pipeline import (
25+
EvaluationResult,
26+
EvaluationStage,
27+
ZeroShotPipeline,
2528
)
2629

2730
__all__ = [
2831
# Config
29-
"ZeroShotConfig",
30-
"TaskConfig",
31-
"OpenAIEndpoint",
32-
"QueryGenerationConfig",
33-
"EvaluationConfig",
3432
"load_config",
33+
# Pipeline
34+
"ZeroShotPipeline",
35+
"EvaluationResult",
36+
"EvaluationStage",
3537
# Components
3638
"QueryGenerator",
3739
"ResponseCollector",
38-
"RubricGenerator",
39-
# Evaluator
40-
"ZeroShotEvaluator",
41-
"EvaluationResult",
40+
# Schema
41+
"EvaluationConfig",
42+
"GeneratedQuery",
43+
"OpenAIEndpoint",
44+
"QueryGenerationConfig",
45+
"TaskConfig",
46+
"ZeroShotConfig",
4247
]
43-

cookbooks/zero_shot_evaluation/__main__.py

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,8 @@
1515
import fire
1616
from loguru import logger
1717

18-
from cookbooks.zero_shot_evaluation.core.config import load_config
19-
from cookbooks.zero_shot_evaluation.core.evaluator import ZeroShotEvaluator
20-
from cookbooks.zero_shot_evaluation.core.schema import GeneratedQuery
18+
from cookbooks.zero_shot_evaluation.schema import GeneratedQuery, load_config
19+
from cookbooks.zero_shot_evaluation.zero_shot_pipeline import ZeroShotPipeline
2120

2221

2322
def _load_queries_from_file(queries_file: str) -> List[GeneratedQuery]:
@@ -55,11 +54,11 @@ async def _run_evaluation(
5554
if queries_file:
5655
queries = _load_queries_from_file(queries_file)
5756

58-
evaluator = ZeroShotEvaluator(config=config, resume=resume)
59-
result = await evaluator.evaluate(queries=queries)
57+
pipeline = ZeroShotPipeline(config=config, resume=resume)
58+
result = await pipeline.evaluate(queries=queries)
6059

6160
if save:
62-
evaluator.save_results(result, output_dir)
61+
pipeline.save_results(result, output_dir)
6362

6463

6564
def main(
@@ -81,10 +80,10 @@ def main(
8180
Examples:
8281
# Normal run (auto-resumes from checkpoint)
8382
python -m cookbooks.zero_shot_evaluation --config config.yaml --save
84-
83+
8584
# Use pre-generated queries
8685
python -m cookbooks.zero_shot_evaluation --config config.yaml --queries_file queries.json --save
87-
86+
8887
# Start fresh, ignore checkpoint
8988
python -m cookbooks.zero_shot_evaluation --config config.yaml --fresh --save
9089
"""
@@ -106,10 +105,9 @@ def main(
106105
logger.info("Starting fresh (ignoring checkpoint)")
107106
else:
108107
logger.info("Resume mode enabled (will continue from checkpoint if exists)")
109-
108+
110109
asyncio.run(_run_evaluation(str(config_path), output_dir, queries_file, save, resume=not fresh))
111110

112111

113112
if __name__ == "__main__":
114113
fire.Fire(main)
115-

cookbooks/zero_shot_evaluation/core/checkpoint.py renamed to cookbooks/zero_shot_evaluation/checkpoint.py

Lines changed: 40 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,12 @@
1010
from loguru import logger
1111
from pydantic import BaseModel, Field
1212

13-
from cookbooks.zero_shot_evaluation.core.schema import GeneratedQuery
13+
from cookbooks.zero_shot_evaluation.schema import GeneratedQuery
1414

1515

1616
class EvaluationStage(str, Enum):
1717
"""Evaluation pipeline stages."""
18-
18+
1919
NOT_STARTED = "not_started"
2020
QUERIES_GENERATED = "queries_generated"
2121
RESPONSES_COLLECTED = "responses_collected"
@@ -25,16 +25,16 @@ class EvaluationStage(str, Enum):
2525

2626
class CheckpointData(BaseModel):
2727
"""Checkpoint data model."""
28-
28+
2929
stage: EvaluationStage = Field(default=EvaluationStage.NOT_STARTED)
3030
created_at: str = Field(default_factory=lambda: datetime.now().isoformat())
3131
updated_at: str = Field(default_factory=lambda: datetime.now().isoformat())
32-
32+
3333
# Data files
3434
queries_file: Optional[str] = None
3535
responses_file: Optional[str] = None
3636
rubrics_file: Optional[str] = None
37-
37+
3838
# Progress tracking
3939
total_queries: int = 0
4040
collected_responses: int = 0
@@ -44,32 +44,32 @@ class CheckpointData(BaseModel):
4444

4545
class CheckpointManager:
4646
"""Manage evaluation checkpoints for resume capability."""
47-
47+
4848
CHECKPOINT_FILE = "checkpoint.json"
4949
QUERIES_FILE = "queries.json"
5050
RESPONSES_FILE = "responses.json"
5151
RUBRICS_FILE = "rubrics.json"
52-
52+
5353
def __init__(self, output_dir: str):
5454
"""Initialize checkpoint manager.
55-
55+
5656
Args:
5757
output_dir: Directory to store checkpoint files
5858
"""
5959
self.output_dir = Path(output_dir)
6060
self.output_dir.mkdir(parents=True, exist_ok=True)
6161
self._checkpoint: Optional[CheckpointData] = None
62-
62+
6363
@property
6464
def checkpoint_path(self) -> Path:
6565
return self.output_dir / self.CHECKPOINT_FILE
66-
66+
6767
def load(self) -> Optional[CheckpointData]:
6868
"""Load existing checkpoint if available."""
6969
if not self.checkpoint_path.exists():
7070
logger.info("No checkpoint found, starting fresh")
7171
return None
72-
72+
7373
try:
7474
with open(self.checkpoint_path, "r", encoding="utf-8") as f:
7575
data = json.load(f)
@@ -79,87 +79,87 @@ def load(self) -> Optional[CheckpointData]:
7979
except Exception as e:
8080
logger.warning(f"Failed to load checkpoint: {e}")
8181
return None
82-
82+
8383
def save(self, checkpoint: CheckpointData) -> None:
8484
"""Save checkpoint to file."""
8585
checkpoint.updated_at = datetime.now().isoformat()
8686
self._checkpoint = checkpoint
87-
87+
8888
with open(self.checkpoint_path, "w", encoding="utf-8") as f:
8989
json.dump(checkpoint.model_dump(), f, indent=2, ensure_ascii=False)
90-
90+
9191
logger.debug(f"Checkpoint saved: stage={checkpoint.stage.value}")
92-
92+
9393
def save_queries(self, queries: List[GeneratedQuery]) -> str:
9494
"""Save generated queries."""
9595
file_path = self.output_dir / self.QUERIES_FILE
96-
96+
9797
with open(file_path, "w", encoding="utf-8") as f:
9898
json.dump([q.model_dump() for q in queries], f, indent=2, ensure_ascii=False)
99-
99+
100100
logger.info(f"Saved {len(queries)} queries to {file_path}")
101101
return str(file_path)
102-
102+
103103
def load_queries(self) -> List[GeneratedQuery]:
104104
"""Load saved queries."""
105105
file_path = self.output_dir / self.QUERIES_FILE
106-
106+
107107
if not file_path.exists():
108108
return []
109-
109+
110110
with open(file_path, "r", encoding="utf-8") as f:
111111
data = json.load(f)
112-
112+
113113
queries = [GeneratedQuery(**item) for item in data]
114114
logger.info(f"Loaded {len(queries)} queries from {file_path}")
115115
return queries
116-
116+
117117
def save_responses(self, responses: List[Dict[str, Any]]) -> str:
118118
"""Save collected responses."""
119119
file_path = self.output_dir / self.RESPONSES_FILE
120-
120+
121121
with open(file_path, "w", encoding="utf-8") as f:
122122
json.dump(responses, f, indent=2, ensure_ascii=False)
123-
123+
124124
logger.info(f"Saved {len(responses)} responses to {file_path}")
125125
return str(file_path)
126-
126+
127127
def load_responses(self) -> List[Dict[str, Any]]:
128128
"""Load saved responses."""
129129
file_path = self.output_dir / self.RESPONSES_FILE
130-
130+
131131
if not file_path.exists():
132132
return []
133-
133+
134134
with open(file_path, "r", encoding="utf-8") as f:
135135
responses = json.load(f)
136-
136+
137137
logger.info(f"Loaded {len(responses)} responses from {file_path}")
138138
return responses
139-
139+
140140
def save_rubrics(self, rubrics: List[str]) -> str:
141141
"""Save generated rubrics."""
142142
file_path = self.output_dir / self.RUBRICS_FILE
143-
143+
144144
with open(file_path, "w", encoding="utf-8") as f:
145145
json.dump(rubrics, f, indent=2, ensure_ascii=False)
146-
146+
147147
logger.info(f"Saved {len(rubrics)} rubrics to {file_path}")
148148
return str(file_path)
149-
149+
150150
def load_rubrics(self) -> List[str]:
151151
"""Load saved rubrics."""
152152
file_path = self.output_dir / self.RUBRICS_FILE
153-
153+
154154
if not file_path.exists():
155155
return []
156-
156+
157157
with open(file_path, "r", encoding="utf-8") as f:
158158
rubrics = json.load(f)
159-
159+
160160
logger.info(f"Loaded {len(rubrics)} rubrics from {file_path}")
161161
return rubrics
162-
162+
163163
def update_stage(
164164
self,
165165
stage: EvaluationStage,
@@ -168,14 +168,14 @@ def update_stage(
168168
"""Update checkpoint stage and save."""
169169
if self._checkpoint is None:
170170
self._checkpoint = CheckpointData()
171-
171+
172172
self._checkpoint.stage = stage
173173
for key, value in kwargs.items():
174174
if hasattr(self._checkpoint, key):
175175
setattr(self._checkpoint, key, value)
176-
176+
177177
self.save(self._checkpoint)
178-
178+
179179
def clear(self) -> None:
180180
"""Clear all checkpoint data."""
181181
for file_name in [
@@ -187,7 +187,6 @@ def clear(self) -> None:
187187
file_path = self.output_dir / file_name
188188
if file_path.exists():
189189
file_path.unlink()
190-
190+
191191
self._checkpoint = None
192192
logger.info("Checkpoint cleared")
193-

0 commit comments

Comments (0)