renaming predictors AND prompt creation

finitearth · finitearth · commit 2cbaedb028fc · 2025-11-25T13:06:43.000+01:00
diff --git a/promptolution/helpers.py b/promptolution/helpers.py
@@ -28,7 +28,8 @@
 from promptolution.optimizers.evoprompt_de import EvoPromptDE
 from promptolution.optimizers.evoprompt_ga import EvoPromptGA
 from promptolution.optimizers.opro import OPRO
-from promptolution.predictors.classifier import FirstOccurrenceClassifier, MarkerBasedClassifier
+from promptolution.predictors.first_occurence_predictor import FirstOccurrencePredictor
+from promptolution.predictors.maker_based_predictor import MarkerBasedPredictor
 from promptolution.tasks.classification_tasks import ClassificationTask
 from promptolution.utils.logging import get_logger
 from promptolution.utils.templates import (
@@ -272,23 +273,23 @@ def get_predictor(downstream_llm=None, type: "PredictorType" = "marker", *args,
     """Factory function to create and return a predictor instance.
 
     This function supports three types of predictors:
-    1. FirstOccurrenceClassifier: A predictor that classifies based on first occurrence of the label.
-    2. MarkerBasedClassifier: A predictor that classifies based on a marker.
+    1. FirstOccurrencePredictor: A predictor that classifies based on first occurrence of the label.
+    2. MarkerBasedPredictor: A predictor that classifies based on a marker.
 
     Args:
         downstream_llm: The language model to use for prediction.
         type (Literal["first_occurrence", "marker"]): The type of predictor to create:
-                    - "first_occurrence" for FirstOccurrenceClassifier
-                    - "marker" (default) for MarkerBasedClassifier
+                    - "first_occurrence" for FirstOccurrencePredictor
+                    - "marker" (default) for MarkerBasedPredictor
         *args: Variable length argument list passed to the predictor constructor.
         **kwargs: Arbitrary keyword arguments passed to the predictor constructor.
 
     Returns:
-        An instance of FirstOccurrenceClassifier or MarkerBasedClassifier.
+        An instance of FirstOccurrencePredictor or MarkerBasedPredictor.
     """
     if type == "first_occurrence":
-        return FirstOccurrenceClassifier(downstream_llm, *args, **kwargs)
+        return FirstOccurrencePredictor(downstream_llm, *args, **kwargs)
     elif type == "marker":
-        return MarkerBasedClassifier(downstream_llm, *args, **kwargs)
+        return MarkerBasedPredictor(downstream_llm, *args, **kwargs)
     else:
         raise ValueError(f"Invalid predictor type: '{type}'")
diff --git a/promptolution/optimizers/capo.py b/promptolution/optimizers/capo.py
@@ -8,8 +8,6 @@
 
 from typing import TYPE_CHECKING, Any, List, Optional, Tuple
 
-from promptolution.utils.formatting import extract_from_tag
-
 if TYPE_CHECKING:  # pragma: no cover
     from promptolution.utils.callbacks import BaseCallback
     from promptolution.llms.base_llm import BaseLLM
@@ -19,6 +17,7 @@
     from promptolution.utils.test_statistics import TestStatistics
 
 from promptolution.optimizers.base_optimizer import BaseOptimizer
+from promptolution.utils.formatting import extract_from_tag
 from promptolution.utils.logging import get_logger
 from promptolution.utils.prompt import Prompt, sort_prompts_by_scores
 from promptolution.utils.templates import CAPO_CROSSOVER_TEMPLATE, CAPO_FEWSHOT_TEMPLATE, CAPO_MUTATION_TEMPLATE
diff --git a/promptolution/predictors/__init__.py b/promptolution/predictors/__init__.py
@@ -1,4 +1,5 @@
 """Module for LLM predictors."""
 
 
-from promptolution.predictors.classifier import FirstOccurrenceClassifier, MarkerBasedClassifier
+from promptolution.predictors.first_occurence_predictor import FirstOccurrencePredictor
+from promptolution.predictors.maker_based_predictor import MarkerBasedPredictor
diff --git a/promptolution/predictors/base_predictor.py b/promptolution/predictors/base_predictor.py
@@ -10,6 +10,7 @@
 if TYPE_CHECKING:  # pragma: no cover
     from promptolution.utils.config import ExperimentConfig
 
+
 PredictorType = Literal["first_occurrence", "marker"]
 
 
diff --git a/promptolution/predictors/first_occurence_predictor.py b/promptolution/predictors/first_occurence_predictor.py
@@ -0,0 +1,65 @@
+"""Module for the FirstOccurrencePredictor."""
+
+from typing import TYPE_CHECKING, List, Optional
+
+from promptolution.predictors.base_predictor import BasePredictor
+
+if TYPE_CHECKING:  # pragma: no cover
+    from promptolution.llms.base_llm import BaseLLM
+    from promptolution.utils.config import ExperimentConfig
+
+
+class FirstOccurrencePredictor(BasePredictor):
+    """A predictor for classification tasks that picks the first class label found in the LLM output.
+
+    This class takes a language model and a list of valid class labels and extracts one label
+    per raw prediction. Every whitespace-separated word of a prediction is stripped of
+    non-alphanumeric characters and lowercased before it is compared with the valid class labels.
+    The first word that matches a valid class label is used as the predicted class.
+    If no valid class label is found, the first class label in the list is used as the default prediction.
+
+    Attributes:
+        llm: The language model used for generating predictions.
+        classes (List[str]): The list of valid class labels.
+        config (ExperimentConfig, optional): Configuration for the classifier, overriding defaults.
+
+    Inherits from:
+        BasePredictor: The base class for predictors in the promptolution library.
+    """
+
+    def __init__(self, llm: "BaseLLM", classes: List[str], config: Optional["ExperimentConfig"] = None) -> None:
+        """Initialize the FirstOccurrencePredictor.
+
+        Args:
+            llm: The language model to use for predictions.
+            classes (List[str]): The valid class labels; every label must be lowercase (checked by assertion).
+            config (ExperimentConfig, optional): Configuration for the classifier, overriding defaults.
+        """
+        assert all([c.islower() for c in classes]), "Class labels should be lowercase."
+        self.classes = classes
+        self.extraction_description = (
+            f"The task is to classify the texts into one of those classes: {', '.join(classes)}. "
+            "The first occurrence of a valid class label in the prediction is used as the predicted class."
+        )
+
+        super().__init__(llm, config)
+
+    def _extract_preds(self, preds: List[str]) -> List[str]:
+        """Extract class labels from the predictions, based on the list of valid class labels.
+
+        Args:
+            preds: The raw predictions from the language model.
+
+        Returns:
+            One class label per prediction; the first entry of ``self.classes`` when no word matches.
+        """
+        result = []
+        for pred in preds:
+            predicted_class = self.classes[0]  # use first class as default pred
+            for word in pred.split():
+                word = "".join([c for c in word if c.isalnum()]).lower()
+                if word in self.classes:
+                    predicted_class = word
+                    break
+            result.append(predicted_class)
+        return result
diff --git a/promptolution/predictors/maker_based_predictor.py b/promptolution/predictors/maker_based_predictor.py
@@ -1,9 +1,6 @@
-"""Module for classification predictors."""
+"""Module for the MarkerBasedPredictor."""
 
-
-import numpy as np
-
-from typing import TYPE_CHECKING, Any, List, Optional
+from typing import TYPE_CHECKING, List, Optional
 
 from promptolution.predictors.base_predictor import BasePredictor
 from promptolution.utils.formatting import extract_from_tag
@@ -13,64 +10,8 @@
     from promptolution.utils.config import ExperimentConfig
 
 
-class FirstOccurrenceClassifier(BasePredictor):
-    """A predictor class for classification tasks using language models.
-
-    This class takes a language model and a list of classes, and provides a method
-    to predict classes for given prompts and input data. The class labels are extracted
-    by matching the words in the prediction with the list of valid class labels.
-    The first occurrence of a valid class label in the prediction is used as the predicted class.
-    If no valid class label is found, the first class label in the list is used as the default prediction.
-
-    Attributes:
-        llm: The language model used for generating predictions.
-        classes (List[str]): The list of valid class labels.
-        config (ExperimentConfig, optional): Configuration for the classifier, overriding defaults.
-
-    Inherits from:
-        BasePredictor: The base class for predictors in the promptolution library.
-    """
-
-    def __init__(self, llm: "BaseLLM", classes: List[str], config: Optional["ExperimentConfig"] = None) -> None:
-        """Initialize the FirstOccurrenceClassifier.
-
-        Args:
-            llm: The language model to use for predictions.
-            classes (List[str]): The list of valid class labels.
-            config (ExperimentConfig, optional): Configuration for the classifier, overriding defaults.
-        """
-        assert all([c.islower() for c in classes]), "Class labels should be lowercase."
-        self.classes = classes
-
-        self.extraction_description = (
-            f"The task is to classify the texts into one of those classes: {', '.join(classes)}."
-            "The first occurrence of a valid class label in the prediction is used as the predicted class."
-        )
-
-        super().__init__(llm, config)
-
-    def _extract_preds(self, preds: List[str]) -> List[str]:
-        """Extract class labels from the predictions, based on the list of valid class labels.
-
-        Args:
-            preds: The raw predictions from the language model.
-        """
-        result = []
-        for pred in preds:
-            predicted_class = self.classes[0]  # use first class as default pred
-            for word in pred.split():
-                word = "".join([c for c in word if c.isalnum()]).lower()
-                if word in self.classes:
-                    predicted_class = word
-                    break
-
-            result.append(predicted_class)
-
-        return result
-
-
-class MarkerBasedClassifier(BasePredictor):
-    """A predictor class for classification tasks using language models.
+class MarkerBasedPredictor(BasePredictor):
+    """A predictor class for tasks using language models.
 
     This class takes a language model and a list of classes, and provides a method
     to predict classes for given prompts and input data. The class labels are extracted.
@@ -92,7 +33,7 @@ def __init__(
         end_marker: str = "</final_answer>",
         config: Optional["ExperimentConfig"] = None,
     ) -> None:
-        """Initialize the MarkerBasedClassifier.
+        """Initialize the MarkerBasedPredictor.
 
         Args:
             llm: The language model to use for predictions.
diff --git a/promptolution/tasks/judge_tasks.py b/promptolution/tasks/judge_tasks.py
@@ -132,7 +132,7 @@ def _evaluate(self, xs: List[str], ys: List[str], preds: List[str]) -> List[floa
         judge_responses = self.judge_llm.get_response(prompts)
         scores_str = extract_from_tag(judge_responses, "<final_score>", "</final_score>")
         scores = []
-        for score_str, judge_response in zip(scores_str, judge_responses):
+        for score_str in scores_str:
             try:
                 # only numeric chars, - or . are allowed
                 score_str = "".join(filter(lambda c: c.isdigit() or c in "-.", score_str))
diff --git a/promptolution/utils/prompt_creation.py b/promptolution/utils/prompt_creation.py
@@ -14,6 +14,7 @@
 from promptolution.tasks.classification_tasks import ClassificationTask
 from promptolution.utils.templates import (
     PROMPT_CREATION_TEMPLATE,
+    PROMPT_CREATION_TEMPLATE_FROM_TASK_DESCRIPTION,
     PROMPT_CREATION_TEMPLATE_TD,
     PROMPT_VARIATION_TEMPLATE,
 )
@@ -50,7 +51,7 @@ def create_prompts_from_samples(
     llm: "BaseLLM",
     meta_prompt: Optional[str] = None,
     n_samples: int = 3,
-    task_description: Optional[str] = None,
+    task_description: str = None,
     n_prompts: int = 1,
     get_uniform_labels: bool = False,
 ) -> List[str]:
@@ -119,3 +120,33 @@ def create_prompts_from_samples(
     prompts = extract_from_tag(prompts, "<prompt>", "</prompt>")
 
     return prompts
+
+
+def create_prompts_from_task_description(
+    task_description: str,
+    llm: "BaseLLM",
+    meta_prompt: Optional[str] = None,
+    n_prompts: int = 1,
+) -> List[str]:
+    """Create prompts for a task using only its textual description.
+
+    The ``<task_desc>`` placeholder of the meta prompt is replaced by the task description, the
+    filled meta prompt is sent to the LLM, and the text between ``<prompt>`` tags is extracted.
+
+    Args:
+        task_description (str): The description of the task to generate prompts for.
+        llm (BaseLLM): The language model to use for generating the prompts.
+        meta_prompt (str, optional): The meta prompt containing ``<task_desc>``. If None,
+            PROMPT_CREATION_TEMPLATE_FROM_TASK_DESCRIPTION is used.
+        n_prompts (int): The number of prompts to generate.
+
+    Returns:
+        List[str]: The result of extracting the ``<prompt>`` tag contents from the LLM responses.
+    """
+    template = PROMPT_CREATION_TEMPLATE_FROM_TASK_DESCRIPTION if meta_prompt is None else meta_prompt
+    filled_template = template.replace("<task_desc>", task_description)
+
+    # One identical request is issued per desired prompt.
+    responses = llm.get_response([filled_template] * n_prompts)
+
+    return extract_from_tag(responses, "<prompt>", "</prompt>")
diff --git a/promptolution/utils/templates.py b/promptolution/utils/templates.py
@@ -138,6 +138,12 @@
 
 The instruction was"""
 
+PROMPT_CREATION_TEMPLATE_FROM_TASK_DESCRIPTION = """Please create a prompt for the following task, not using any placeholders, working universally, for any datapoint-specific instructions following each system prompt.
+
+Task: <task_desc>
+
+Explicitly state this expected format as part of the prompts."""
+
 
 DOWNSTREAM_TEMPLATE = "<instruction>"
 
diff --git a/tests/predictors/test_predictors.py b/tests/predictors/test_predictors.py
@@ -1,13 +1,13 @@
 import numpy as np
 import pytest
 
-from promptolution.helpers import FirstOccurrenceClassifier, MarkerBasedClassifier
+from promptolution.helpers import FirstOccurrencePredictor, MarkerBasedPredictor
 
 
 def test_first_occurrence_classifier(mock_downstream_llm, mock_df):
-    """Test the FirstOccurrenceClassifier."""
+    """Test the FirstOccurrencePredictor."""
     # Create classifier
-    classifier = FirstOccurrenceClassifier(llm=mock_downstream_llm, classes=mock_df["y"].values)
+    classifier = FirstOccurrencePredictor(llm=mock_downstream_llm, classes=mock_df["y"].values)
 
     # Test with multiple inputs
     xs = ["I love this product!", "I hate this product!", "This product is okay.", "ja ne"]
@@ -25,9 +25,9 @@ def test_first_occurrence_classifier(mock_downstream_llm, mock_df):
 
 
 def test_marker_based_classifier(mock_downstream_llm, mock_df):
-    """Test the MarkerBasedClassifier."""
+    """Test the MarkerBasedPredictor."""
     # Create classifier
-    classifier = MarkerBasedClassifier(
+    classifier = MarkerBasedPredictor(
         llm=mock_downstream_llm,
         classes=mock_df["y"].values,
         begin_marker="<final_answer>",
@@ -56,9 +56,9 @@ def test_marker_based_classifier(mock_downstream_llm, mock_df):
 
 
 def test_marker_based_without_classes(mock_downstream_llm):
-    """Test MarkerBasedClassifier without predefined classes."""
+    """Test MarkerBasedPredictor without predefined classes."""
     # Create classifier without classes
-    classifier = MarkerBasedClassifier(
+    predictor = MarkerBasedPredictor(
         llm=mock_downstream_llm,
         classes=None,  # No class restrictions
         begin_marker="<final_answer>",
@@ -70,7 +70,7 @@ def test_marker_based_without_classes(mock_downstream_llm):
     prompts = ["Classify:"] * len(xs)
 
     # Make predictions
-    predictions = classifier.predict(prompts, xs)
+    predictions = predictor.predict(prompts, xs)
 
     # Verify shape and content - should accept any value between markers
     assert len(predictions) == 4
@@ -83,7 +83,7 @@ def test_marker_based_without_classes(mock_downstream_llm):
 def test_multiple_prompts_with_classifiers(mock_downstream_llm, mock_df):
     """Test using multiple prompts with classifiers."""
     # Create classifier
-    classifier = FirstOccurrenceClassifier(llm=mock_downstream_llm, classes=mock_df["y"].values)
+    classifier = FirstOccurrencePredictor(llm=mock_downstream_llm, classes=mock_df["y"].values)
 
     # Test with multiple prompts
     prompts = ["Classify:", "Classify:", "Rate:", "Rate:"]
@@ -103,7 +103,7 @@ def test_multiple_prompts_with_classifiers(mock_downstream_llm, mock_df):
 def test_sequence_return_with_classifiers(mock_downstream_llm, mock_df):
     """Test return_seq parameter with classifiers."""
     # Create classifier
-    classifier = MarkerBasedClassifier(llm=mock_downstream_llm, classes=mock_df["y"].values)
+    classifier = MarkerBasedPredictor(llm=mock_downstream_llm, classes=mock_df["y"].values)
 
     # Test with return_seq=True
     prompts = ["Classify:"]
@@ -128,15 +128,15 @@ def test_invalid_class_labels(mock_downstream_llm):
 
     # Should raise an assertion error
     with pytest.raises(AssertionError):
-        FirstOccurrenceClassifier(llm=mock_downstream_llm, classes=invalid_classes)
+        FirstOccurrencePredictor(llm=mock_downstream_llm, classes=invalid_classes)
 
     with pytest.raises(AssertionError):
-        MarkerBasedClassifier(llm=mock_downstream_llm, classes=invalid_classes)
+        MarkerBasedPredictor(llm=mock_downstream_llm, classes=invalid_classes)
 
 
 def test_marker_based_missing_markers(mock_downstream_llm):
-    """Test MarkerBasedClassifier behavior when markers are missing."""
-    classifier = MarkerBasedClassifier(llm=mock_downstream_llm, classes=["will", "not", "be", "used"])
+    """Test MarkerBasedPredictor behavior when markers are missing."""
+    classifier = MarkerBasedPredictor(llm=mock_downstream_llm, classes=["will", "not", "be", "used"])
 
     # When markers are missing, it should default to first class
     prompts = ["Classify:"]
diff --git a/tutorials/aime_eval.py b/tutorials/aime_eval.py
diff --git a/tutorials/api_llm_demo.py b/tutorials/api_llm_demo.py
diff --git a/tutorials/capo_demo.py b/tutorials/capo_demo.py
diff --git a/tutorials/evoprompt_demo.py b/tutorials/evoprompt_demo.py
diff --git a/tutorials/opro_demo.py b/tutorials/opro_demo.py