Skip to content

Commit ed981ce

Browse files
authored
add DSPY tutorial (#161)
* add tutorial * fix docs * fix docs
1 parent ae2a831 commit ed981ce

File tree

3 files changed

+130
-32
lines changed

3 files changed

+130
-32
lines changed

autointent/generation/utterances/evolution/dspy_evolver.py

Lines changed: 33 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,8 @@ def repetition_factor(true_text: str, augmented_text: str) -> float:
6464
Raises:
6565
ValueError: If the lengths of true_texts and augmented_texts differ.
6666
"""
67-
true_tokens = true_text.split()
68-
aug_tokens = augmented_text.split()
67+
true_tokens = "".join(c for c in true_text.lower() if c.isalnum() or c.isspace()).split()
68+
aug_tokens = "".join(c for c in augmented_text.lower() if c.isalnum() or c.isspace()).split()
6969
if not true_tokens or not aug_tokens:
7070
return 0.0
7171
true_counts = Counter(true_tokens)
@@ -82,7 +82,7 @@ class SemanticRecallPrecision(dspy.Signature): # type: ignore[misc]
8282
8383
If asked to reason, enumerate key ideas in each response, and whether they are present in the other response.
8484
85-
Copied from https://github.com/stanfordnlp/dspy/blob/2957c5f998e0bc652017b6e3b1f8af34970b6f6b/dspy/evaluate/auto_evaluation.py#L4-L14
85+
Copied from `dspy <https://github.com/stanfordnlp/dspy/blob/2957c5f998e0bc652017b6e3b1f8af34970b6f6b/dspy/evaluate/auto_evaluation.py#L4-L14>`_
8686
"""
8787

8888
question: str = dspy.InputField()
@@ -95,7 +95,7 @@ class SemanticRecallPrecision(dspy.Signature): # type: ignore[misc]
9595
class AugmentSemanticF1(dspy.Module): # type: ignore[misc]
9696
"""Compare a system's response to the ground truth to compute its recall and precision.
9797
98-
Adapted from https://dspy.ai/api/evaluation/SemanticF1/
98+
Adapted from `dspy SemanticF1 <https://dspy.ai/api/evaluation/SemanticF1/>`_
9999
"""
100100

101101
def __init__(self, threshold: float = 0.66) -> None:
@@ -151,6 +151,15 @@ class DSPYIncrementalUtteranceEvolver:
151151
For ground truth utterances, it would generate new utterances and evaluate them using the pipeline.
152152
153153
For scoring generations it would use modified SemanticF1 as the base metric with a ROUGE-1 as repetition penalty.
154+
155+
Args:
156+
model: Model name. This should follow naming schema from `litellm providers <https://docs.litellm.ai/docs/providers>`_.
157+
api_base: API base URL. Some models require this.
158+
temperature: Sampling temperature. 0.0 is default from dspy LM.
159+
max_tokens: Maximum number of tokens to generate. 1000 is default from dspy LM.
160+
seed: Random seed for reproducibility.
161+
search_space: Search space for the pipeline.
162+
154163
"""
155164

156165
def __init__(
@@ -162,18 +171,8 @@ def __init__(
162171
seed: int = 42,
163172
search_space: str | None = None,
164173
) -> None:
165-
"""Initialize the DSPYIncrementalUtteranceEvolver.
166-
167-
Args:
168-
model: Model name. This should follow naming schema from litellm.
169-
https://docs.litellm.ai/docs/providers
170-
api_base: API base URL. Some models require this.
171-
temperature: Sampling temperature. 0.0 is default from dspy LM.
172-
max_tokens: Maximum number of tokens to generate. 1000 is default from dspy LM.
173-
seed: Random seed for reproducibility.
174-
search_space: Search space for the pipeline.
175-
"""
176-
self.search_space = search_space or DEFAULT_SEARCH_SPACE
174+
"""Initialize the DSPYIncrementalUtteranceEvolver."""
175+
self._search_space = search_space or DEFAULT_SEARCH_SPACE
177176
random.seed(seed)
178177

179178
llm = dspy.LM(
@@ -184,17 +183,17 @@ def __init__(
184183
max_tokens=max_tokens,
185184
)
186185
dspy.settings.configure(lm=llm)
187-
self.generator = dspy.ChainOfThoughtWithHint(AugmentationSignature)
186+
self._generator = dspy.ChainOfThoughtWithHint(AugmentationSignature)
188187

189-
def augment(
188+
def augment( # noqa: C901
190189
self,
191190
dataset: Dataset,
192191
split_name: str = Split.TEST,
193192
n_evolutions: int = 3,
194193
update_split: bool = True,
195194
mipro_init_params: dict[str, Any] | None = None,
196195
mipro_compile_params: dict[str, Any] | None = None,
197-
save_path: Path | str = "evolution_config",
196+
save_path: Path | str | None = None,
198197
) -> HFDataset:
199198
"""Augment the dataset using the evolutionary strategy.
200199
@@ -204,10 +203,10 @@ def augment(
204203
n_evolutions: Number of evolutions to perform.
205204
update_split: Whether to update the split with the augmented data.
206205
mipro_init_params: Parameters for the MIPROv2 augmentation.
207-
Full list of params available at https://dspy.ai/deep-dive/optimizers/miprov2/#initialization-parameters
206+
`Full list of parameters <https://dspy.ai/deep-dive/optimizers/miprov2/#initialization-parameters>`_
208207
mipro_compile_params: Parameters for the MIPROv2 compilation.
209-
Full list of params available at https://dspy.ai/deep-dive/optimizers/miprov2/#compile-parameters
210-
save_path: Path to save the generated samples. Defaults to "evolution_config".
208+
`Full list of params available <https://dspy.ai/deep-dive/optimizers/miprov2/#compile-parameters>`_
209+
save_path: Path to save the prompt of LLM. If None is provided, it will not be saved.
211210
212211
Returns:
213212
The augmented dataset.
@@ -221,11 +220,12 @@ def augment(
221220
if mipro_compile_params is None:
222221
mipro_compile_params = {}
223222

224-
if isinstance(save_path, str):
225-
save_path = Path(save_path)
223+
if save_path is not None:
224+
if isinstance(save_path, str):
225+
save_path = Path(save_path)
226226

227-
if not save_path.exists():
228-
save_path.mkdir(parents=True)
227+
if not save_path.exists():
228+
save_path.mkdir(parents=True)
229229

230230
dspy_dataset = [
231231
dspy.Example(
@@ -242,12 +242,13 @@ def augment(
242242

243243
optimizer = dspy.MIPROv2(metric=metric, **mipro_init_params)
244244

245-
optimized_module = optimizer.compile(self.generator, trainset=dspy_dataset, **mipro_compile_params)
245+
optimized_module = optimizer.compile(self._generator, trainset=dspy_dataset, **mipro_compile_params)
246246

247-
optimized_module.save((save_path / f"evolution_{i}").as_posix(), save_program=True)
248-
optimized_module.save(
249-
(save_path / f"evolution_{i}" / "generator_state.json").as_posix(), save_program=False
250-
)
247+
if save_path is not None:
248+
optimized_module.save((save_path / f"evolution_{i}").as_posix(), save_program=True)
249+
optimized_module.save(
250+
(save_path / f"evolution_{i}" / "generator_state.json").as_posix(), save_program=False
251+
)
251252
# Generate new samples
252253
new_samples = []
253254
for sample in original_split:
@@ -261,7 +262,7 @@ def augment(
261262
generated_samples.append(new_samples_dataset)
262263

263264
# Check if the new samples improve the model
264-
pipeline_optimizer = Pipeline.from_search_space(self.search_space)
265+
pipeline_optimizer = Pipeline.from_search_space(self._search_space)
265266
ctx = pipeline_optimizer.fit(merge_dataset)
266267
results = ctx.optimization_info.dump_evaluation_results()
267268
decision_metric = results["metrics"]["decision"][0]
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
.. _evolutionary_strategy_augmentation:
2+
3+
DSPy Augmentation
4+
#################
5+
6+
This tutorial covers the implementation and usage of an evolutionary strategy to augment utterances using DSPy. It explains how DSPy is used, how the module functions, and how the scoring metric works.
7+
8+
.. contents:: Table of Contents
9+
:depth: 2
10+
11+
-------------
12+
What is DSPy?
13+
-------------
14+
15+
DSPy is a framework for optimizing and evaluating language models. It provides tools for defining signatures, optimizing modules, and measuring evaluation metrics. This module leverages DSPy to generate augmented utterances using an evolutionary approach.
16+
17+
---------------------
18+
How This Module Works
19+
---------------------
20+
21+
This module applies an incremental evolutionary strategy for augmenting utterances. It generates new utterances based on a given dataset and refines them using an iterative process. The generated utterances are evaluated using a scoring mechanism that includes:
22+
23+
- **SemanticF1**: Measures how well the generated utterance matches the ground truth.
24+
- **ROUGE-1 penalty**: Discourages excessive repetition.
25+
- **Pipeline Decision Metric**: Assesses whether the augmented utterances improve model performance.
26+
27+
The augmentation process runs for a specified number of evolutions, saving intermediate models and optimizing the results.
28+
29+
------------
30+
Installation
31+
------------
32+
33+
Ensure you have the required dependencies installed:
34+
35+
.. code-block:: bash
36+
37+
pip install "autointent[dspy]"
38+
39+
--------------
40+
Scoring Metric
41+
--------------
42+
43+
The scoring metric consists of:
44+
45+
1. **SemanticF1 Score**:
46+
- Computes precision and recall between system-generated utterances and the ground truth using an LLM.
47+
- Uses DSPy’s `SemanticRecallPrecision` module.
48+
49+
2. **Repetition Factor (ROUGE-1 Penalty)**:
50+
- Measures overlap of words between the generated and ground truth utterances.
51+
- Ensures diversity in augmentation.
52+
53+
3. **Final Score Calculation**:
54+
- `Final Score = SemanticF1 * Repetition Factor`
55+
- A higher score means better augmentation.
56+
57+
-------------
58+
Usage Example
59+
-------------
60+
61+
Before running the following code, refer to the `LiteLLM documentation <https://docs.litellm.ai/docs/providers>`_ for proper model configuration.
62+
63+
.. code-block:: python
64+
65+
import os
66+
os.environ["OPENAI_API_KEY"] = "your-api-key"
67+
68+
from autointent import Dataset
69+
from autointent.custom_types import Split
from autointent.generation.utterances.evolution.dspy_evolver import DSPYIncrementalUtteranceEvolver
70+
71+
dataset = Dataset.from_hub("AutoIntent/clinc150_subset")
72+
evolver = DSPYIncrementalUtteranceEvolver(
73+
"openai/gpt-4o-mini"
74+
)
75+
76+
augmented_dataset = evolver.augment(
77+
dataset,
78+
split_name=Split.TEST,
79+
n_evolutions=1,
80+
mipro_init_params={
81+
"auto": "light",
82+
},
83+
mipro_compile_params={
84+
"minibatch": False,
85+
},
86+
)
87+
88+
augmented_dataset.to_csv("clinc150_dspy_augment.csv")

docs/source/user_guides.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,12 @@ User Guides
88
user_guides/index_basic_usage
99
user_guides/index_advanced_usage
1010
user_guides/index_cli_usage
11+
12+
Data augmentation tutorials
13+
---------------------------
14+
15+
.. toctree::
16+
:maxdepth: 1
17+
18+
augmentation_tutorials/dspy_augmentation
19+

0 commit comments

Comments
 (0)