65 changes: 33 additions & 32 deletions autointent/generation/utterances/evolution/dspy_evolver.py
@@ -64,8 +64,8 @@ def repetition_factor(true_text: str, augmented_text: str) -> float:
Raises:
ValueError: If the lengths of true_texts and augmented_texts differ.
"""
true_tokens = true_text.split()
aug_tokens = augmented_text.split()
true_tokens = "".join(c for c in true_text.lower() if c.isalnum() or c.isspace()).split()
aug_tokens = "".join(c for c in augmented_text.lower() if c.isalnum() or c.isspace()).split()
if not true_tokens or not aug_tokens:
return 0.0
true_counts = Counter(true_tokens)
@@ -82,7 +82,7 @@ class SemanticRecallPrecision(dspy.Signature): # type: ignore[misc]

If asked to reason, enumerate key ideas in each response, and whether they are present in the other response.

Copied from https://github.com/stanfordnlp/dspy/blob/2957c5f998e0bc652017b6e3b1f8af34970b6f6b/dspy/evaluate/auto_evaluation.py#L4-L14
Copied from `dspy <https://github.com/stanfordnlp/dspy/blob/2957c5f998e0bc652017b6e3b1f8af34970b6f6b/dspy/evaluate/auto_evaluation.py#L4-L14>`_
"""

question: str = dspy.InputField()
@@ -95,7 +95,7 @@ class SemanticRecallPrecision(dspy.Signature): # type: ignore[misc]
class AugmentSemanticF1(dspy.Module): # type: ignore[misc]
"""Compare a system's response to the ground truth to compute its recall and precision.

Adapted from https://dspy.ai/api/evaluation/SemanticF1/
Adapted from `dspy SemanticF1 <https://dspy.ai/api/evaluation/SemanticF1/>`_
"""

def __init__(self, threshold: float = 0.66) -> None:
@@ -151,6 +151,15 @@ class DSPYIncrementalUtteranceEvolver:
For each ground truth utterance, it generates new utterances and evaluates them using the pipeline.

Generations are scored with a modified SemanticF1 as the base metric and a ROUGE-1-based repetition penalty.

Args:
model: Model name. This should follow the naming scheme from `litellm providers <https://docs.litellm.ai/docs/providers>`_.
api_base: API base URL. Some models require this.
temperature: Sampling temperature. 0.0 is default from dspy LM.
max_tokens: Maximum number of tokens to generate. 1000 is default from dspy LM.
seed: Random seed for reproducibility.
search_space: Search space for the pipeline.

"""

def __init__(
@@ -162,18 +171,8 @@ def __init__(
seed: int = 42,
search_space: str | None = None,
) -> None:
"""Initialize the DSPYIncrementalUtteranceEvolver.

Args:
model: Model name. This should follow naming schema from litellm.
https://docs.litellm.ai/docs/providers
api_base: API base URL. Some models require this.
temperature: Sampling temperature. 0.0 is default from dspy LM.
max_tokens: Maximum number of tokens to generate. 1000 is default from dspy LM.
seed: Random seed for reproducibility.
search_space: Search space for the pipeline.
"""
self.search_space = search_space or DEFAULT_SEARCH_SPACE
"""Initialize the DSPYIncrementalUtteranceEvolver."""
self._search_space = search_space or DEFAULT_SEARCH_SPACE
random.seed(seed)

llm = dspy.LM(
Expand All @@ -184,17 +183,17 @@ def __init__(
max_tokens=max_tokens,
)
dspy.settings.configure(lm=llm)
self.generator = dspy.ChainOfThoughtWithHint(AugmentationSignature)
self._generator = dspy.ChainOfThoughtWithHint(AugmentationSignature)

def augment(
def augment( # noqa: C901
self,
dataset: Dataset,
split_name: str = Split.TEST,
n_evolutions: int = 3,
update_split: bool = True,
mipro_init_params: dict[str, Any] | None = None,
mipro_compile_params: dict[str, Any] | None = None,
save_path: Path | str = "evolution_config",
save_path: Path | str | None = None,
) -> HFDataset:
"""Augment the dataset using the evolutionary strategy.

@@ -204,10 +203,10 @@ def augment(
n_evolutions: Number of evolutions to perform.
update_split: Whether to update the split with the augmented data.
mipro_init_params: Parameters for the MIPROv2 augmentation.
Full list of params available at https://dspy.ai/deep-dive/optimizers/miprov2/#initialization-parameters
`Full list of parameters <https://dspy.ai/deep-dive/optimizers/miprov2/#initialization-parameters>`_
mipro_compile_params: Parameters for the MIPROv2 compilation.
Full list of params available at https://dspy.ai/deep-dive/optimizers/miprov2/#compile-parameters
save_path: Path to save the generated samples. Defaults to "evolution_config".
`Full list of parameters <https://dspy.ai/deep-dive/optimizers/miprov2/#compile-parameters>`_
save_path: Path where the optimized LLM prompts are saved. If None, nothing is saved.

Returns:
The augmented dataset.
@@ -221,11 +220,12 @@ def augment(
if mipro_compile_params is None:
mipro_compile_params = {}

if isinstance(save_path, str):
save_path = Path(save_path)
if save_path is not None:
if isinstance(save_path, str):
save_path = Path(save_path)

if not save_path.exists():
save_path.mkdir(parents=True)
if not save_path.exists():
save_path.mkdir(parents=True)

dspy_dataset = [
dspy.Example(
@@ -242,12 +242,13 @@

optimizer = dspy.MIPROv2(metric=metric, **mipro_init_params)

optimized_module = optimizer.compile(self.generator, trainset=dspy_dataset, **mipro_compile_params)
optimized_module = optimizer.compile(self._generator, trainset=dspy_dataset, **mipro_compile_params)

optimized_module.save((save_path / f"evolution_{i}").as_posix(), save_program=True)
optimized_module.save(
(save_path / f"evolution_{i}" / "generator_state.json").as_posix(), save_program=False
)
if save_path is not None:
optimized_module.save((save_path / f"evolution_{i}").as_posix(), save_program=True)
optimized_module.save(
(save_path / f"evolution_{i}" / "generator_state.json").as_posix(), save_program=False
)
# Generate new samples
new_samples = []
for sample in original_split:
@@ -261,7 +262,7 @@ def augment(
generated_samples.append(new_samples_dataset)

# Check if the new samples improve the model
pipeline_optimizer = Pipeline.from_search_space(self.search_space)
pipeline_optimizer = Pipeline.from_search_space(self._search_space)
ctx = pipeline_optimizer.fit(merge_dataset)
results = ctx.optimization_info.dump_evaluation_results()
decision_metric = results["metrics"]["decision"][0]
88 changes: 88 additions & 0 deletions docs/source/augmentation_tutorials/dspy_augmentation.rst
@@ -0,0 +1,88 @@
.. _evolutionary_strategy_augmentation:

DSPy Augmentation
#################

This tutorial covers the implementation and usage of an evolutionary strategy to augment utterances using DSPy. It explains how DSPy is used, how the module functions, and how the scoring metric works.

.. contents:: Table of Contents
:depth: 2

-------------
What is DSPy?
-------------

DSPy is a framework for optimizing and evaluating language models. It provides tools for defining signatures, optimizing modules, and measuring evaluation metrics. This module leverages DSPy to generate augmented utterances using an evolutionary approach.
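
For orientation, here is a minimal sketch of the DSPy building blocks this module relies on: a signature describing inputs and outputs, and a module such as ``ChainOfThought`` that prompts an LLM against it. The ``Paraphrase`` signature below is a made-up example for illustration only and is not part of AutoIntent.

.. code-block:: python

    import dspy

    # A made-up signature for illustration -- it is not part of AutoIntent.
    class Paraphrase(dspy.Signature):
        """Rewrite the utterance while preserving its intent."""

        utterance: str = dspy.InputField()
        paraphrase: str = dspy.OutputField()

    # Any LiteLLM-compatible model works; calling the module requires a valid API key.
    dspy.settings.configure(lm=dspy.LM("openai/gpt-4o-mini"))
    generator = dspy.ChainOfThought(Paraphrase)
    print(generator(utterance="book a table for two").paraphrase)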

---------------------
How This Module Works
---------------------

This module applies an incremental evolutionary strategy for augmenting utterances. It generates new utterances based on a given dataset and refines them using an iterative process. The generated utterances are evaluated using a scoring mechanism that includes:

- **SemanticF1**: Measures how well the generated utterance matches the ground truth.
- **ROUGE-1 penalty**: Discourages excessive repetition.
- **Pipeline Decision Metric**: Assesses whether the augmented utterances improve model performance.

The augmentation process runs for a specified number of evolutions, saving intermediate models and optimizing the results.
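
The acceptance check at the end of each evolution is a regular AutoIntent pipeline run. The sketch below follows the calls used in this PR (``Pipeline.from_search_space``, ``fit``, and the dumped ``decision`` metric); the dataset and the search-space path are placeholders, since inside the evolver these are the original split merged with the freshly generated utterances and the configured search space.

.. code-block:: python

    from autointent import Dataset, Pipeline

    # Placeholders: inside the evolver this is the merged original-plus-generated
    # split, evaluated against the configured search space.
    dataset = Dataset.from_hub("AutoIntent/clinc150_subset")
    search_space = "path/to/search_space.yaml"  # hypothetical path

    ctx = Pipeline.from_search_space(search_space).fit(dataset)
    results = ctx.optimization_info.dump_evaluation_results()
    decision_metric = results["metrics"]["decision"][0]  # used to accept or reject a generation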

------------
Installation
------------

Ensure you have the required dependencies installed:

.. code-block:: bash

pip install "autointent[dspy]"

--------------
Scoring Metric
--------------

The scoring metric consists of:

1. **SemanticF1 Score**:
- Computes precision and recall between system-generated utterances and the ground truth using an LLM.
- Uses the ``SemanticRecallPrecision`` signature copied from DSPy.

2. **Repetition Factor (ROUGE-1 Penalty)**:
- Measures overlap of words between the generated and ground truth utterances.
- Penalizes excessive repetition, keeping the augmentations diverse.

3. **Final Score Calculation**:
- ``Final Score = SemanticF1 * Repetition Factor``
- A higher score means better augmentation; a minimal sketch of this computation is shown below.
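
The snippet below is an illustrative sketch of the combined score. The token normalization mirrors the ``repetition_factor`` helper in this PR (lowercasing and stripping non-alphanumeric characters), but the exact penalty formula is not reproduced here, and ``semantic_f1`` values are placeholders for what the LLM judge produces in the real metric.

.. code-block:: python

    from collections import Counter


    def _normalize(text: str) -> list[str]:
        """Lowercase, keep alphanumerics and whitespace, then split into tokens."""
        return "".join(c for c in text.lower() if c.isalnum() or c.isspace()).split()


    def unigram_overlap(true_text: str, augmented_text: str) -> float:
        """ROUGE-1-style unigram overlap in [0, 1] (illustrative helper only)."""
        true_tokens, aug_tokens = _normalize(true_text), _normalize(augmented_text)
        if not true_tokens or not aug_tokens:
            return 0.0
        true_counts, aug_counts = Counter(true_tokens), Counter(aug_tokens)
        matched = sum(min(true_counts[token], count) for token, count in aug_counts.items())
        return matched / len(aug_tokens)


    def combined_score(semantic_f1: float, repetition_factor: float) -> float:
        """Final Score = SemanticF1 * Repetition Factor (higher is better)."""
        return semantic_f1 * repetition_factor


    print(unigram_overlap("book a table for two", "Reserve a table for two people!"))  # ~0.67
    print(combined_score(0.9, 0.8))  # 0.72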

-------------
Usage Example
-------------

Before running the following code, refer to the `LiteLLM documentation <https://docs.litellm.ai/docs/providers>`_ for proper model configuration.

.. code-block:: python

import os
os.environ["OPENAI_API_KEY"] = "your-api-key"

from autointent import Dataset
from autointent.custom_types import Split
# Module path as in this PR; the class may also be re-exported at a higher level.
from autointent.generation.utterances.evolution.dspy_evolver import DSPYIncrementalUtteranceEvolver

dataset = Dataset.from_hub("AutoIntent/clinc150_subset")
evolver = DSPYIncrementalUtteranceEvolver(
"openai/gpt-4o-mini"
)

augmented_dataset = evolver.augment(
dataset,
split_name=Split.TEST,
n_evolutions=1,
mipro_init_params={
"auto": "light",
},
mipro_compile_params={
"minibatch": False,
},
)

augmented_dataset.to_csv("clinc150_dspy_augment.csv")
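
Each evolution's optimized DSPy program can also be persisted by passing ``save_path`` (the default ``None`` skips saving); the prompts for evolution ``i`` are stored under ``<save_path>/evolution_<i>``:

.. code-block:: python

    augmented_dataset = evolver.augment(
        dataset,
        split_name=Split.TEST,
        n_evolutions=1,
        save_path="evolution_config",
    )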
9 changes: 9 additions & 0 deletions docs/source/user_guides.rst
@@ -8,3 +8,12 @@ User Guides
user_guides/index_basic_usage
user_guides/index_advanced_usage
user_guides/index_cli_usage

Data augmentation tutorials
---------------------------

.. toctree::
:maxdepth: 1

augmentation_tutorials/dspy_augmentation