@@ -64,8 +64,8 @@ def repetition_factor(true_text: str, augmented_text: str) -> float:
6464 Raises:
6565 ValueError: If the lengths of true_texts and augmented_texts differ.
6666 """
67- true_tokens = true_text .split ()
68- aug_tokens = augmented_text .split ()
67+ true_tokens = "" . join ( c for c in true_text . lower () if c . isalnum () or c . isspace ()) .split ()
68+ aug_tokens = "" . join ( c for c in augmented_text . lower () if c . isalnum () or c . isspace ()) .split ()
6969 if not true_tokens or not aug_tokens :
7070 return 0.0
7171 true_counts = Counter (true_tokens )
@@ -82,7 +82,7 @@ class SemanticRecallPrecision(dspy.Signature): # type: ignore[misc]
8282
8383 If asked to reason, enumerate key ideas in each response, and whether they are present in the other response.
8484
85- Copied from https://github.com/stanfordnlp/dspy/blob/2957c5f998e0bc652017b6e3b1f8af34970b6f6b/dspy/evaluate/auto_evaluation.py#L4-L14
85+    Copied from `dspy <https://github.com/stanfordnlp/dspy/blob/2957c5f998e0bc652017b6e3b1f8af34970b6f6b/dspy/evaluate/auto_evaluation.py#L4-L14>`_
8686 """
8787
8888 question : str = dspy .InputField ()
@@ -95,7 +95,7 @@ class SemanticRecallPrecision(dspy.Signature): # type: ignore[misc]
9595class AugmentSemanticF1 (dspy .Module ): # type: ignore[misc]
9696 """Compare a system's response to the ground truth to compute its recall and precision.
9797
98- Adapted from https://dspy.ai/api/evaluation/SemanticF1/
98+    Adapted from `dspy SemanticF1 <https://dspy.ai/api/evaluation/SemanticF1/>`_
9999 """
100100
101101 def __init__ (self , threshold : float = 0.66 ) -> None :
@@ -151,6 +151,15 @@ class DSPYIncrementalUtteranceEvolver:
151151 For ground truth utterances, it would generate new utterances and evaluate them using the pipeline.
152152
153153 For scoring generations it would use modified SemanticF1 as the base metric with a ROUGE-1 as repetition penalty.
154+
155+ Args:
156+ model: Model name. This should follow naming schema from `litellm providers <https://docs.litellm.ai/docs/providers>`_.
157+ api_base: API base URL. Some models require this.
158+ temperature: Sampling temperature. 0.0 is default from dspy LM.
159+ max_tokens: Maximum number of tokens to generate. 1000 is default from dspy LM.
160+ seed: Random seed for reproducibility.
161+ search_space: Search space for the pipeline.
162+
154163 """
155164
156165 def __init__ (
@@ -162,18 +171,8 @@ def __init__(
162171 seed : int = 42 ,
163172 search_space : str | None = None ,
164173 ) -> None :
165- """Initialize the DSPYIncrementalUtteranceEvolver.
166-
167- Args:
168- model: Model name. This should follow naming schema from litellm.
169- https://docs.litellm.ai/docs/providers
170- api_base: API base URL. Some models require this.
171- temperature: Sampling temperature. 0.0 is default from dspy LM.
172- max_tokens: Maximum number of tokens to generate. 1000 is default from dspy LM.
173- seed: Random seed for reproducibility.
174- search_space: Search space for the pipeline.
175- """
176- self .search_space = search_space or DEFAULT_SEARCH_SPACE
174+ """Initialize the DSPYIncrementalUtteranceEvolver."""
175+ self ._search_space = search_space or DEFAULT_SEARCH_SPACE
177176 random .seed (seed )
178177
179178 llm = dspy .LM (
@@ -184,17 +183,17 @@ def __init__(
184183 max_tokens = max_tokens ,
185184 )
186185 dspy .settings .configure (lm = llm )
187- self .generator = dspy .ChainOfThoughtWithHint (AugmentationSignature )
186+ self ._generator = dspy .ChainOfThoughtWithHint (AugmentationSignature )
188187
189- def augment (
188+ def augment ( # noqa: C901
190189 self ,
191190 dataset : Dataset ,
192191 split_name : str = Split .TEST ,
193192 n_evolutions : int = 3 ,
194193 update_split : bool = True ,
195194 mipro_init_params : dict [str , Any ] | None = None ,
196195 mipro_compile_params : dict [str , Any ] | None = None ,
197- save_path : Path | str = "evolution_config" ,
196+ save_path : Path | str | None = None ,
198197 ) -> HFDataset :
199198 """Augment the dataset using the evolutionary strategy.
200199
@@ -204,10 +203,10 @@ def augment(
204203 n_evolutions: Number of evolutions to perform.
205204 update_split: Whether to update the split with the augmented data.
206205 mipro_init_params: Parameters for the MIPROv2 augmentation.
207- Full list of params available at https://dspy.ai/deep-dive/optimizers/miprov2/#initialization-parameters
206+                `Full list of parameters <https://dspy.ai/deep-dive/optimizers/miprov2/#initialization-parameters>`_
208207 mipro_compile_params: Parameters for the MIPROv2 compilation.
209- Full list of params available at https://dspy.ai/deep-dive/optimizers/miprov2/#compile-parameters
210- save_path: Path to save the generated samples. Defaults to "evolution_config" .
208+                `Full list of params available <https://dspy.ai/deep-dive/optimizers/miprov2/#compile-parameters>`_
209+            save_path: Path to save the LLM prompt. If None is provided, it will not be saved.
211210
212211 Returns:
213212 The augmented dataset.
@@ -221,11 +220,12 @@ def augment(
221220 if mipro_compile_params is None :
222221 mipro_compile_params = {}
223222
224- if isinstance (save_path , str ):
225- save_path = Path (save_path )
223+ if save_path is not None :
224+ if isinstance (save_path , str ):
225+ save_path = Path (save_path )
226226
227- if not save_path .exists ():
228- save_path .mkdir (parents = True )
227+ if not save_path .exists ():
228+ save_path .mkdir (parents = True )
229229
230230 dspy_dataset = [
231231 dspy .Example (
@@ -242,12 +242,13 @@ def augment(
242242
243243 optimizer = dspy .MIPROv2 (metric = metric , ** mipro_init_params )
244244
245- optimized_module = optimizer .compile (self .generator , trainset = dspy_dataset , ** mipro_compile_params )
245+ optimized_module = optimizer .compile (self ._generator , trainset = dspy_dataset , ** mipro_compile_params )
246246
247- optimized_module .save ((save_path / f"evolution_{ i } " ).as_posix (), save_program = True )
248- optimized_module .save (
249- (save_path / f"evolution_{ i } " / "generator_state.json" ).as_posix (), save_program = False
250- )
247+ if save_path is not None :
248+ optimized_module .save ((save_path / f"evolution_{ i } " ).as_posix (), save_program = True )
249+ optimized_module .save (
250+ (save_path / f"evolution_{ i } " / "generator_state.json" ).as_posix (), save_program = False
251+ )
251252 # Generate new samples
252253 new_samples = []
253254 for sample in original_split :
@@ -261,7 +262,7 @@ def augment(
261262 generated_samples .append (new_samples_dataset )
262263
263264 # Check if the new samples improve the model
264- pipeline_optimizer = Pipeline .from_search_space (self .search_space )
265+ pipeline_optimizer = Pipeline .from_search_space (self ._search_space )
265266 ctx = pipeline_optimizer .fit (merge_dataset )
266267 results = ctx .optimization_info .dump_evaluation_results ()
267268 decision_metric = results ["metrics" ]["decision" ][0 ]
0 commit comments