Skip to content

Commit 0a3024e

Browse files
authored
Docs/augmentation tutorials (#162)
* update tutorials
* move `DatasetBalancer` and `Generator`
* leave a reference to guides
* update dependencies
* fix typing
* refactor `generation` module
* minor fixes
* add usage examples to evolver and basic generator
* fix dspy issue
* add reference to data aug tutorials
* remove dspy
* change a little bit
1 parent 73a87eb commit 0a3024e

File tree

23 files changed

+96
-52
lines changed

23 files changed

+96
-52
lines changed

autointent/_pipeline/_pipeline.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
class Pipeline:
3838
"""Pipeline optimizer class.
3939
40-
See tutorial on AutoML features of AutoIntent.
40+
See tutorial on AutoML features of AutoIntent in :ref:`user_guides`.
4141
"""
4242

4343
def __init__(

autointent/generation/__init__.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,8 @@
1-
"""Some generative methods for enriching training datasets."""
1+
"""Some generative methods for enriching training datasets.
2+
3+
See :ref:`data-aug-tuts`.
4+
"""
5+
6+
from ._generator import Generator
7+
8+
__all__ = ["Generator"]

autointent/generation/utterances/generator.py renamed to autointent/generation/_generator.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@ class Generator:
1616
1717
Args:
1818
base_url: HTTP-endpoint for sending API requests to OpenAI API compatible server.
19-
Omit this to infer OPENAI_BASE_URL from environment.
20-
model_name: Name of LLM. Omit this to infer OPENAI_MODEL_NAME from environment.
19+
Omit this to infer ``OPENAI_BASE_URL`` from environment.
20+
model_name: Name of LLM. Omit this to infer ``OPENAI_MODEL_NAME`` from environment.
2121
**generation_params: kwargs that will be sent with a request to the endpoint.
2222
"""
2323

@@ -28,7 +28,7 @@ class Generator:
2828
"temperature": 0.7,
2929
}
3030

31-
def __init__(self, base_url: str | None = None, model_name: str | None = None, **generation_params: Any) -> None: # noqa: ANN401, D107
31+
def __init__(self, base_url: str | None = None, model_name: str | None = None, **generation_params: Any) -> None: # noqa: ANN401
3232
if not base_url:
3333
base_url = os.environ["OPENAI_BASE_URL"]
3434
if not model_name:

autointent/generation/chat_templates/_base_synthesizer.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from typing import ClassVar
77

88
from autointent import Dataset
9+
from autointent.custom_types import Split
910
from autointent.schemas import Intent
1011

1112
from ._evolution_templates_schemas import Message, Role
@@ -38,7 +39,7 @@ class BaseSynthesizerTemplate(BaseChatTemplate):
3839
def __init__(
3940
self,
4041
dataset: Dataset,
41-
split: str,
42+
split: str = Split.TRAIN,
4243
extra_instructions: str | None = None,
4344
max_sample_utterances: int | None = None,
4445
) -> None:

autointent/generation/utterances/__init__.py

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,10 @@
11
"""Generative methods for enriching dataset with synthetic samples."""
22

3-
from .balancer import DatasetBalancer
4-
from .basic import UtteranceGenerator
5-
from .evolution import (
6-
IncrementalUtteranceEvolver,
7-
UtteranceEvolver,
8-
)
9-
from .generator import Generator
3+
from ._basic import DatasetBalancer, UtteranceGenerator
4+
from ._evolution import IncrementalUtteranceEvolver, UtteranceEvolver
105

116
__all__ = [
127
"DatasetBalancer",
13-
"Generator",
148
"IncrementalUtteranceEvolver",
159
"UtteranceEvolver",
1610
"UtteranceGenerator",

autointent/generation/utterances/_basic/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
from .balancer import DatasetBalancer
2+
from .utterance_generator import UtteranceGenerator
3+
4+
__all__ = ["DatasetBalancer", "UtteranceGenerator"]

autointent/generation/utterances/balancer.py renamed to autointent/generation/utterances/_basic/balancer.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,10 @@
77

88
from autointent import Dataset
99
from autointent.custom_types import Split
10+
from autointent.generation import Generator
1011
from autointent.generation.chat_templates import BaseSynthesizerTemplate
11-
from autointent.generation.utterances.basic.utterance_generator import UtteranceGenerator
12-
from autointent.generation.utterances.generator import Generator
12+
13+
from .utterance_generator import UtteranceGenerator
1314

1415
logger = logging.getLogger(__name__)
1516

@@ -30,7 +31,7 @@ class DatasetBalancer:
3031
Must be a positive integer or None. Defaults to None.
3132
"""
3233

33-
def __init__( # noqa: D107
34+
def __init__(
3435
self,
3536
generator: Generator,
3637
prompt_maker: BaseSynthesizerTemplate,

autointent/generation/utterances/basic/cli.py renamed to autointent/generation/utterances/_basic/cli.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,9 @@
44
from argparse import ArgumentParser
55

66
from autointent import load_dataset
7+
from autointent.generation import Generator
78
from autointent.generation.chat_templates import EnglishSynthesizerTemplate, RussianSynthesizerTemplate
8-
from autointent.generation.utterances import Generator, UtteranceGenerator
9+
from autointent.generation.utterances import UtteranceGenerator
910

1011
logging.basicConfig(level="INFO")
1112
logger = logging.getLogger(__name__)

autointent/generation/utterances/basic/utterance_generator.py renamed to autointent/generation/utterances/_basic/utterance_generator.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@
77

88
from autointent import Dataset
99
from autointent.custom_types import Split
10+
from autointent.generation import Generator
1011
from autointent.generation.chat_templates import BaseSynthesizerTemplate
11-
from autointent.generation.utterances.generator import Generator
1212
from autointent.schemas import Intent, Sample
1313

1414

@@ -23,9 +23,26 @@ class UtteranceGenerator:
2323
generator: Generator instance for generating utterances.
2424
prompt_maker: Prompt maker instance for generating prompts.
2525
async_mode: Whether to use asynchronous mode for generation.
26+
27+
Usage
28+
-----
29+
30+
.. code-block:: python
31+
32+
from autointent import Dataset
33+
from autointent.generation import Generator
34+
from autointent.generation.utterances import UtteranceGenerator
35+
from autointent.generation.chat_templates import RussianSynthesizerTemplate
36+
37+
dataset = Dataset.from_json(path_to_json)
38+
generator = Generator()
39+
prompt = RussianSynthesizerTemplate(dataset)
40+
augmenter = UtteranceGenerator(generator, prompt_maker=prompt)
41+
augmenter.augment(dataset)
42+
2643
"""
2744

28-
def __init__(self, generator: Generator, prompt_maker: BaseSynthesizerTemplate, async_mode: bool = False) -> None: # noqa: D107
45+
def __init__(self, generator: Generator, prompt_maker: BaseSynthesizerTemplate, async_mode: bool = False) -> None:
2946
self.generator = generator
3047
self.prompt_maker = prompt_maker
3148
self.async_mode = async_mode

autointent/generation/utterances/_evolution/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
from .evolver import UtteranceEvolver
2+
from .incremental_evolver import IncrementalUtteranceEvolver
3+
4+
__all__ = ["IncrementalUtteranceEvolver", "UtteranceEvolver"]

0 commit comments

Comments
 (0)