diff --git a/autointent/generation/intents/_description_generation.py b/autointent/generation/intents/_description_generation.py index 1ce9eb768..08edb485e 100644 --- a/autointent/generation/intents/_description_generation.py +++ b/autointent/generation/intents/_description_generation.py @@ -139,6 +139,8 @@ def generate_descriptions( prompt: Template for model prompt with placeholders for intent_name, user_utterances, and regex_patterns. model_name: OpenAI model identifier for generating descriptions. + + See the :ref:`intent_description_generation` tutorial. """ samples = [] for split in dataset.values(): diff --git a/autointent/generation/utterances/_evolution/incremental_evolver.py b/autointent/generation/utterances/_evolution/incremental_evolver.py index fb0826795..f9cd198c4 100644 --- a/autointent/generation/utterances/_evolution/incremental_evolver.py +++ b/autointent/generation/utterances/_evolution/incremental_evolver.py @@ -43,7 +43,7 @@ class IncrementalUtteranceEvolver(UtteranceEvolver): """Incremental evolutionary strategy to augmenting utterances. This method adds LLM-generated training samples until the quality - of linear classification on resulting dataset is rising. + of linear classification on the resulting dataset stops rising. Args: generator: Generator instance for generating utterances. diff --git a/docs/source/augmentation_tutorials/index.rst b/docs/source/augmentation_tutorials/index.rst index 665c45bf7..38280bda6 100644 --- a/docs/source/augmentation_tutorials/index.rst +++ b/docs/source/augmentation_tutorials/index.rst @@ -8,3 +8,4 @@ Data augmentation tutorials balancer dspy_augmentation + intent_description diff --git a/docs/source/augmentation_tutorials/intent_description.rst b/docs/source/augmentation_tutorials/intent_description.rst new file mode 100644 index 000000000..3227f97f4 --- /dev/null +++ b/docs/source/augmentation_tutorials/intent_description.rst @@ -0,0 +1,100 @@ +.. 
_intent_description_generation: + +Intent Description Generation +############################# + +This documentation covers the implementation and usage of the Intent Description Generation module. It explains what the module does and how it works, and provides usage examples. + +The approach used in this module is based on the paper `Exploring Description-Augmented Dataless Intent Classification <https://arxiv.org/abs/2407.17862>`_. + +.. contents:: Table of Contents + :depth: 2 + +Overview +-------- + +The Intent Description Generation module is designed to automatically generate detailed and coherent descriptions of intents using large language models (LLMs). It enhances datasets by creating human-readable explanations for intents, supplemented by examples (utterances) and regex patterns. + +How the Module Works +-------------------- + +The module leverages prompt engineering to interact with LLMs, creating structured intent descriptions that are suitable for documentation, user interaction, and training purposes. Each generated description includes: + +- **Intent Name**: Clearly identifies the intent. +- **Examples (User Utterances)**: Demonstrate real-world user inputs. +- **Regex Patterns**: Highlight relevant regex patterns associated with the intent. + +The module uses a templated approach, defined through ``PromptDescription``, to maintain consistency and clarity across descriptions. + +Installation +------------ + +Ensure you have the necessary dependencies installed: + +.. code-block:: bash + + pip install autointent openai + +Usage +----- + +Here's an example demonstrating how to generate intent descriptions: + +.. 
code-block:: python + + import openai + from autointent import Dataset + from autointent.generation.intents import generate_descriptions + from autointent.generation.chat_templates import PromptDescription + + client = openai.AsyncOpenAI( + api_key="your-api-key" + ) + + dataset = Dataset.from_hub("AutoIntent/clinc150_subset") + + prompt = PromptDescription( + text="Describe intent {intent_name} with examples: {user_utterances} and patterns: {regex_patterns}", + ) + + enhanced_dataset = generate_descriptions( + dataset=dataset, + client=client, + prompt=prompt, + model_name="gpt-4o-mini", + ) + + enhanced_dataset.to_csv("enhanced_clinc150.csv") + +Prompt Customization +-------------------- + +The ``PromptDescription`` template can be customized to better fit specific requirements. It uses the following placeholders: + +- ``{intent_name}``: The name of the intent being described. +- ``{user_utterances}``: Example utterances related to the intent. +- ``{regex_patterns}``: Associated regular expression patterns. + +Adjusting the prompt allows tailoring descriptions to different contexts or detail levels. + +Model Selection +--------------- + +This module supports various LLMs available through OpenAI-compatible APIs. Configure your preferred model via the ``model_name`` parameter. Refer to your LLM provider’s documentation for available models. + +Recommended models include: + +- ``gpt-4o-mini`` (for balanced performance and efficiency) +- ``gpt-4`` (for maximum descriptive quality) + +API Integration +--------------- + +Ensure your OpenAI-compatible client is properly configured with an API endpoint and key: + +.. 
code-block:: python + + client = openai.AsyncOpenAI( + base_url="your-api-base-url", + api_key="your-api-key" + ) diff --git a/user_guides/basic_usage/03_automl.py b/user_guides/basic_usage/03_automl.py index 07f22fb02..23caa8a6e 100644 --- a/user_guides/basic_usage/03_automl.py +++ b/user_guides/basic_usage/03_automl.py @@ -103,6 +103,7 @@ # %% from autointent.configs import DataConfig + custom_pipeline.set_config(DataConfig(scheme="cv", n_folds=3)) # %% [markdown]