Skip to content

Commit 1ff18cf

Browse files
Darinochka and voorhs authored
Feat/augmentation utterances (#94)
* feat: added generation utterances
* feat: update generation
* feat: change prompt templates
* Refactor/move to our dataset class (#100)
* refactor basic utterance generator
* make `load_dataset` utility public
* polish `load_dataset` utility
* move basic utterance generator to `Dataset`
* refactor cli for basic utterance generator
* refactor evolutions module
* some bug fix in basic utterance generation
* some bug fix in evolutionary augmentations
* refactor `Generator` and fix codestyle
* fix typing
* fix import issues
* try to fix

---------

Co-authored-by: Алексеев Илья <[email protected]>
Co-authored-by: voorhs <[email protected]>
1 parent d9807cc commit 1ff18cf

27 files changed

+725
-22
lines changed

autointent/__init__.py

Lines changed: 13 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,18 @@
66
from ._vector_index import VectorIndex
77
from ._dataset import Dataset
88
from ._hash import Hasher
9-
from .context import Context
9+
from .context import Context, load_dataset
1010
from ._pipeline import Pipeline
1111

12-
__all__ = ["Context", "Dataset", "Embedder", "Hasher", "Pipeline", "Ranker", "VectorIndex", "setup_logging"]
12+
13+
__all__ = [
14+
"Context",
15+
"Dataset",
16+
"Embedder",
17+
"Hasher",
18+
"Pipeline",
19+
"Ranker",
20+
"VectorIndex",
21+
"load_dataset",
22+
"setup_logging",
23+
]

autointent/context/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
"""Core utilities for auto ML features."""
22

33
from ._context import Context
4+
from ._utils import load_dataset
45

5-
__all__ = ["Context"]
6+
__all__ = ["Context", "load_dataset"]

autointent/context/_context.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@
1717
VectorIndexConfig,
1818
)
1919

20-
from ._utils import NumpyEncoder, load_data
20+
from ._utils import NumpyEncoder, load_dataset
2121
from .data_handler import DataHandler
2222
from .optimization_info import OptimizationInfo
2323

@@ -81,7 +81,7 @@ def configure_data(self, config: DataConfig) -> None:
8181
:param config: Configuration for the data handling process.
8282
"""
8383
self.data_handler = DataHandler(
84-
dataset=load_data(config.train_path),
84+
dataset=load_dataset(config.train_path),
8585
random_seed=self.seed,
8686
)
8787

autointent/context/_utils.py

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -37,9 +37,9 @@ def default(self, obj: Any) -> str | int | float | list[Any] | Any: # noqa: ANN
3737
return super().default(obj)
3838

3939

40-
def load_data(filepath: str | Path) -> Dataset:
40+
def load_dataset(path: str | Path) -> Dataset:
4141
"""
42-
Load data from a specified path or use default sample data.
42+
Load data from a specified path or use default sample data or load from hugging face hub.
4343
4444
This function loads a dataset from a JSON file or retrieves sample data
4545
included with the `autointent` package for default multiclass or multilabel
@@ -50,10 +50,10 @@ def load_data(filepath: str | Path) -> Dataset:
5050
- "default-multilabel": Loads sample multilabel dataset.
5151
:return: A `Dataset` object containing the loaded data.
5252
"""
53-
if filepath == "default-multiclass":
53+
if path == "default-multiclass":
5454
return Dataset.from_hub("AutoIntent/clinc150_subset")
55-
if filepath == "default-multilabel":
55+
if path == "default-multilabel":
5656
return Dataset.from_hub("AutoIntent/clinc150_subset").to_multilabel()
57-
if not Path(filepath).exists():
58-
return Dataset.from_hub(str(filepath))
59-
return Dataset.from_json(filepath)
57+
if not Path(path).exists():
58+
return Dataset.from_hub(str(path))
59+
return Dataset.from_json(path)

autointent/generation/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1 +0,0 @@
1-
"""Experimental subpackage that someday will evolve into data augmentation tools."""

autointent/generation/intents/__init__.py

Whitespace-only changes.

autointent/generation/description_generation.py renamed to autointent/generation/intents/description_generation.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
from openai import AsyncOpenAI
88

99
from autointent import Dataset
10-
from autointent.generation.prompt_scheme import PromptDescription
10+
from autointent.generation.intents.prompt_scheme import PromptDescription
1111
from autointent.schemas import Intent, Sample
1212

1313

autointent/generation/prompt_scheme.py renamed to autointent/generation/intents/prompt_scheme.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22

33
from pydantic import BaseModel, field_validator
44

5-
from autointent.generation.prompts import PROMPT_DESCRIPTION
5+
from autointent.generation.utterances.prompts import PROMPT_DESCRIPTION
66

77

88
class PromptDescription(BaseModel):

autointent/generation/utterances/__init__.py

Whitespace-only changes.

autointent/generation/utterances/basic/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)