
Commit 53806e6: Fix/documentation (#248)

* make preparations
* fix catboost
* fix doctests for bert-based methods
* minor fixes
* upd readme
* fix tutorials
* upd quickstart page
* run formatter
* fix typing

1 parent 8a4bc8e

File tree: 13 files changed (+44, -49 lines)


README.md

Lines changed: 4 additions & 0 deletions

@@ -34,3 +34,7 @@ pipeline = Pipeline.from_preset("light")
 pipeline.fit(dataset)
 pipeline.predict(["show me my latest transactions"])
 ```
+
+## Disclaimer
+
+This project is in a development phase. Bugs and breaking changes are expected. Contributions and feedback are welcome! See [CONTRIBUTING.md](./CONTRIBUTING.md).

autointent/modules/scoring/_bert.py

Lines changed: 9 additions & 2 deletions

@@ -2,7 +2,7 @@

 import tempfile
 from collections.abc import Callable
-from typing import Any
+from typing import Any, Literal

 import numpy as np
 import numpy.typing as npt
@@ -15,6 +15,8 @@
     DataCollatorWithPadding,
     EarlyStoppingCallback,
     EvalPrediction,
+    PrinterCallback,
+    ProgressCallback,
     Trainer,
     TrainingArguments,
 )
@@ -84,8 +86,9 @@ def __init__(
         batch_size: int = 8,
         learning_rate: float = 5e-5,
         seed: int = 0,
-        report_to: REPORTERS_NAMES | None = None,  # type: ignore # noqa: PGH003
+        report_to: REPORTERS_NAMES | Literal["none"] = "none",  # type: ignore # noqa: PGH003
         early_stopping_config: EarlyStoppingConfig | dict[str, Any] | None = None,
+        print_progress: bool = False,
     ) -> None:
         self.classification_model_config = HFModelConfig.from_search_config(classification_model_config)
         self.num_train_epochs = num_train_epochs
@@ -94,6 +97,7 @@ def __init__(
         self.seed = seed
         self.report_to = report_to
         self.early_stopping_config = EarlyStoppingConfig.from_search_config(early_stopping_config)
+        self.print_progress = print_progress

     @classmethod
     def from_context(
@@ -187,6 +191,9 @@ def _train(self, tokenized_dataset: DatasetDict) -> None:
             compute_metrics=self._get_compute_metrics(),
             callbacks=self._get_trainer_callbacks(),
         )
+        if not self.print_progress:
+            trainer.remove_callback(PrinterCallback)  # type: ignore[attr-defined]
+            trainer.remove_callback(ProgressCallback)  # type: ignore[attr-defined]

         trainer.train()  # type: ignore[attr-defined]
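
For illustration, a minimal sketch of what the new print_progress flag does, assuming a Trainer already constructed as in _train (the helper name mute_trainer_output is hypothetical, not part of the module):

    from transformers import PrinterCallback, ProgressCallback, Trainer

    def mute_trainer_output(trainer: Trainer, print_progress: bool = False) -> Trainer:
        # Trainer.remove_callback accepts a callback class and detaches the
        # matching instance, so no log lines or progress bars are emitted.
        if not print_progress:
            trainer.remove_callback(PrinterCallback)
            trainer.remove_callback(ProgressCallback)
        return trainer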

autointent/modules/scoring/_catboost/catboost_scorer.py

Lines changed: 1 addition & 7 deletions

@@ -58,7 +58,7 @@ class CatBoostScorer(BaseScorer):
     `catboost's documentation <https://catboost.ai/docs/en/concepts/python-reference_catboostclassifier>`_

     Example:
-    -------
+    --------

     .. testcode::

@@ -79,12 +79,6 @@ class CatBoostScorer(BaseScorer):
         scorer.fit(utterances, labels)
         test_utterances = ["hi", "bye"]
         probabilities = scorer.predict(test_utterances)
-        print(probabilities)
-
-    .. testoutput::
-
-        [[0.41493207 0.58506793]
-         [0.55036046 0.44963954]]

     """
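
The deleted ``.. testoutput::`` block pinned exact probabilities, which vary across platforms and catboost versions, so the doctest broke. A doctest-stable alternative (an illustrative sketch continuing the scorer from the example above, not code from this commit) asserts on properties that do not drift:

    import numpy as np

    probabilities = scorer.predict(["hi", "bye"])
    # Stable properties only: one row per utterance, each row summing to ~1.
    assert probabilities.shape == (2, 2)
    assert np.allclose(probabilities.sum(axis=1), 1.0)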

autointent/modules/scoring/_lora/lora.py

Lines changed: 4 additions & 7 deletions

@@ -1,7 +1,7 @@
 """BertScorer class for transformer-based classification with LoRA."""

 from pathlib import Path
-from typing import Any
+from typing import Any, Literal

 from peft import LoraConfig, get_peft_model

@@ -51,12 +51,7 @@ class BERTLoRAScorer(BertScorer):
         # Make predictions
         test_utterances = ["Good product", "Not worth it"]
         probabilities = scorer.predict(test_utterances)
-        print(probabilities)

-    .. testoutput::
-
-        [[0.89 0.11]
-         [0.23 0.77]]
     """

     name = "lora"
@@ -68,7 +63,8 @@ def __init__(
         batch_size: int = 8,
         learning_rate: float = 5e-5,
         seed: int = 0,
-        report_to: REPORTERS_NAMES | None = None,  # type: ignore[valid-type]
+        report_to: REPORTERS_NAMES | Literal["none"] = "none",  # type: ignore # noqa: PGH003
+        print_progress: bool = False,
         **lora_kwargs: Any,  # noqa: ANN401
     ) -> None:
         # early stopping doesnt work with lora for now https://github.com/huggingface/transformers/issues/38130
@@ -82,6 +78,7 @@ def __init__(
             seed=seed,
             report_to=report_to,
             early_stopping_config=early_stopping_config,
+            print_progress=print_progress,
         )
         self._lora_config = LoraConfig(**lora_kwargs)
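
A hypothetical instantiation reflecting the updated signature; the import path and parameter values are assumptions for illustration, not taken from the commit:

    from autointent.modules.scoring import BERTLoRAScorer

    scorer = BERTLoRAScorer(
        classification_model_config="prajjwal1/bert-tiny",
        num_train_epochs=1,
        report_to="none",       # new default: no experiment trackers
        print_progress=False,   # new flag, forwarded to BertScorer
        r=8,                    # remaining kwargs go to peft.LoraConfig
        lora_alpha=16,
    )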

autointent/modules/scoring/_ptuning/ptuning.py

Lines changed: 3 additions & 8 deletions

@@ -34,7 +34,6 @@ class PTuningScorer(BertScorer):
             classification_model_config="prajjwal1/bert-tiny",
             num_train_epochs=3,
             batch_size=8,
-            task_type="SEQ_CLS",
             num_virtual_tokens=10,
             seed=42
         )
@@ -43,12 +42,6 @@ class PTuningScorer(BertScorer):
         scorer.fit(utterances, labels)
         test_utterances = ["hi", "bye"]
         probabilities = scorer.predict(test_utterances)
-        print(probabilities)
-
-    .. testoutput::
-
-        [[0.49925193 0.50074804]
-         [0.4944601 0.5055399 ]]

     """

@@ -61,13 +54,14 @@ def __init__(  # noqa: PLR0913
         batch_size: PositiveInt = 8,
         learning_rate: float = 5e-5,
         seed: int = 0,
-        report_to: REPORTERS_NAMES | None = None,  # type: ignore[valid-type]
+        report_to: REPORTERS_NAMES | Literal["none"] = "none",  # type: ignore # noqa: PGH003
         encoder_reparameterization_type: Literal["MLP", "LSTM"] = "LSTM",
         num_virtual_tokens: PositiveInt = 10,
         encoder_dropout: float = 0.1,
         encoder_hidden_size: PositiveInt = 128,
         encoder_num_layers: PositiveInt = 2,
         early_stopping_config: EarlyStoppingConfig | None = None,
+        print_progress: bool = False,
         **ptuning_kwargs: Any,  # noqa: ANN401
     ) -> None:
         super().__init__(
@@ -78,6 +72,7 @@ def __init__(  # noqa: PLR0913
             seed=seed,
             report_to=report_to,
             early_stopping_config=early_stopping_config,
+            print_progress=print_progress,
         )
         self._ptuning_config = PromptEncoderConfig(
             task_type=TaskType.SEQ_CLS,
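
The doctest drops ``task_type="SEQ_CLS"`` because, as the last hunk shows, the module now pins task_type=TaskType.SEQ_CLS itself when building PromptEncoderConfig, so passing it again via **ptuning_kwargs would collide. A call matching the corrected doctest (import path assumed for illustration):

    from autointent.modules.scoring import PTuningScorer

    scorer = PTuningScorer(
        classification_model_config="prajjwal1/bert-tiny",
        num_train_epochs=3,
        batch_size=8,
        num_virtual_tokens=10,  # task_type is now fixed internally
        seed=42,
    )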

docs/_static/versions.json

Lines changed: 7 additions & 3 deletions

@@ -1,10 +1,14 @@
 [
     {
-        "name": "v0.0.1 (stable)",
-        "version": "v0.0.1",
-        "url": "https://deeppavlov.github.io/AutoIntent/versions/v0.0.1/",
+        "name": "v0.1.0 (stable)",
+        "version": "v0.1.0",
+        "url": "https://deeppavlov.github.io/AutoIntent/versions/v0.1.0/",
         "preferred": true
     },
+    {
+        "version": "v0.0.1",
+        "url": "https://deeppavlov.github.io/AutoIntent/versions/v0.0.1/"
+    },
     {
         "version": "dev (dev)",
         "url": "https://deeppavlov.github.io/AutoIntent/versions/dev/"

docs/source/conf.py

Lines changed: 1 addition & 1 deletion

@@ -24,7 +24,7 @@
 project = "AutoIntent"
 copyright = "2025, DeepPavlov"
 author = "DeepPavlov"
-release = "0.1.0"
+release = "0.2.0"

 # -- General configuration ---------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration

docs/source/index.rst

Lines changed: 1 addition & 1 deletion

@@ -25,7 +25,7 @@ Example of building an intent classifier in a couple of lines of code:
     from autointent import Pipeline, Dataset

     dataset = Dataset.from_json(path_to_json)
-    pipeline = Pipeline.from_preset("light_extra")
+    pipeline = Pipeline.from_preset("classic-light")
     pipeline.fit(dataset)
     pipeline.predict(["show me my latest recent transactions"])

docs/source/quickstart.rst

Lines changed: 7 additions & 12 deletions

@@ -49,25 +49,20 @@ To load a dataset from the file system into Python, the :meth:`autointent.Datase
 AutoML goes brrr...
 -------------------

-Once the data is ready, you can start building the optimal classifier from the command line:
-
-.. code-block:: bash
-
-    autointent data.train_path="path/to/your/data.json"
-
-This command will start the hyperparameter search in the default :ref:`search space <key-search-space>`.
-
-As a result, a ``runs`` folder will be created in the current working directory, which will save the selected classifier ready for inference.
-
-Similar actions but in a limited mode can be started using the Python API:
+Once the data is ready, you can start building the optimal classifier:

 .. code-block:: python

     from autointent import PipelineOptimizer

-    pipeline_optimizer = PipelineOptimizer.default(multilabel=False)
+    pipeline_optimizer = PipelineOptimizer.from_preset("classic-light")
     pipeline_optimizer.fit(dataset)

+This code starts the hyperparameter search with the preset :ref:`search space <key-search-space>`.
+
+As a result, a ``runs`` folder will be created in the current working directory, which will save the selected classifier ready for inference.
+
+
 Inference
 ---------
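
Putting the rewritten quickstart together, a sketch of the full flow under the new API (the JSON path is a placeholder):

    from autointent import Dataset, PipelineOptimizer

    dataset = Dataset.from_json("path/to/your/data.json")

    # from_preset replaces the old PipelineOptimizer.default(multilabel=False)
    pipeline_optimizer = PipelineOptimizer.from_preset("classic-light")
    pipeline_optimizer.fit(dataset)  # writes the selected classifier under ./runs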

docs/source/user_guides.rst

Lines changed: 0 additions & 1 deletion

@@ -9,5 +9,4 @@ User Guides

    user_guides/index_basic_usage
    user_guides/index_advanced_usage
-   user_guides/index_cli_usage
    augmentation_tutorials/index
