Merged
2 changes: 1 addition & 1 deletion docs/source/conf.py
@@ -160,7 +160,7 @@
nbsphinx_prolog = """
:tutorial_name: {{ env.docname }}
"""
nbsphinx_execute = "never"
# nbsphinx_execute = "never"
nbsphinx_thumbnails = {
"user_guides/*": "_static/square-white.svg",
}
@@ -1,6 +1,6 @@
# %% [markdown]
"""
# Search Space Configuration
# AutoML Customization

In this guide, you will learn how to configure a custom hyperparameter search space.
"""
@@ -29,15 +29,23 @@

# %% [markdown]
"""
The ``module_name`` field specifies the name of the module. You can find the names, for example, in...
The ``module_name`` field specifies the name of the module. You can explore the available names yourself:
"""

# %%
from autointent.modules import SCORING_MODULES, DECISION_MODULES, EMBEDDING_MODULES, REGEX_MODULES

TODO: _Add docs for all available modules._
print(list(SCORING_MODULES.keys()))
print(list(DECISION_MODULES.keys()))
print(list(EMBEDDING_MODULES.keys()))
print(list(REGEX_MODULES.keys()))

# %% [markdown]
"""
All fields except ``module_name`` are lists that define the search space for each hyperparameter (see %mddoclink(class,modules.scoring,KNNScorer)). If you omit them, the default set of hyperparameters will be used:
"""

# %%

linear_module = {"module_name": "linear"}
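To make the list convention concrete, here is a sketch of a fuller entry. The hyperparameter names below (``k``, ``weights``) are assumptions for illustration only; consult the module's documentation for the real ones:

```python
# Hypothetical search-space entry: every field except module_name
# is a list of candidate values for one hyperparameter.
knn_module = {
    "module_name": "knn",
    "k": [1, 5, 10],                     # illustrative: neighbour counts to try
    "weights": ["uniform", "distance"],  # illustrative: weighting schemes to try
}

# The search dimensions are everything except the module identifier.
search_dimensions = {name: values for name, values in knn_module.items() if name != "module_name"}
print(search_dimensions)
```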

# %% [markdown]
@@ -110,7 +118,6 @@
"""

# %%

from autointent import Dataset

dataset = Dataset.from_hub("AutoIntent/clinc150_subset")
@@ -124,7 +131,23 @@
from autointent import Pipeline

pipeline_optimizer = Pipeline.from_search_space(search_space)
pipeline_optimizer.fit(dataset)
pipeline_optimizer.fit(dataset, sampler="random")

# %% [markdown]
"""
There are three hyperparameter tuning samplers available:

- "random"
- "brute"
- "tpe"

All samplers are implemented with [Optuna](https://optuna.org/).
"""

# %% [markdown]
"""
For more versatile configuration, use %mddoclink(class,,OptimizationConfig) together with %mddoclink(method,Pipeline,from_optimization_config).
"""

# %% [markdown]
"""
6 changes: 0 additions & 6 deletions user_guides/advanced/03_caching.py

This file was deleted.

File renamed without changes.
76 changes: 74 additions & 2 deletions user_guides/basic_usage/03_automl.py
@@ -74,6 +74,42 @@
logging_config = LoggingConfig(project_dir=Path.cwd() / "runs", dump_modules=False, clear_ram=False)
custom_pipeline.set_config(logging_config)

# %% [markdown]
"""
## Default Transformers

One can specify which embedding model and cross-encoder model to use, along with their default settings:
"""

# %%
from autointent.configs import EmbedderConfig, CrossEncoderConfig

custom_pipeline.set_config(EmbedderConfig(model_name="prajjwal1/bert-tiny", device="cpu"))
custom_pipeline.set_config(CrossEncoderConfig(model_name="cross-encoder/ms-marco-MiniLM-L2-v2", max_length=8))

# %% [markdown]
"""
See the docs for %mddoclink(class,configs,EmbedderConfig) and %mddoclink(class,configs,CrossEncoderConfig) for options available to customize.
"""

# %% [markdown]
"""
## Cross-Validation vs Hold-Out Validation

If you have plenty of training and evaluation data, you can use the default hold-out validation strategy. If not, choose cross-validation: it takes a little more time but utilizes all available data for better hyperparameter tuning.

This behavior is controlled with %mddoclink(class,configs,DataConfig):
"""

# %%
from autointent.configs import DataConfig

custom_pipeline.set_config(DataConfig(scheme="cv", n_folds=3))

# %% [markdown]
"""
See the docs for %mddoclink(class,configs,DataConfig) for other options available to customize.
"""

# %% [markdown]
"""
## Complete Example
@@ -99,7 +135,43 @@
custom_pipeline.set_config(logging_config)

# start auto-configuration
custom_pipeline.fit(dataset)
context = custom_pipeline.fit(dataset)

# inference
# inference on-the-fly
custom_pipeline.predict(["hello world!"])

# %% [markdown]
"""
## Dump Results

One can save all results of the auto-configuration process to the file system (to ``LoggingConfig.dirpath``):
"""

# %%
context.dump()

# %% [markdown]
"""
Or one can dump only the configured pipeline to any desired location (by default ``LoggingConfig.dirpath``):
"""

# %%
custom_pipeline.dump()

# %% [markdown]
"""
## Load Pipeline for Inference
"""

# %%
loaded_pipe = Pipeline.load(logging_config.dirpath)
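The restored pipeline can then serve predictions straight away (a usage sketch following the earlier cells):

```python
# No re-fitting needed: the loaded pipeline is ready for inference.
loaded_pipe.predict(["hello world!"])
```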

# %% [markdown]
"""
Since this notebook runs automatically while building the docs, we will clean up the working directory if you don't mind :)
"""

# %%
import shutil

shutil.rmtree(logging_config.dirpath)
1 change: 1 addition & 0 deletions user_guides/basic_usage/04_inference.py
@@ -80,6 +80,7 @@
# %%
context = pipeline.fit(dataset)
context.dump()
# or pipeline.dump() to save only the configured pipeline, without the optimization assets

# %% [markdown]
"""