diff --git a/docs/source/conf.py b/docs/source/conf.py
index c7af5eae0..cad118b6c 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -160,7 +160,7 @@
 nbsphinx_prolog = """
     :tutorial_name: {{ env.docname }}
 """
-nbsphinx_execute = "never"
+# nbsphinx_execute = "never"
 nbsphinx_thumbnails = {
     "user_guides/*": "_static/square-white.svg",
 }
diff --git a/user_guides/advanced/02_search_space.py b/user_guides/advanced/02_automl.py
similarity index 80%
rename from user_guides/advanced/02_search_space.py
rename to user_guides/advanced/02_automl.py
index e4547751b..c09cee33c 100644
--- a/user_guides/advanced/02_search_space.py
+++ b/user_guides/advanced/02_automl.py
@@ -1,6 +1,6 @@
 # %% [markdown]
 """
-# Search Space Configuration
+# AutoML Customization
 
 In this guide, you will learn how to configure a custom hyperparameter search space.
 """
@@ -29,15 +29,23 @@
 
 # %% [markdown]
 """
-The ``module_name`` field specifies the name of the module. You can find the names, for example, in...
-
-TODO: _Add docs for all available modules._
+The ``module_name`` field specifies the name of the module. You can explore the available names yourself:
+"""
+
+# %%
+from autointent.modules import SCORING_MODULES, DECISION_MODULES, EMBEDDING_MODULES, REGEX_MODULES
+
+print(list(SCORING_MODULES.keys()))
+print(list(DECISION_MODULES.keys()))
+print(list(EMBEDDING_MODULES.keys()))
+print(list(REGEX_MODULES.keys()))
 
+# %% [markdown]
+"""
 All fields except ``module_name`` are lists that define the search space for each hyperparameter (see %mddoclink(class,modules.scoring,KNNScorer)). If you omit them, the default set of hyperparameters will be used:
 """
 
 # %%
-
 linear_module = {"module_name": "linear"}
 
 # %% [markdown]
@@ -110,7 +118,6 @@
 """
 
 # %%
-
 from autointent import Dataset
 
 dataset = Dataset.from_hub("AutoIntent/clinc150_subset")
@@ -124,7 +131,31 @@
 from autointent import Pipeline
 
 pipeline_optimizer = Pipeline.from_search_space(search_space)
-pipeline_optimizer.fit(dataset)
+pipeline_optimizer.fit(dataset, sampler="random")
+
+# %% [markdown]
+"""
+There are three hyperparameter tuning samplers available:
+
+- "random"
+- "brute"
+- "tpe"
+
+All the samplers are implemented with [Optuna](https://optuna.org/).
+"""
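+
+# %% [markdown]
+"""
+For example, switching to another sampler only requires passing a different name to ``fit``. A minimal sketch reusing the optimizer and dataset from above:
+"""
+
+# %%
+pipeline_optimizer.fit(dataset, sampler="tpe")  # "brute" works the same way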
+""" # %% [markdown] """ diff --git a/user_guides/advanced/03_caching.py b/user_guides/advanced/03_caching.py deleted file mode 100644 index 63532533b..000000000 --- a/user_guides/advanced/03_caching.py +++ /dev/null @@ -1,6 +0,0 @@ -# %% [markdown] -""" -# Caching in AutoIntent - -TODO -""" diff --git a/user_guides/advanced/04_reporting.py b/user_guides/advanced/03_reporting.py similarity index 100% rename from user_guides/advanced/04_reporting.py rename to user_guides/advanced/03_reporting.py diff --git a/user_guides/advanced/05_logging.py b/user_guides/advanced/04_logging.py similarity index 100% rename from user_guides/advanced/05_logging.py rename to user_guides/advanced/04_logging.py diff --git a/user_guides/basic_usage/03_automl.py b/user_guides/basic_usage/03_automl.py index 43f692eb8..07f22fb02 100644 --- a/user_guides/basic_usage/03_automl.py +++ b/user_guides/basic_usage/03_automl.py @@ -74,6 +74,42 @@ logging_config = LoggingConfig(project_dir=Path.cwd() / "runs", dump_modules=False, clear_ram=False) custom_pipeline.set_config(logging_config) +# %% [markdown] +""" +## Default Transformers + +One can specify what embedding model and cross-encoder model want to use along with default settings: +""" + +# %% +from autointent.configs import EmbedderConfig, CrossEncoderConfig + +custom_pipeline.set_config(EmbedderConfig(model_name="prajjwal1/bert-tiny", device="cpu")) +custom_pipeline.set_config(CrossEncoderConfig(model_name="cross-encoder/ms-marco-MiniLM-L2-v2", max_length=8)) + +# %% [markdown] +""" +See the docs for %mddoclink(class,configs,EmbedderConfig) and %mddoclink(class,configs,CrossEncoderConfig) for options available to customize. +""" + +# %% [markdown] +""" +## Cross-Validation vs Hold-Out Validation + +If you have lots of training and evaluation data, you can use default hold-out validation strategy. If not, you can choose cross-validation and spend a little more time but utilize the full amount of available data for better hyperparameter tuning. + +This behavior is controlled with %mddoclink(class,configs,DataConfig): +""" + +# %% +from autointent.configs import DataConfig +custom_pipeline.set_config(DataConfig(scheme="cv", n_folds=3)) + +# %% [markdown] +""" +See the docs for %mddoclink(class,configs,DataConfig) for other options available to customize. 
+""" + # %% [markdown] """ ## Complete Example @@ -99,7 +135,43 @@ custom_pipeline.set_config(logging_config) # start auto-configuration -custom_pipeline.fit(dataset) +context = custom_pipeline.fit(dataset) -# inference +# inference on-the-fly custom_pipeline.predict(["hello world!"]) + +# %% [markdown] +""" +## Dump Results + +One can save all results of auto-configuration process to file system (to ``LoggingConfig.dirpath``): +""" + +# %% +context.dump() + +# %% [markdown] +""" +Or one can dump only the configured pipeline to any desired location (by default ``LoggingConfig.dirpath``): +""" + +# %% +custom_pipeline.dump() + +# %% [markdown] +""" +## Load Pipeline for Inference +""" + +# %% +loaded_pipe = Pipeline.load(logging_config.dirpath) + +# %% [markdown] +""" +Since this notebook is launched automatically while building the docs, we will clean the space if you don't mind :) +""" + +# %% +import shutil + +shutil.rmtree(logging_config.dirpath) diff --git a/user_guides/basic_usage/04_inference.py b/user_guides/basic_usage/04_inference.py index 02ad3362a..ce8704c05 100644 --- a/user_guides/basic_usage/04_inference.py +++ b/user_guides/basic_usage/04_inference.py @@ -80,6 +80,7 @@ # %% context = pipeline.fit(dataset) context.dump() +# or pipeline.dump() to save only configured pipeline but not all the optimization assets # %% [markdown] """