Docs CI (#48)

Samoed · web-flow · commit 1cb47602411e · 2024-11-25T17:28:52.000+03:00
auto deploy documentation to https://deeppavlov.github.io/AutoIntent/
diff --git a/.github/workflows/build-docs.yaml b/.github/workflows/build-docs.yaml
@@ -0,0 +1,70 @@
+# Build the Sphinx documentation and publish it to the gh-pages branch.
+name: Build and publish docs
+
+on:
+  push:
+    branches:
+    - dev
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  # Runs for dev are never cancelled; runs for any other ref are superseded by newer ones.
+  cancel-in-progress: ${{ github.ref != 'refs/heads/dev' }}
+
+# The deploy step pushes to the gh-pages branch, which requires write access to repository contents.
+permissions:
+  contents: write
+
+jobs:
+  publish:
+    name: build and publish docs
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: set up python 3.10
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.10"
+
+      - name: setup poetry
+        run: |
+          curl -sSL https://install.python-poetry.org | python -
+
+      - name: Install pandoc
+        run: |
+          # Refresh the package index and install non-interactively so the step cannot wait for a prompt.
+          sudo apt-get update
+          sudo apt-get install -y pandoc
+
+      - name: install dependencies
+        run: |
+          poetry install --with docs
+
+      - name: build documentation
+        run: |
+          make docs
+
+      - name: save branch name without slashes
+        env:
+          BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
+        run: |
+          # BRANCH_NAME is read from the environment rather than interpolated into the
+          # script text, so a crafted ref name cannot be executed as shell code.
+          BRANCH_NAME=${BRANCH_NAME////_}
+          echo "BRANCH_NAME=${BRANCH_NAME}" >> "$GITHUB_ENV"
+
+      - name: save artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: ${{ format('github-pages-for-branch-{0}', env.BRANCH_NAME) }}
+          path: docs/build/
+          retention-days: 3
+
+      - name: deploy website
+        uses: JamesIves/github-pages-deploy-action@v4.6.4
+        with:
+          branch: gh-pages
+          folder: docs/build/html/
+          single-commit: true
diff --git a/Makefile b/Makefile
@@ -29,7 +29,9 @@ sync:
 .PHONY: docs
+# Run the docstring doctests into their own directory, then build the HTML site that gets published.
 docs:
 	$(poetry) sphinx-apidoc -e -E -f --remove-old -o docs/source/apiref autointent
-	$(poetry) python -m sphinx build docs/source docs/build/html
+	$(poetry) python -m sphinx build -b doctest docs/source docs/build/doctest
+	$(poetry) python -m sphinx build -b html docs/source docs/build/html
 
 .PHONY: serve-docs
 serve-docs: docs
diff --git a/autointent/context/embedder.py b/autointent/context/embedder.py
@@ -16,9 +16,7 @@
 
 
 class EmbedderDumpMetadata(TypedDict):
-    """
-    Metadata for saving and loading an Embedder instance.
-    """
+    """Metadata for saving and loading an Embedder instance."""
 
     batch_size: int
     """Batch size used for embedding calculations."""
diff --git a/autointent/metrics/scoring.py b/autointent/metrics/scoring.py
@@ -33,28 +33,28 @@ def scoring_log_likelihood(labels: LABELS_VALUE_TYPE, scores: SCORES_VALUE_TYPE,
     r"""
     Supports multiclass and multilabel cases.
 
-    Multiclass case
+    Multiclass case:
     Mean negative cross-entropy for each utterance classification result:
 
     .. math::
 
-        \\frac{1}{\\ell}\\sum_{i=1}^{\\ell}\\log(s[y[i]])
+        \frac{1}{\ell}\sum_{i=1}^{\ell}\log(s[y[i]])
 
-    where ``s[y[i]]`` is a predicted score of ``i``\\ th utterance having ground truth label
+    where ``s[y[i]]`` is the predicted score of the ``i``-th utterance having the ground truth label.
 
-    Multilabel case
+    Multilabel case:
     Mean negative binary cross-entropy:
 
     .. math::
 
-        \\frac{1}{\\ell}\\sum_{i=1}^\\ell\\sum_{c=1}^C\\Big[y[i,c]\\cdot\\log(s[i,c])+(1-y[i,c])\\cdot\\log(1-s[i,c])\\Big]
+        \frac{1}{\ell}\sum_{i=1}^\ell\sum_{c=1}^C\Big[y[i,c]\cdot\log(s[i,c])+(1-y[i,c])\cdot\log(1-s[i,c])\Big]
 
-    where ``s[i,c]`` is a predicted score of ``i``\\ th utterance having ground truth label ``c``
+    where ``s[i,c]`` is the predicted score of the ``i``-th utterance having the ground truth label ``c``.
 
-    :param labels: ground truth labels for each utterance
-    :param scores: for each utterance, this list contains scores for each of `n_classes` classes
-    :param eps: small value to avoid division by zero
-    :return: Score of the scoring metric
+    :param labels: Ground truth labels for each utterance.
+    :param scores: For each utterance, a list containing scores for each of `n_classes` classes.
+    :param eps: A small value that replaces zero scores so that the logarithm of zero is never taken.
+    :return: Score of the scoring metric.
     """
     labels_array, scores_array = transform(labels, scores)
     scores_array[scores_array == 0] = eps
@@ -82,7 +82,7 @@ def scoring_roc_auc(labels: LABELS_VALUE_TYPE, scores: SCORES_VALUE_TYPE) -> flo
 
     .. math::
 
-        \frac{1}{C}\\sum_{k=1}^C ROCAUC(scores[:, k], labels[:, k])
+        \frac{1}{C}\sum_{k=1}^C ROCAUC(scores[:, k], labels[:, k])
 
     where ``C`` is the number of classes
 
@@ -196,18 +196,19 @@ def scoring_neg_coverage(labels: LABELS_VALUE_TYPE, scores: SCORES_VALUE_TYPE) -
 
     The result is equivalent to executing the following code:
 
-    .. code-block:: python
-
-        scores = np.array(scores)
-        labels = np.array(labels)
-
-        n_classes = scores.shape[1]
-        from scipy.stats import rankdata
-        int_ranks = rankdata(scores, axis=1)  # int ranks are from [1, n_classes]
-        filtered_ranks = int_ranks * labels  # guarantee that 0 labels wont have max rank
-        max_ranks = np.max(filtered_ranks, axis=1)
-        float_ranks = (max_ranks - 1) / (n_classes - 1)  # float ranks are from [0,1]
-        res = 1 - np.mean(float_ranks)
+    >>> def compute_rank_metric():
+    ...     import numpy as np
+    ...     scores = np.array([[1, 2, 3]])
+    ...     labels = np.array([1, 0, 0])
+    ...     n_classes = scores.shape[1]
+    ...     from scipy.stats import rankdata
+    ...     int_ranks = rankdata(scores, axis=1)
+    ...     filtered_ranks = int_ranks * labels
+    ...     max_ranks = np.max(filtered_ranks, axis=1)
+    ...     float_ranks = (max_ranks - 1) / (n_classes - 1)
+    ...     return float(1 - np.mean(float_ranks))
+    >>> print(f"{compute_rank_metric():.1f}")
+    1.0
 
     :param labels: ground truth labels for each utterance
     :param scores: for each utterance, this list contains scores for each of `n_classes` classes
diff --git a/autointent/modules/base.py b/autointent/modules/base.py
@@ -40,10 +40,7 @@ def score(self, context: Context, metric_fn: METRIC_FN) -> float:
 
     @abstractmethod
     def get_assets(self) -> Artifact:
-        """
-        Return useful assets that represent intermediate data into context.
-
-        """
+        """Return the module's useful assets (intermediate data) for storing in the context."""
 
     @abstractmethod
     def clear_cache(self) -> None:
diff --git a/autointent/modules/prediction/base.py b/autointent/modules/prediction/base.py
@@ -53,10 +53,7 @@ def score(self, context: Context, metric_fn: PredictionMetricFn) -> float:
         return metric_fn(labels, self._predictions)
 
     def get_assets(self) -> PredictorArtifact:
-        """
-        Return useful assets that represent intermediate data into context.
-
-        """
+        """Return the stored predictions wrapped in a ``PredictorArtifact`` (as its ``labels``)."""
         return PredictorArtifact(labels=self._predictions)
 
     def clear_cache(self) -> None:
diff --git a/autointent/modules/scoring/dnnc/dnnc.py b/autointent/modules/scoring/dnnc/dnnc.py
@@ -49,7 +49,8 @@ class DNNCScorer(ScoringModule):
           archivePrefix={arXiv},
           primaryClass={cs.CL},
           url={https://arxiv.org/abs/2010.13009},
-    }
+        }
+
     """
 
     name = "dnnc"
diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -9,12 +9,13 @@
 import os
 import sys
 
+from docs.source.docs_utils.tutorials import generate_tutorial_links_for_notebook_creation
+
 conf_dir = os.path.dirname(os.path.abspath(__file__))  # noqa: PTH100, PTH120
 
 sys.path.insert(0, conf_dir)
 
 from docs_utils.apiref import regenerate_apiref  # noqa: E402
-from docs_utils.tutorials import generate_tutorial_links_for_notebook_creation  # noqa: E402
 
 project = "AutoIntent"
 copyright = "2024, DeepPavlov"
@@ -27,10 +28,19 @@
 extensions = [
     "sphinx.ext.duration",
     "sphinx.ext.autodoc",
-    "sphinx.ext.doctest",
     "sphinx.ext.autosummary",
-    "nbsphinx",
+    "sphinx.ext.doctest",
+    "sphinx.ext.intersphinx",
     "sphinx.ext.todo",
+    "sphinx.ext.coverage",
+    "sphinx.ext.viewcode",
+    "sphinx.ext.mathjax",
+    "sphinx.ext.extlinks",
+    "sphinx.ext.githubpages",
+    # Third-party extensions, installed through the docs dependency group.
+    "sphinx_autodoc_typehints",
+    "sphinx_copybutton",
+    "nbsphinx",
 ]
 
 templates_path = ["_templates"]
@@ -51,7 +62,7 @@
     "private-members": True,
     # "special-members": "__call__",
     "member-order": "bysource",
-    # "exclude-members": "_abc_impl, model_fields, model_computed_fields, model_config",
+    "exclude-members": "_abc_impl, model_fields, model_computed_fields, model_config",
 }
 
 # Finding tutorials directories
@@ -60,6 +71,8 @@
 :tutorial_name: {{ env.docname }}
 """
 
+mathjax_path = "https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"
+
 
 def setup(_) -> None:  # noqa: ANN001
     generate_tutorial_links_for_notebook_creation(
diff --git a/docs/source/docs_utils/notebook.py b/docs/source/docs_utils/notebook.py
@@ -3,13 +3,9 @@
 from typing import ClassVar, Literal
 
 import nbformat
+from jupytext import jupytext
 from pydantic import BaseModel
 
-try:
-    from jupytext import jupytext
-except ImportError:
-    jupytext = None
-
 
 class ReplacePattern(BaseModel, abc.ABC):
     """
@@ -78,7 +74,7 @@ class DocumentationLink(ReplacePattern):
     USAGE EXAMPLES
     --------------
 
-    %doclink(api,index_pipeline)) -> ../apiref/index_pipeline.rst
+    %doclink(api,index_pipeline) -> ../apiref/index_pipeline.rst
 
     %doclink(api,script.core.script) -> ../apiref/chatsky.script.core.script.rst
 
@@ -243,7 +239,4 @@ def py_percent_to_notebook(text: str) -> nbformat.NotebookNode:
         # %% [raw]
         # This is a raw cell
     """
-    if jupytext is None:
-        msg = "`doc` dependencies are not installed."
-        raise ModuleNotFoundError(msg)
     return jupytext.reads(apply_replace_patterns(text), "py:percent")
diff --git a/docs/source/learn/dialogue_systems.rst b/docs/source/learn/dialogue_systems.rst
@@ -4,7 +4,7 @@ Dialogue Systems
 В этом разделе вы познакомитесь с основами построения диалоговых систем.
 
 Intents
------
+-------
 
 Диалоговая система в широком смысле --- это текстовый интерфейс взаимодействия с некоторым сервисом (будь то сервис по заказу еды или по получению информации о банковском счете). Обычно сервис поддерживает конечное количество API методов, которые вызываются во время диалога с пользователем. Чтобы определить, какой метод нужен в данный момент диалога используются классификаторы интентов. Если рассуждать в терминах машинного обучения, то это задача классификации текстов. 
 
diff --git a/pyproject.toml b/pyproject.toml
@@ -20,7 +20,6 @@ hydra-core = "^1.3.2"
 faiss-cpu = "^1.9.0"
 openai = "^1.52.1"
 datasets = "2.20.0"
-ruff = "^0.8.0"
 
 
 [tool.poetry.group.dev]
@@ -30,6 +29,7 @@ optional = true
 tach = "^0.11.3"
 ipykernel = "^6.29.5"
 ipywidgets = "^8.1.5"
+ruff = "^0.8.0"
 
 [tool.poetry.group.test]
 optional = true
@@ -58,6 +58,7 @@ jupytext = "^1.16.4"
 nbsphinx = "^0.9.5"
 sphinx-autodoc-typehints = "^2.5.0"
 sphinx-copybutton = "^0.5.2"
+jupyter = "^1.1.1"
 
 [tool.poetry.group.docs]
 optional = true
@@ -76,7 +77,6 @@ target-version = "py310"
 [tool.ruff.lint]
 select = ["ALL"]
 ignore = [
-    "D",       # pydocstyle
     "TD",      # todos
     "FIX",     # fixmes
     "S311",    # random usage
@@ -92,7 +92,7 @@ ignore = [
 "autointent/modules/*" = ["ARG002", "ARG003"]  # unused argument
 "docs/*" = ["INP001", "A001", "D"]
 "*/utils.py" = ["D104", "D100"]
-"tutorials/*" = ["INP001", "T"]
+"tutorials/*" = ["INP001", "T", "D"]
 
 [tool.ruff.lint.pylint]
 max-args = 10
diff --git a/tutorials/modules/prediction/argmax.py b/tutorials/modules/prediction/argmax.py
@@ -18,5 +18,6 @@
 # %%
 predictor = ArgmaxPredictor()
 scores = np.array([[0.1, 0.9], [0.8, 0.2], [0.3, 0.7]])
+predictor.fit(scores, [0, 1, 0])
 predictions = predictor.predict(scores)
 np.testing.assert_array_equal(predictions, np.array([1, 0, 1]))
diff --git a/tutorials/pipeline_optimization/demo.py b/tutorials/pipeline_optimization/demo.py
@@ -7,6 +7,7 @@
 import importlib.resources as ires
 from pathlib import Path
 from typing import Literal
+from uuid import uuid4
 
 from autointent.configs.optimization_cli import (
     DataConfig,
@@ -17,12 +18,18 @@
 )
 from autointent.pipeline.optimization.cli_endpoint import main as optimize_pipeline
 from autointent.pipeline.optimization.utils import load_config
-from tests.conftest import setup_environment
 
 # %%
 TaskType = Literal["multiclass", "multilabel", "description"]
 
 
+def setup_environment() -> tuple[str, str, str]:  # NOTE(review): values are path-like, not str - confirm
+    logs_dir = ires.files("tests").joinpath("logs") / str(uuid4())  # fresh uuid-named dir under tests/logs
+    db_dir = logs_dir / "db"
+    dump_dir = logs_dir / "modules_dump"
+    return db_dir, dump_dir, logs_dir  # order: db dir, modules dump dir, logs root
+
+
 def get_search_space_path(task_type: TaskType) -> None:
     return ires.files("tests.assets.configs").joinpath(f"{task_type}.yaml")
 
@@ -55,4 +62,4 @@ def optimize(task_type: TaskType) -> None:
 
 
 # %%
-optimize("multiclass")
+# optimize("multiclass")