
Commit ab63cd0

Refactor/update docstrings for metrics (#52)
1 parent d7d3e67 commit ab63cd0

6 files changed: +457 −94 lines

.gitignore

Lines changed: 1 addition & 1 deletion
@@ -70,7 +70,7 @@ instance/

 # Sphinx documentation
 docs/build/
-docs/source/apiref
+docs/source/autoapi
 docs/source/tutorials

 # PyBuilder

autointent/metrics/prediction.py

Lines changed: 55 additions & 9 deletions
@@ -29,9 +29,22 @@ def __call__(self, y_true: LABELS_VALUE_TYPE, y_pred: LABELS_VALUE_TYPE) -> floa


 def prediction_accuracy(y_true: LABELS_VALUE_TYPE, y_pred: LABELS_VALUE_TYPE) -> float:
-    """
+    r"""
     Calculate prediction accuracy. Supports both multiclass and multilabel.

+    The prediction accuracy is calculated as:
+
+    .. math::
+
+        \text{Accuracy} = \frac{\sum_{i=1}^N \mathbb{1}(y_{\text{true},i} = y_{\text{pred},i})}{N}
+
+    where:
+    - :math:`N` is the total number of samples,
+    - :math:`y_{\text{true},i}` is the true label for the :math:`i`-th sample,
+    - :math:`y_{\text{pred},i}` is the predicted label for the :math:`i`-th sample,
+    - :math:`\mathbb{1}(\text{condition})` is the indicator function that equals 1 if the condition
+      is true and 0 otherwise.
+
     :param y_true: True values of labels
     :param y_pred: Predicted values of labels
     :return: Score of the prediction accuracy
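
For readers skimming the diff: the added formula is a plain mean of exact matches. A minimal sketch with NumPy (data and variable names invented for illustration; the real function also accepts multilabel inputs via its transform step):

import numpy as np

# Invented multiclass labels, purely to illustrate the docstring's formula.
y_true = np.array([0, 1, 2, 2])
y_pred = np.array([0, 1, 1, 2])

# Accuracy = sum of indicator(y_true_i == y_pred_i) / N
accuracy = np.mean(y_true == y_pred)
print(accuracy)  # 0.75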
@@ -41,9 +54,22 @@ def prediction_accuracy(y_true: LABELS_VALUE_TYPE, y_pred: LABELS_VALUE_TYPE) ->


 def _prediction_roc_auc_multiclass(y_true: LABELS_VALUE_TYPE, y_pred: LABELS_VALUE_TYPE) -> float:
-    """
+    r"""
     Calculate roc_auc for multiclass.

+    The ROC AUC score for multiclass is calculated as the mean ROC AUC score
+    across all classes, where each class is treated as a binary classification task
+    (one-vs-rest).
+
+    .. math::
+
+        \text{ROC AUC}_{\text{multiclass}} = \frac{1}{K} \sum_{k=1}^K \text{ROC AUC}_k
+
+    where:
+    - :math:`K` is the number of classes,
+    - :math:`\text{ROC AUC}_k` is the ROC AUC score for the :math:`k`-th class,
+      calculated by treating it as a binary classification problem (class :math:`k` vs rest).
+
     :param y_true: True values of labels
     :param y_pred: Predicted values of labels
     :return: Score of the prediction roc_auc
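
The one-vs-rest averaging described above can be reproduced directly with scikit-learn. A hedged sketch with invented scores (not the library's internals; the actual helper may derive per-class scores differently):

import numpy as np
from sklearn.metrics import roc_auc_score

# Invented data: 4 samples, 3 classes, per-class probability scores.
y_true = np.array([0, 1, 2, 1])
y_scores = np.array([
    [0.7, 0.2, 0.1],
    [0.1, 0.8, 0.1],
    [0.2, 0.2, 0.6],
    [0.3, 0.5, 0.2],
])

# Mean of per-class binary (class k vs rest) ROC AUC scores, as in the formula.
per_class = [
    roc_auc_score((y_true == k).astype(int), y_scores[:, k])
    for k in range(y_scores.shape[1])
]
print(np.mean(per_class))  # 1.0 for this toy data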
@@ -61,9 +87,13 @@ def _prediction_roc_auc_multiclass(y_true: LABELS_VALUE_TYPE, y_pred: LABELS_VAL


 def _prediction_roc_auc_multilabel(y_true: LABELS_VALUE_TYPE, y_pred: LABELS_VALUE_TYPE) -> float:
-    """
+    r"""
     Calculate roc_auc for multilabel.

+    This function internally uses :func:`sklearn.metrics.roc_auc_score` with `average=macro`. Refer to the
+    `scikit-learn documentation <https://scikit-learn.org/stable/modules/generated/sklearn.metrics.roc_auc_score.html>`__
+    for more details.
+
     :param y_true: True values of labels
     :param y_pred: Predicted values of labels
     :return: Score of the prediction accuracy
@@ -72,12 +102,16 @@ def _prediction_roc_auc_multilabel(y_true: LABELS_VALUE_TYPE, y_pred: LABELS_VAL


 def prediction_roc_auc(y_true: LABELS_VALUE_TYPE, y_pred: LABELS_VALUE_TYPE) -> float:
-    """
-    Calculate roc_auc for multiclass and multilabel.
+    r"""
+    Calculate ROC AUC for multiclass and multilabel classification.
+
+    The ROC AUC measures the ability of a model to distinguish between classes.
+    It is calculated as the area under the curve of the true positive rate (TPR)
+    against the false positive rate (FPR) at various threshold settings.

     :param y_true: True values of labels
     :param y_pred: Predicted values of labels
-    :return: Score of the prediction roc_auc
+    :return: Score of the prediction ROC AUC
     """
     y_true_, y_pred_ = transform(y_true, y_pred)
     if y_pred_.ndim == y_true_.ndim == 1:
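
The context lines at the end of this hunk show the dispatch: 1-D label arrays take the multiclass path, 2-D indicator matrices the multilabel one (which, per the docstring above, relies on macro-averaged sklearn.metrics.roc_auc_score). A sketch of that shape check, with stand-in helper callables rather than the module's private functions, and an error branch that is assumed, not shown in the diff:

import numpy as np

def roc_auc_dispatch(y_true, y_pred, multiclass_fn, multilabel_fn):
    # 1-D label vectors -> multiclass helper; 2-D matrices -> multilabel helper.
    y_true_, y_pred_ = np.asarray(y_true), np.asarray(y_pred)
    if y_pred_.ndim == y_true_.ndim == 1:
        return multiclass_fn(y_true_, y_pred_)
    if y_pred_.ndim == y_true_.ndim == 2:
        return multilabel_fn(y_true_, y_pred_)
    msg = "y_true and y_pred must both be 1-D (multiclass) or 2-D (multilabel)"
    raise ValueError(msg)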
@@ -90,9 +124,13 @@ def prediction_roc_auc(y_true: LABELS_VALUE_TYPE, y_pred: LABELS_VALUE_TYPE) ->


 def prediction_precision(y_true: LABELS_VALUE_TYPE, y_pred: LABELS_VALUE_TYPE) -> float:
-    """
+    r"""
     Calculate prediction precision. Supports both multiclass and multilabel.

+    This function internally uses :func:`sklearn.metrics.precision_score` with `average=macro`. Refer to the
+    `scikit-learn documentation <https://scikit-learn.org/stable/modules/generated/sklearn.metrics.precision_score.html>`__
+    for more details.
+
     :param y_true: True values of labels
     :param y_pred: Predicted values of labels
     :return: Score of the prediction precision
@@ -101,9 +139,13 @@ def prediction_precision(y_true: LABELS_VALUE_TYPE, y_pred: LABELS_VALUE_TYPE) -


 def prediction_recall(y_true: LABELS_VALUE_TYPE, y_pred: LABELS_VALUE_TYPE) -> float:
-    """
+    r"""
     Calculate prediction recall. Supports both multiclass and multilabel.

+    This function internally uses :func:`sklearn.metrics.recall_score` with `average=macro`. Refer to the
+    `scikit-learn documentation <https://scikit-learn.org/stable/modules/generated/sklearn.metrics.recall_score.html>`__
+    for more details.
+
     :param y_true: True values of labels
     :param y_pred: Predicted values of labels
     :return: Score of the prediction recall
@@ -112,9 +154,13 @@ def prediction_recall(y_true: LABELS_VALUE_TYPE, y_pred: LABELS_VALUE_TYPE) -> f


 def prediction_f1(y_true: LABELS_VALUE_TYPE, y_pred: LABELS_VALUE_TYPE) -> float:
-    """
+    r"""
     Calculate prediction f1 score. Supports both multiclass and multilabel.

+    This function internally uses :func:`sklearn.metrics.f1_score` with `average=macro`. Refer to the
+    `scikit-learn documentation <https://scikit-learn.org/stable/modules/generated/sklearn.metrics.f1_score.html>`__
+    for more details.
+
     :param y_true: True values of labels
     :param y_pred: Predicted values of labels
     :return: Score of the prediction accuracy
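
The precision, recall, and f1 docstrings above all defer to the same scikit-learn calls. This is what macro averaging computes on invented labels (per-class metric first, then an unweighted mean over classes):

from sklearn.metrics import f1_score, precision_score, recall_score

# Invented multiclass labels, for illustration only.
y_true = [0, 1, 2, 2, 1]
y_pred = [0, 2, 2, 2, 1]

print(precision_score(y_true, y_pred, average="macro"))
print(recall_score(y_true, y_pred, average="macro"))
print(f1_score(y_true, y_pred, average="macro"))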

autointent/metrics/regexp.py

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,22 @@ def __call__(self, y_true: LABELS_VALUE_TYPE, y_pred: LABELS_VALUE_TYPE) -> floa


 def regexp_partial_accuracy(y_true: LABELS_VALUE_TYPE, y_pred: LABELS_VALUE_TYPE) -> float:
-    """
+    r"""
     Calculate regexp partial accuracy.

+    The regexp partial accuracy is calculated as:
+
+    .. math::
+
+        \text{Partial Accuracy} = \frac{\sum_{i=1}^N \mathbb{1}(y_{\text{true},i} \in y_{\text{pred},i})}{N}
+
+    where:
+    - :math:`N` is the total number of samples,
+    - :math:`y_{\text{true},i}` is the true label for the :math:`i`-th sample,
+    - :math:`y_{\text{pred},i}` is the predicted label for the :math:`i`-th sample,
+    - :math:`\mathbb{1}(\text{condition})` is the indicator function that equals 1 if the condition
+      is true and 0 otherwise.
+
     :param y_true: True values of labels
     :param y_pred: Predicted values of labels
     :return: Score of the regexp metric
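
The membership test in the formula (:math:`y_{\text{true},i} \in y_{\text{pred},i}`) suggests each prediction is the collection of labels whose patterns matched. Under that reading, a minimal sketch with invented data:

# Invented regexp outputs: each prediction lists every label whose pattern matched.
y_true = [0, 1, 2, 3]
y_pred = [[0, 2], [1], [], [1, 2]]

# Fraction of samples whose true label appears among the matches.
partial_accuracy = sum(t in p for t, p in zip(y_true, y_pred)) / len(y_true)
print(partial_accuracy)  # 0.5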
@@ -39,9 +52,24 @@ def regexp_partial_accuracy(y_true: LABELS_VALUE_TYPE, y_pred: LABELS_VALUE_TYPE


 def regexp_partial_precision(y_true: LABELS_VALUE_TYPE, y_pred: LABELS_VALUE_TYPE) -> float:
-    """
+    r"""
     Calculate regexp partial precision.

+    The regexp partial precision is calculated as:
+
+    .. math::
+
+        \text{Partial Precision} = \frac{\sum_{i=1}^N \mathbb{1}(y_{\text{true},i}
+        \in y_{\text{pred},i})}{\sum_{i=1}^N \mathbb{1}(|y_{\text{pred},i}| > 0)}
+
+    where:
+    - :math:`N` is the total number of samples,
+    - :math:`y_{\text{true},i}` is the true label for the :math:`i`-th sample,
+    - :math:`y_{\text{pred},i}` is the predicted label for the :math:`i`-th sample,
+    - :math:`|y_{\text{pred},i}|` is the number of predicted labels for the :math:`i`-th sample,
+    - :math:`\mathbb{1}(\text{condition})` is the indicator function that equals 1 if the condition
+      is true and 0 otherwise.
+
     :param y_true: True values of labels
     :param y_pred: Predicted values of labels
     :return: Score of the regexp metric
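
Partial precision differs from the partial-accuracy sketch above only in the denominator, which counts samples where the regexps matched at least one label:

# Same invented data as the partial-accuracy sketch.
y_true = [0, 1, 2, 3]
y_pred = [[0, 2], [1], [], [1, 2]]

hits = sum(t in p for t, p in zip(y_true, y_pred))
matched = sum(len(p) > 0 for p in y_pred)
print(hits / matched)  # 2/3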
