More docs cleanup

mdbenito · mdbenito · commit 252e2df7c7ed · 2023-09-02T18:05:54.000+02:00
diff --git a/src/pydvl/influence/general.py b/src/pydvl/influence/general.py
@@ -2,7 +2,7 @@
 This module contains influence calculation functions for general
 models, as introduced in (Koh and Liang, 2017)[^1].
 
-## References:
+## References
 
 [^1]: <a name="koh_liang_2017"></a>Koh, P.W., Liang, P., 2017.
     [Understanding Black-box Predictions via Influence Functions](https://proceedings.mlr.press/v70/koh17a.html).
diff --git a/src/pydvl/influence/torch/torch_differentiable.py b/src/pydvl/influence/torch/torch_differentiable.py
@@ -4,7 +4,7 @@
 methods to invert the Hessian vector product. These are used to calculate the
 influence of a training point on the model.
 
-## References:
+## References
 
 [^1]: <a name="koh_liang_2017"></a>Koh, P.W., Liang, P., 2017.
     [Understanding Black-box Predictions via Influence Functions](https://proceedings.mlr.press/v70/koh17a.html).
diff --git a/src/pydvl/utils/utility.py b/src/pydvl/utils/utility.py
@@ -15,7 +15,7 @@
 This module also contains Utility classes for toy games that are used
 for testing and for demonstration purposes.
 
-## References:
+## References
 
 [^1]: <a name="wang_improving_2022"></a>Wang, T., Yang, Y. and Jia, R., 2021.
     [Improving cooperative game theory-based data valuation via data utility learning](https://arxiv.org/abs/2107.06336).
diff --git a/src/pydvl/value/result.py b/src/pydvl/value/result.py
@@ -207,7 +207,8 @@ class ValuationResult(
         extra_values: Additional values that can be passed as keyword arguments.
             This can contain, for example, the least core value.
 
-    :raise ValueError: If input arrays have mismatching lengths.
+    Raises:
+        ValueError: If input arrays have mismatching lengths.
     """
 
     _indices: NDArray[IndexT]
@@ -611,7 +612,8 @@ def update(self, idx: int, new_value: float) -> "ValuationResult":
         Returns:
             A reference to the same, modified result.
 
-        :raises IndexError: If the index is not found.
+        Raises:
+            IndexError: If the index is not found.
         """
         try:
             pos = self._positions[idx]
@@ -632,7 +634,9 @@ def update(self, idx: int, new_value: float) -> "ValuationResult":
     def get(self, idx: Integral) -> ValueItem:
         """Retrieves a ValueItem by data index, as opposed to sort index, like
         the indexing operator.
-        :raises IndexError: If the index is not found.
+
+        Raises:
+            IndexError: If the index is not found.
         """
         try:
             pos = self._positions[idx]
@@ -662,7 +666,8 @@ def to_dataframe(
             A dataframe with two columns, one for the values, with name
                 given as explained in `column`, and another with standard errors for
                 approximate algorithms. The latter will be named `column+'_stderr'`.
-        :raise ImportError: If pandas is not installed
+        Raises:
+            ImportError: If pandas is not installed.
         """
         if not pandas:
             raise ImportError("Pandas required for DataFrame export")
@@ -700,7 +705,8 @@ def from_random(
             A valuation result with its status set to
             [Status.Converged][pydvl.utils.status.Status] by default.
 
-        :raises ValueError: If `size` is less than 1.
+        Raises:
+            ValueError: If `size` is less than 1.
 
         !!! tip "Changed in version 0.6.0"
             Added parameter `total`. Check for zero size
diff --git a/src/pydvl/value/semivalues.py b/src/pydvl/value/semivalues.py
@@ -54,7 +54,7 @@
 instead.
 
 
-# References:
+## References
 
 [^1]: <a name="ghorbani_data_2019"></a>Ghorbani, A., Zou, J., 2019.
     [Data Shapley: Equitable Valuation of Data for Machine Learning](http://proceedings.mlr.press/v97/ghorbani19c.html).
diff --git a/src/pydvl/value/shapley/common.py b/src/pydvl/value/shapley/common.py
@@ -82,19 +82,20 @@ def compute_shapley_values(
     Args:
         u: [Utility][pydvl.utils.utility.Utility] object with model, data, and
             scoring function.
-        done: [StoppingCriterion][pydvl.value.stopping.StoppingCriterion] object, used to
-            determine when to stop the computation for Monte Carlo methods. The
-            default is to stop after 100 iterations. See the available criteria
-            in [stopping][pydvl.value.stopping]. It is possible to combine several
-            criteria using boolean operators. Some methods ignore this argument,
-            others require specific subtypes.
+        done: Object used to determine when to stop the computation for Monte
+            Carlo methods. The default is to stop after 100 iterations. See the
+            available criteria in [stopping][pydvl.value.stopping]. It is
+            possible to combine several of them using boolean operators. Some
+            methods ignore this argument, others require specific subtypes.
         n_jobs: Number of parallel jobs (available only to some methods)
-        seed: Either an instance of a numpy random number generator or a seed for it.
+        seed: Either an instance of a numpy random number generator or a seed
+            for it.
         mode: Choose which shapley algorithm to use. See
-            [ShapleyMode][pydvl.value.shapley.ShapleyMode] for a list of allowed value.
+            [ShapleyMode][pydvl.value.shapley.ShapleyMode] for a list of allowed
+            values.
 
     Returns:
-        A [ValuationResult][pydvl.value.result.ValuationResult] object with the results.
+        Object with the results.
 
     """
     progress: bool = kwargs.pop("progress", False)
diff --git a/src/pydvl/value/shapley/gt.py b/src/pydvl/value/shapley/gt.py
@@ -14,7 +14,7 @@
 
 !!! tip "New in version 0.4.0"
 
-# References:
+## References
 
 [^1]: <a name="jia_efficient_2019"></a>Jia, R. et al., 2019.
     [Towards Efficient Data Valuation Based on the Shapley Value](http://proceedings.mlr.press/v89/jia19a.html).
diff --git a/src/pydvl/value/shapley/knn.py b/src/pydvl/value/shapley/knn.py
@@ -2,14 +2,15 @@
 This module contains Shapley computations for K-Nearest Neighbours.
 
 !!! Todo
-    Implement approximate KNN computation for sublinear complexity)
+    Implement approximate KNN computation for sublinear complexity
 
 
-# References:
+## References
 
-[^Y]: <a name="jia_efficient_2019a"></a>Jia, R. et al., 2019.
-    [Efficient Task-Specific Data Valuation for Nearest Neighbor Algorithms](https://doi.org/10.14778/3342263.3342637).
-    In: Proceedings of the VLDB Endowment, Vol. 12, No. 11, pp. 1610–1623.
+[^1]: <a name="jia_efficient_2019a"></a>Jia, R. et al., 2019. [Efficient
+    Task-Specific Data Valuation for Nearest Neighbor
+    Algorithms](https://doi.org/10.14778/3342263.3342637). In: Proceedings of
+    the VLDB Endowment, Vol. 12, No. 11, pp. 1610–1623.
 
 """
 
@@ -43,7 +44,9 @@ def knn_shapley(u: Utility, *, progress: bool = True) -> ValuationResult:
     Returns:
         Object with the data values.
 
-    :raises TypeError: If the model in the utility is not a [KNeighborsClassifier](https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsClassifier.html)
+    Raises:
+        TypeError: If the model in the utility is not a
+            [sklearn.neighbors.KNeighborsClassifier][].
 
     !!! tip "New in version 0.1.0"
 
diff --git a/src/pydvl/value/shapley/montecarlo.py b/src/pydvl/value/shapley/montecarlo.py
@@ -21,19 +21,19 @@
 [truncated_montecarlo_shapley()][pydvl.value.shapley.truncated.truncated_montecarlo_shapley].
 
 !!! info "Also see"
-   It is also possible to use [group_testing_shapley()][pydvl.value.shapley.gt.group_testing_shapley]
-   to reduce the number of evaluations of the utility. The method is however
-   typically outperformed by others in this module.
+    It is also possible to use [group_testing_shapley()][pydvl.value.shapley.gt.group_testing_shapley]
+    to reduce the number of evaluations of the utility. The method is however
+    typically outperformed by others in this module.
 
 !!! info "Also see"
-   Additionally, you can consider grouping your data points using
-   [GroupedDataset][pydvl.utils.dataset.GroupedDataset] and computing the values of the
-   groups instead. This is not to be confused with "group testing" as
-   implemented in [group_testing_shapley()][pydvl.value.shapley.gt.group_testing_shapley]: any of
-   the algorithms mentioned above, including Group Testing, can work to valuate
-   groups of samples as units.
-
-# References:
+    Additionally, you can consider grouping your data points using
+    [GroupedDataset][pydvl.utils.dataset.GroupedDataset] and computing the values
+    of the groups instead. This is not to be confused with "group testing" as
+    implemented in [group_testing_shapley()][pydvl.value.shapley.gt.group_testing_shapley]: any of
+    the algorithms mentioned above, including Group Testing, can work to valuate
+    groups of samples as units.
+
+## References
 
 [^1]: <a name="ghorbani_data_2019"></a>Ghorbani, A., Zou, J., 2019.
     [Data Shapley: Equitable Valuation of Data for Machine Learning](http://proceedings.mlr.press/v97/ghorbani19c.html).
diff --git a/src/pydvl/value/shapley/owen.py b/src/pydvl/value/shapley/owen.py
@@ -1,5 +1,5 @@
 """
-# References:
+## References
 
 [^1]: <a name="okhrati_multilinear_2021"></a>Okhrati, R., Lipani, A., 2021.
     [A Multilinear Sampling Algorithm to Estimate Shapley Values](https://ieeexplore.ieee.org/abstract/document/9412511).
diff --git a/src/pydvl/value/shapley/truncated.py b/src/pydvl/value/shapley/truncated.py
@@ -1,5 +1,5 @@
 """
-# References:
+## References
 
 [^1]: <a name="ghorbani_data_2019"></a>Ghorbani, A., Zou, J., 2019.
     [Data Shapley: Equitable Valuation of Data for Machine Learning](http://proceedings.mlr.press/v97/ghorbani19c.html).
diff --git a/src/pydvl/value/stopping.py b/src/pydvl/value/stopping.py
@@ -31,7 +31,7 @@
 [StoppingCriterion][pydvl.value.stopping.StoppingCriterion] for details on how
 these operations affect the behavior of the stopping criteria.
 
-# References:
+## References
 
 [^1]: <a name="ghorbani_data_2019"></a>Ghorbani, A., Zou, J., 2019.
     [Data Shapley: Equitable Valuation of Data for Machine Learning](http://proceedings.mlr.press/v97/ghorbani19c.html).