Skip to content

Commit e3cd202

Browse files
authored
Merge pull request #243 from appliedAI-Initiative/improve-least-core
Improve Least Core
2 parents 372a341 + 09a47cd commit e3cd202

File tree

12 files changed

+363
-217
lines changed

12 files changed

+363
-217
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,11 @@
3131
[PR #240](https://github.com/appliedAI-Initiative/pyDVL/pull/240)
3232
- Fixes bug in ray initialization in `RayParallelBackend` class
3333
[PR #239](https://github.com/appliedAI-Initiative/pyDVL/pull/239)
34+
- Improves Least Core methods by solving an egalitarian quadratic program
35+
to find a fair distribution of data value that satisfies the constraints
36+
given the least core value found by solving the least core linear program.
37+
Adds cvxpy as a dependency and uses it instead of scipy to solve these programs.
38+
[PR #243](https://github.com/appliedAI-Initiative/pyDVL/pull/243)
3439

3540
## 0.3.0 - 💥 Breaking changes
3641

docs/30-data-valuation.rst

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -419,7 +419,7 @@ Least Core values
419419
^^^^^^^^^^^^^^^^^
420420

421421
Unfortunately, for many cooperative games the Core may be empty.
422-
By relaxing the coalitional rationality property by $e \gt 0$,
422+
By relaxing the coalitional rationality property by a subsidy $e \gt 0$,
423423
we are then able to find approximate payoffs:
424424

425425
$$
@@ -471,14 +471,15 @@ The simplest approximation consists of two relaxations of the Least Core
471471

472472
- Using a fraction of all subsets instead of all possible subsets.
473473

474-
Combined, this gives us the following property:
474+
Combined, this gives us the $(\epsilon, \delta)$-*probably approximate
475+
least core* that satisfies the following property:
475476

476477
$$
477478
P_{S\sim D}\left[\sum_{x_i\in S} v_u(x_i) + e^{*} + \epsilon \geq u(S)\right]
478479
\geq 1 - \delta
479480
$$
480481

481-
Where $e^{*}$ is the optimal least core value.
482+
Where $e^{*}$ is the optimal least core subsidy.
482483

483484
With these relaxations, we obtain a polynomial running time.
484485

notebooks/least_core_basic.ipynb

Lines changed: 77 additions & 64 deletions
Large diffs are not rendered by default.

requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
numpy>=1.20
22
pandas>=1.3
33
scikit-learn
4+
scipy>=1.7.0
5+
cvxpy>=1.3.0
46
ray>=0.8
57
joblib
68
pymemcache
79
cloudpickle
810
tqdm
911
matplotlib
10-
scipy>=1.7.0
Lines changed: 155 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -1,108 +1,184 @@
11
import logging
22
import warnings
3-
from typing import List, Optional, Tuple, Union
3+
from typing import Optional, Tuple
44

5+
import cvxpy as cp
56
import numpy as np
6-
import scipy
77
from numpy.typing import NDArray
88

9-
__all__ = ["_solve_linear_program"]
9+
__all__ = [
10+
"_solve_least_core_linear_program",
11+
"_solve_egalitarian_least_core_quadratic_program",
12+
]
1013

1114
logger = logging.getLogger(__name__)
1215

1316

14-
BOUNDS_TYPE = Union[
15-
Tuple[Optional[float], Optional[float]],
16-
List[Tuple[Optional[float], Optional[float]]],
17-
]
17+
def _solve_least_core_linear_program(
18+
A_eq: NDArray[np.float_],
19+
b_eq: NDArray[np.float_],
20+
A_lb: NDArray[np.float_],
21+
b_lb: NDArray[np.float_],
22+
*,
23+
epsilon: float = 0.0,
24+
**options,
25+
) -> Tuple[Optional[NDArray[np.float_]], Optional[float]]:
26+
"""Solves the Least Core's linear program using cvxpy.
27+
28+
.. math::
29+
30+
\text{minimize} \ & e \\
31+
\mbox{such that} \ & A_{eq} x = b_{eq}, \\
32+
& A_{lb} x + e \ge b_{lb},\\
33+
& A_{eq} x = b_{eq},\\
34+
& x \in \mathbb{R}^n , \\
35+
& e \ge 0
36+
37+
where :math:`x` is a vector of decision variables; :math:`e` is the scalar
38+
least core subsidy; :math:`b_{lb}` and :math:`b_{eq}` are vectors; and
39+
:math:`A_{lb}` and :math:`A_{eq}` are matrices.
40+
41+
:param A_eq: The equality constraint matrix. Each row of ``A_eq`` specifies the
42+
coefficients of a linear equality constraint on ``x``.
43+
:param b_eq: The equality constraint vector. Each element of ``A_eq @ x`` must equal
44+
the corresponding element of ``b_eq``.
45+
:param A_lb: The inequality constraint matrix. Each row of ``A_lb`` specifies the
46+
coefficients of a linear inequality constraint on ``x``.
47+
:param b_lb: The inequality constraint vector. Each element represents a
48+
lower bound on the corresponding value of ``A_lb @ x``.
49+
:param epsilon: Relaxation value by which the subset utility is decreased.
50+
:param options: Keyword arguments that will be used to select a solver
51+
and to configure it. For all possible options, refer to `cvxpy's documentation
52+
<https://www.cvxpy.org/tutorial/advanced/index.html#setting-solver-options>`_
53+
"""
54+
logger.debug(f"Solving linear program : {A_eq=}, {b_eq=}, {A_lb=}, {b_lb=}")
55+
56+
n_variables = A_eq.shape[1]
57+
58+
x = cp.Variable(n_variables)
59+
e = cp.Variable()
60+
epsilon_parameter = cp.Parameter(name="epsilon", nonneg=True, value=epsilon)
61+
62+
objective = cp.Minimize(e)
63+
constraints = [
64+
e >= 0,
65+
A_eq @ x == b_eq,
66+
(A_lb @ x + e * np.ones(len(A_lb))) >= (b_lb - epsilon_parameter),
67+
]
68+
problem = cp.Problem(objective, constraints)
69+
70+
solver = options.pop("solver", cp.ECOS)
71+
72+
try:
73+
problem.solve(solver=solver, **options)
74+
except cp.error.SolverError as err:
75+
raise ValueError("Could not solve linear program") from err
76+
77+
if problem.status in cp.settings.SOLUTION_PRESENT:
78+
logger.debug("Problem was solved")
79+
if problem.status in [cp.settings.OPTIMAL_INACCURATE, cp.settings.USER_LIMIT]:
80+
warnings.warn(
81+
"Solver terminated early. Consider increasing the solver's "
82+
"maximum number of iterations in options",
83+
RuntimeWarning,
84+
)
85+
subsidy = e.value.item()
86+
# HACK: sometimes the returned least core subsidy
87+
# is negative but very close to 0
88+
# to avoid any problems with the subsequent quadratic program
89+
# we just set it to 0.0
90+
if subsidy < 0:
91+
warnings.warn(
92+
f"Least core subsidy e={subsidy} is negative but close to zero. "
93+
"It will be set to 0.0",
94+
RuntimeWarning,
95+
)
96+
subsidy = 0.0
97+
return x.value, subsidy
98+
99+
if problem.status in cp.settings.INF_OR_UNB:
100+
warnings.warn(
101+
"Could not find solution due to infeasibility or unboundedness of problem.",
102+
RuntimeWarning,
103+
)
104+
return None, None
18105

19106

20-
def _solve_linear_program(
21-
c: NDArray[np.float_],
107+
def _solve_egalitarian_least_core_quadratic_program(
108+
subsidy: float,
22109
A_eq: NDArray[np.float_],
23110
b_eq: NDArray[np.float_],
24-
A_ub: NDArray[np.float_],
25-
b_ub: NDArray[np.float_],
26-
bounds: BOUNDS_TYPE,
111+
A_lb: NDArray[np.float_],
112+
b_lb: NDArray[np.float_],
113+
epsilon: float = 0.0,
27114
**options,
28115
) -> Optional[NDArray[np.float_]]:
29-
"""Solves a linear program using scipy's :func:`~scipy.optimize.linprog`
30-
function.
31-
32-
.. note::
33-
The following description of the linear program and the parameters is
34-
taken verbatim from scipy
116+
"""Solves the egalitarian Least Core's quadratic program using cvxpy.
35117
36118
.. math::
37119
38-
\min_x \ & c^T x \\
39-
\mbox{such that} \ & A_{ub} x \leq b_{ub},\\
120+
\text{minimize} \ & \| x \|_2 \\
121+
\mbox{such that} \ & A_{eq} x = b_{eq}, \\
122+
& A_{lb} x + e \ge b_{lb},\\
40123
& A_{eq} x = b_{eq},\\
41-
& l \leq x \leq u ,
42-
43-
where $x$ is a vector of decision variables; $c$, $b_{ub}$, $b_{eq}$, $l$,
44-
and $u$ are vectors, and $A_{ub}$ and $A_{eq}$ are matrices.
45-
46-
:param c: The coefficients of the linear objective function to be minimized.
47-
:param A_eq: The equality constraint matrix. Each row of ``A_eq`` specifies
48-
the coefficients of a linear equality constraint on ``x``.
49-
:param b_eq: The equality constraint vector. Each element of ``A_eq @ x``
50-
must equal the corresponding element of ``b_eq``.
51-
:param A_ub: The inequality constraint matrix. Each row of ``A_ub``
52-
specifies the coefficients of a linear inequality constraint on ``x``.
53-
:param b_ub: The inequality constraint vector. Each element represents an
54-
upper bound on the corresponding value of ``A_ub @ x``.
55-
:param bounds: A sequence of ``(min, max)`` pairs for each element in ``x``,
56-
defining the minimum and maximum values of that decision variable. Use
57-
``None`` to indicate that there is no bound. By default, bounds are
58-
``(0, None)`` (all decision variables are non-negative). If a single
59-
tuple ``(min, max)`` is provided, then ``min`` and ``max`` will serve as
60-
bounds for all decision variables.
61-
:param options: A dictionary of solver options. Refer to scipy's
62-
documentation for all possible values.
124+
& x \in \mathbb{R}^n , \\
125+
& e \text{ is a constant.}
126+
127+
where :math:`x` is a vector of decision variables; :math:`e` is the fixed
128+
subsidy passed in as ``subsidy``; :math:`b_{lb}` and :math:`b_{eq}` are vectors; and
129+
:math:`A_{lb}` and :math:`A_{eq}` are matrices.
130+
131+
:param subsidy: Minimal subsidy returned by :func:`_solve_least_core_linear_program`
132+
:param A_eq: The equality constraint matrix. Each row of ``A_eq`` specifies the
133+
coefficients of a linear equality constraint on ``x``.
134+
:param b_eq: The equality constraint vector. Each element of ``A_eq @ x`` must equal
135+
the corresponding element of ``b_eq``.
136+
:param A_lb: The inequality constraint matrix. Each row of ``A_lb`` specifies the
137+
coefficients of a linear inequality constraint on ``x``.
138+
:param b_lb: The inequality constraint vector. Each element represents a
139+
lower bound on the corresponding value of ``A_lb @ x``.
140+
:param epsilon: Relaxation value by which the subset utility is decreased.
141+
:param options: Keyword arguments that will be used to select a solver
142+
and to configure it. Refer to the following page for all possible options:
143+
https://www.cvxpy.org/tutorial/advanced/index.html#setting-solver-options
63144
"""
64-
logger.debug(
65-
f"Solving linear programming problem: {c=}, {A_eq=}, {b_eq=}, {A_ub=}, {b_ub=}"
66-
)
145+
logger.debug(f"Solving quadratic program : {A_eq=}, {b_eq=}, {A_lb=}, {b_lb=}")
67146

68-
result: scipy.optimize.OptimizeResult = scipy.optimize.linprog(
69-
c,
70-
A_ub=A_ub,
71-
b_ub=b_ub,
72-
A_eq=A_eq,
73-
b_eq=b_eq,
74-
bounds=bounds,
75-
method="highs-ipm",
76-
options=options,
77-
)
147+
if subsidy < 0:
148+
raise ValueError("The least core subsidy must be non-negative.")
78149

79-
logger.debug(f"{result=}")
150+
n_variables = A_eq.shape[1]
80151

81-
if result.success:
82-
return np.asarray(result.x)
152+
x = cp.Variable(n_variables)
153+
epsilon_parameter = cp.Parameter(name="epsilon", nonneg=True, value=epsilon)
83154

84-
values = None
155+
objective = cp.Minimize(cp.norm2(x))
156+
constraints = [
157+
A_eq @ x == b_eq,
158+
(A_lb @ x + subsidy * np.ones(len(A_lb))) >= (b_lb - epsilon_parameter),
159+
]
160+
problem = cp.Problem(objective, constraints)
85161

86-
if result.status == 1:
87-
warnings.warn(
88-
f"Solver terminated early: '{result.message}'. Consider increasing the solver's maxiter in options"
89-
)
90-
elif result.status == 2:
91-
warnings.warn(
92-
f"Could not find solution due to infeasibility of problem: '{result.message}'. "
93-
"Consider increasing max_iterations",
94-
RuntimeWarning,
95-
)
96-
elif result.status == 3:
97-
warnings.warn(
98-
f"Could not find solution due to unboundedness of problem: '{result.message}'. "
99-
"Consider increasing max_iterations",
100-
RuntimeWarning,
101-
)
102-
else:
162+
solver = options.pop("solver", cp.ECOS)
163+
164+
try:
165+
problem.solve(solver=solver, **options)
166+
except cp.error.SolverError as err:
167+
raise ValueError("Could not solve quadratic program") from err
168+
169+
if problem.status in cp.settings.SOLUTION_PRESENT:
170+
logger.debug("Problem was solved")
171+
if problem.status in [cp.settings.OPTIMAL_INACCURATE, cp.settings.USER_LIMIT]:
172+
warnings.warn(
173+
"Solver terminated early. Consider increasing the solver's "
174+
"maximum number of iterations in options",
175+
RuntimeWarning,
176+
)
177+
return x.value # type: ignore
178+
179+
if problem.status in cp.settings.INF_OR_UNB:
103180
warnings.warn(
104-
f"Could not find solution due to numerical issues: '{result.message}'. "
105-
"Consider increasing max_iterations",
181+
"Could not find solution due to infeasibility or unboundedness of problem.",
106182
RuntimeWarning,
107183
)
108-
return values
184+
return None

0 commit comments

Comments
 (0)