1+ import itertools
12import logging
23import warnings
3- from typing import Optional , Tuple
4+ from typing import List , NamedTuple , Optional , Sequence , Tuple
45
56import cvxpy as cp
67import numpy as np
78from numpy .typing import NDArray
89
10+ from pydvl .utils import MapReduceJob , ParallelConfig , Status , Utility
11+ from pydvl .value import ValuationResult
12+
913__all__ = [
1014 "_solve_least_core_linear_program" ,
1115 "_solve_egalitarian_least_core_quadratic_program" ,
16+ "lc_solve_problem" ,
17+ "lc_solve_problems" ,
18+ "LeastCoreProblem" ,
1219]
1320
1421logger = logging .getLogger (__name__ )
1522
class LeastCoreProblem(NamedTuple):
    """Data of a Least Core problem, as consumed by :func:`lc_solve_problem`.

    The two arrays are index-aligned: entry ``i`` of ``utility_values`` is the
    lower bound associated with row ``i`` of ``A_lb``.
    """

    # ``np.float64`` is the same type as the ``np.float_`` alias, which no
    # longer exists in NumPy >= 2.0. Field annotations are evaluated when the
    # class is created, so the alias would fail at import time there.

    # Right-hand side of the lower-bound constraints.
    utility_values: NDArray[np.float64]
    # Coefficient matrix of the lower-bound constraints, one row per
    # constraint.
    A_lb: NDArray[np.float64]
27+
28+
def lc_solve_problem(
    problem: LeastCoreProblem, *, u: Utility, algorithm: str, **options
) -> ValuationResult:
    """Solves a linear problem prepared by :func:`mclc_prepare_problem`.
    Useful for parallel execution of multiple experiments by running this as a
    remote task.

    The subsidy is computed first with the Least Core linear program; if that
    succeeds, the values are computed with the egalitarian quadratic program
    using that subsidy.

    See :func:`~pydvl.value.least_core.naive.exact_least_core` or
    :func:`~pydvl.value.least_core.montecarlo.montecarlo_least_core` for
    argument descriptions.

    :param problem: Utility values and lower-bound constraint matrix. Duplicate
        rows of the matrix are dropped before solving.
    :param u: Utility. Its data determines the number of variables and the
        data names of the result; it is only evaluated (on all indices) when
        no row of the constraint matrix sums to the number of data points.
    :param algorithm: Name of the valuation algorithm, stored in the result.
    :param options: Additional options forwarded to both solvers.
    :return: A :class:`ValuationResult` with status ``Converged`` on success.
        If either solver returns ``None``, the status is ``Failed`` and both
        the values and the subsidy are NaN.
    """
    n = len(u.data)

    nan_mask = np.isnan(problem.utility_values)
    if np.any(nan_mask):
        warnings.warn(
            f"Calculation returned "
            f"{np.sum(nan_mask)} NaN "
            f"values out of {problem.utility_values.size}",
            RuntimeWarning,
        )

    logger.debug("Removing possible duplicate values in lower bound array")
    A_lb, unique_indices = np.unique(problem.A_lb, return_index=True, axis=0)
    # Keep the bounds aligned with the rows that survived deduplication.
    b_lb = problem.utility_values[unique_indices]

    logger.debug("Building equality constraint")
    A_eq = np.ones((1, n))
    # We might have already computed the total utility. That's the index of the
    # row in A_lb with all ones.
    total_utility_index = np.where(A_lb.sum(axis=1) == n)[0]
    if len(total_utility_index) == 0:
        b_eq = np.array([u(u.data.indices)])
    else:
        b_eq = b_lb[total_utility_index]

    _, subsidy = _solve_least_core_linear_program(
        A_eq=A_eq, b_eq=b_eq, A_lb=A_lb, b_lb=b_lb, **options
    )

    # The quadratic program needs a subsidy, so it is skipped when the linear
    # program failed; both failure modes are then handled in one place.
    values: Optional[NDArray[np.float_]] = None
    if subsidy is not None:
        values = _solve_egalitarian_least_core_quadratic_program(
            subsidy,
            A_eq=A_eq,
            b_eq=b_eq,
            A_lb=A_lb,
            b_lb=b_lb,
            **options,
        )

    if values is None:
        logger.debug("No values were found")
        status = Status.Failed
        values = np.full(n, np.nan)
        subsidy = np.nan
    else:
        status = Status.Converged

    return ValuationResult(
        algorithm=algorithm,
        status=status,
        values=values,
        subsidy=subsidy,
        stderr=None,
        data_names=u.data.data_names,
    )
106+
107+
def lc_solve_problems(
    problems: Sequence[LeastCoreProblem],
    u: Utility,
    algorithm: str,
    config: ParallelConfig = ParallelConfig(),
    n_jobs: int = 1,
    **options,
) -> List[ValuationResult]:
    """Solves a list of linear problems in parallel.

    Every problem is solved with :func:`lc_solve_problem` through a
    :class:`MapReduceJob`, and the per-job lists of results are concatenated
    into a single flat list.

    :param u: Utility.
    :param problems: Least Core problems to solve, as returned by
        :func:`~pydvl.value.least_core.montecarlo.mclc_prepare_problem`.
    :param algorithm: Name of the valuation algorithm.
    :param config: Object configuring parallel computation, with cluster
        address, number of cpus, etc.
    :param n_jobs: Number of parallel jobs to run.
    :param options: Additional options to pass to the solver.
    :return: List of solutions.
    """

    def _solve_chunk(
        chunk: List[LeastCoreProblem], *args, **kwargs
    ) -> List[ValuationResult]:
        # Solve the problems handed to this job one after the other.
        solved = []
        for single_problem in chunk:
            solved.append(lc_solve_problem(single_problem, *args, **kwargs))
        return solved

    # Keyword arguments forwarded to every call of the map function.
    solver_kwargs = {"u": u, "algorithm": algorithm, **options}

    job: MapReduceJob["LeastCoreProblem", "List[ValuationResult]"] = MapReduceJob(
        inputs=problems,
        map_func=_solve_chunk,
        map_kwargs=solver_kwargs,
        # Flatten the list of per-job result lists.
        reduce_func=lambda x: list(itertools.chain.from_iterable(x)),
        config=config,
        n_jobs=n_jobs,
    )
    return job()
147+
16148
17149def _solve_least_core_linear_program (
18150 A_eq : NDArray [np .float_ ],
19151 b_eq : NDArray [np .float_ ],
20152 A_lb : NDArray [np .float_ ],
21153 b_lb : NDArray [np .float_ ],
22- * ,
23- epsilon : float = 0.0 ,
24154 ** options ,
25155) -> Tuple [Optional [NDArray [np .float_ ]], Optional [float ]]:
26156 """Solves the Least Core's linear program using cvxopt.
@@ -46,7 +176,6 @@ def _solve_least_core_linear_program(
46176 coefficients of a linear inequality constraint on ``x``.
47177 :param b_lb: The inequality constraint vector. Each element represents a
48178 lower bound on the corresponding value of ``A_lb @ x``.
49- :param epsilon: Relaxation value by which the subset utility is decreased.
50179 :param options: Keyword arguments that will be used to select a solver
51180 and to configure it. For all possible options, refer to `cvxpy's documentation
52181 <https://www.cvxpy.org/tutorial/advanced/index.html#setting-solver-options>`_
@@ -57,13 +186,12 @@ def _solve_least_core_linear_program(
57186
58187 x = cp .Variable (n_variables )
59188 e = cp .Variable ()
60- epsilon_parameter = cp .Parameter (name = "epsilon" , nonneg = True , value = epsilon )
61189
62190 objective = cp .Minimize (e )
63191 constraints = [
64192 e >= 0 ,
65193 A_eq @ x == b_eq ,
66- (A_lb @ x + e * np .ones (len (A_lb ))) >= ( b_lb - epsilon_parameter ) ,
194+ (A_lb @ x + e * np .ones (len (A_lb ))) >= b_lb ,
67195 ]
68196 problem = cp .Problem (objective , constraints )
69197
@@ -110,7 +238,6 @@ def _solve_egalitarian_least_core_quadratic_program(
110238 b_eq : NDArray [np .float_ ],
111239 A_lb : NDArray [np .float_ ],
112240 b_lb : NDArray [np .float_ ],
113- epsilon : float = 0.0 ,
114241 ** options ,
115242) -> Optional [NDArray [np .float_ ]]:
116243 """Solves the egalitarian Least Core's quadratic program using cvxopt.
@@ -137,7 +264,6 @@ def _solve_egalitarian_least_core_quadratic_program(
137264 coefficients of a linear inequality constraint on ``x``.
138265 :param b_lb: The inequality constraint vector. Each element represents a
139266 lower bound on the corresponding value of ``A_lb @ x``.
140- :param epsilon: Relaxation value by which the subset utility is decreased.
141267 :param options: Keyword arguments that will be used to select a solver
142268 and to configure it. Refer to the following page for all possible options:
143269 https://www.cvxpy.org/tutorial/advanced/index.html#setting-solver-options
@@ -150,12 +276,11 @@ def _solve_egalitarian_least_core_quadratic_program(
150276 n_variables = A_eq .shape [1 ]
151277
152278 x = cp .Variable (n_variables )
153- epsilon_parameter = cp .Parameter (name = "epsilon" , nonneg = True , value = epsilon )
154279
155280 objective = cp .Minimize (cp .norm2 (x ))
156281 constraints = [
157282 A_eq @ x == b_eq ,
158- (A_lb @ x + subsidy * np .ones (len (A_lb ))) >= ( b_lb - epsilon_parameter ) ,
283+ (A_lb @ x + subsidy * np .ones (len (A_lb ))) >= b_lb ,
159284 ]
160285 problem = cp .Problem (objective , constraints )
161286
0 commit comments