|
27 | 27 | compute any semi-value, in particular Shapley and Beta values, and Banzhaf |
28 | 28 | indices. |
29 | 29 |
|
30 | | -# Slicing of samplers |
| 30 | +## Slicing of samplers |
31 | 31 |
|
32 | 32 | The samplers can be sliced for parallel computation. For those which are |
33 | 33 | embarrassingly parallel, this is done by slicing the set of "outer" indices and |
|
36 | 36 | and [UniformSampler][pydvl.value.sampler.UniformSampler]. In contrast, slicing a |
37 | 37 | [PermutationSampler][pydvl.value.sampler.PermutationSampler] creates a new |
38 | 38 | sampler which iterates over the same indices. |
| 39 | +
|
| 40 | +
|
| 41 | +## References |
| 42 | +
|
| 43 | +[^1]: <a name="mitchell_sampling_2022"></a>Mitchell, Rory, Joshua Cooper, Eibe |
| 44 | + Frank, and Geoffrey Holmes. [Sampling Permutations for Shapley Value |
| 45 | + Estimation](http://jmlr.org/papers/v23/21-0439.html). Journal of Machine |
| 46 | + Learning Research 23, no. 43 (2022): 1–46. |
| 47 | +
|
39 | 48 | """ |
40 | 49 |
|
41 | 50 | from __future__ import annotations |
@@ -315,18 +324,19 @@ class AntitheticSampler(StochasticSamplerMixin, PowersetSampler[IndexT]): |
315 | 324 | """An iterator to perform uniform random sampling of subsets, and their |
316 | 325 | complements. |
317 | 326 |
|
318 | | - Works as :class:`~pydvl.value.sampler.UniformSampler`, but for every tuple |
319 | | - $(i,S)$, it subsequently returns $(i,S^c)$, where $S^c$ is the complement of |
320 | | - the set $S$, including the index $i$ itself. |
| 327 | + Works as [UniformSampler][pydvl.value.sampler.UniformSampler], but for every |
| 328 | + tuple $(i,S)$, it subsequently returns $(i,S^c)$, where $S^c$ is the |
| 329 | + complement of the set $S$ in the set of indices, excluding $i$. |
321 | 330 | """ |
322 | 331 |
|
323 | 332 | def __iter__(self) -> Iterator[SampleT]: |
324 | 333 | while True: |
325 | 334 | for idx in self.iterindices(): |
326 | | - subset = random_subset(self.complement([idx]), seed=self._rng) |
| 335 | + _complement = self.complement([idx]) |
| 336 | + subset = random_subset(_complement, seed=self._rng) |
327 | 337 | yield idx, subset |
328 | 338 | self._n_samples += 1 |
329 | | - yield idx, self.complement(np.concatenate((subset, np.array([idx])))) |
| 339 | + yield idx, np.setxor1d(_complement, subset) |
330 | 340 | self._n_samples += 1 |
331 | 341 | if self._n_samples == 0: # Empty index set |
332 | 342 | break |
@@ -372,6 +382,29 @@ def weight(cls, n: int, subset_len: int) -> float: |
372 | 382 | return n * math.comb(n - 1, subset_len) if n > 0 else 1.0 |
373 | 383 |
|
374 | 384 |
|
class AntitheticPermutationSampler(PermutationSampler[IndexT]):
    """Permutation sampler that pairs every random permutation with its
    reversal.

    Behaves like [PermutationSampler][pydvl.value.sampler.PermutationSampler],
    except that each freshly drawn permutation is walked twice: first in the
    order it was drawn, then back to front. For every position in either
    ordering, the sampler yields the index found at that position together
    with the indices that precede it in that ordering.

    This sampler was suggested in (Mitchell et al. 2022)<sup><a
    href="#mitchell_sampling_2022">1</a></sup>.

    !!! tip "New in version 0.7.1"
    """

    def __iter__(self) -> Iterator[SampleT]:
        """Yield `(index, predecessors)` pairs from antithetic permutations.

        The sample counter is incremented once per yielded pair. Iteration
        only stops if that counter is still zero after a full forward and
        backward pass, which is the case for an empty index set.
        """
        while True:
            forward = self._rng.permutation(self._indices)
            backward = forward[::-1]
            for ordering in (forward, backward):
                for position, index in enumerate(ordering):
                    # Everything before `position` plays the role of the
                    # subset preceding `index` in this ordering.
                    yield index, ordering[:position]
                    self._n_samples += 1

            # Nothing was ever produced: avoid spinning forever.
            if self._n_samples == 0:
                break
| 406 | + |
| 407 | + |
375 | 408 | class DeterministicPermutationSampler(PermutationSampler[IndexT]): |
376 | 409 | """Samples all n! permutations of the indices deterministically, and |
377 | 410 | iterates through them, returning sets as required for the permutation-based |
|
0 commit comments