Skip to content

Commit f251650

Browse files
authored
Merge pull request #1825 from abigailgold/dev_1.11.1_pdtp
PDTP fixes
2 parents 57cb73b + 30eba65 commit f251650

File tree

3 files changed

+59
-26
lines changed

3 files changed

+59
-26
lines changed

art/metrics/privacy/membership_leakage.py

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,12 @@
1919
This module implements membership leakage metrics.
2020
"""
2121
from __future__ import absolute_import, division, print_function, unicode_literals
22-
from typing import TYPE_CHECKING, Optional
22+
from typing import TYPE_CHECKING, Optional, Tuple
2323

2424
import numpy as np
2525
import scipy
2626

27-
from art.utils import check_and_transform_label_format, is_probability
27+
from art.utils import check_and_transform_label_format, is_probability_array
2828

2929
if TYPE_CHECKING:
3030
from art.estimators.classification.classifier import Classifier
@@ -37,7 +37,7 @@ def PDTP( # pylint: disable=C0103
3737
y: np.ndarray,
3838
indexes: Optional[np.ndarray] = None,
3939
num_iter: Optional[int] = 10,
40-
) -> np.ndarray:
40+
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
4141
"""
4242
Compute the pointwise differential training privacy metric for the given classifier and training set.
4343
@@ -52,8 +52,8 @@ def PDTP( # pylint: disable=C0103
5252
computed for all samples in `x`.
5353
:param num_iter: the number of iterations of PDTP computation to run for each sample. If not supplied,
5454
defaults to 10. The result is the average across iterations.
55-
:return: an array containing the average PDTP value for each sample in the training set. The higher the value,
56-
the higher the privacy leakage for that sample.
55+
:return: A tuple of three arrays, containing the average, worst-case (maximum) and standard deviation of the PDTP value for each sample in
56+
the training set respectively. The higher the value, the higher the privacy leakage for that sample.
5757
"""
5858
from art.estimators.classification.pytorch import PyTorchClassifier
5959
from art.estimators.classification.tensorflow import TensorFlowV2Classifier
@@ -77,14 +77,15 @@ def PDTP( # pylint: disable=C0103
7777
iter_results = []
7878
# get probabilities from original model
7979
pred = target_estimator.predict(x)
80-
if not is_probability(pred):
80+
if not is_probability_array(pred):
8181
try:
8282
pred = scipy.special.softmax(pred, axis=1)
8383
except Exception as exc: # pragma: no cover
8484
raise ValueError("PDTP metric only supports classifiers that output logits or probabilities.") from exc
8585
# divide into 100 bins and return center of bin
8686
bins = np.array(np.arange(0.0, 1.01, 0.01).round(decimals=2))
8787
pred_bin_indexes = np.digitize(pred, bins)
88+
pred_bin_indexes[pred_bin_indexes == 101] = 100
8889
pred_bin = bins[pred_bin_indexes] - 0.005
8990

9091
if not indexes:
@@ -102,10 +103,11 @@ def PDTP( # pylint: disable=C0103
102103
extra_estimator.fit(alt_x, alt_y)
103104
# get probabilities from new model
104105
alt_pred = extra_estimator.predict(x)
105-
if not is_probability(alt_pred):
106+
if not is_probability_array(alt_pred):
106107
alt_pred = scipy.special.softmax(alt_pred, axis=1)
107108
# divide into 100 bins and return center of bin
108109
alt_pred_bin_indexes = np.digitize(alt_pred, bins)
110+
alt_pred_bin_indexes[alt_pred_bin_indexes == 101] = 100
109111
alt_pred_bin = bins[alt_pred_bin_indexes] - 0.005
110112
ratio_1 = pred_bin / alt_pred_bin
111113
ratio_2 = alt_pred_bin / pred_bin
@@ -118,6 +120,8 @@ def PDTP( # pylint: disable=C0103
118120
# We now have a list of lists, where each inner list represents one iteration. We need to transpose and get averages.
119121
per_sample = list(map(list, zip(*results)))
120122
avg_per_sample = np.array([sum(val) / len(val) for val in per_sample])
123+
worse_per_sample = np.max(per_sample, axis=1)
124+
std_dev_per_sample = np.std(per_sample, axis=1)
121125

122-
# return leakage per sample
123-
return avg_per_sample
126+
# return average leakage, worst-case leakage and standard deviation per sample
127+
return avg_per_sample, worse_per_sample, std_dev_per_sample

art/utils.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1562,6 +1562,24 @@ def is_probability(vector: np.ndarray) -> bool:
15621562
return is_sum_1 and is_smaller_1 and is_larger_0
15631563

15641564

1565+
def is_probability_array(array: np.ndarray) -> bool:
1566+
"""
1567+
Check if a multi-dimensional array is an array of probabilities.
1568+
1569+
:param array: A numpy array.
1570+
:return: True if it is an array of probabilities.
1571+
"""
1572+
if len(array.shape) == 1:
1573+
return is_probability(array)
1574+
sum_array = np.sum(array, axis=1)
1575+
ones = np.ones_like(sum_array)
1576+
is_sum_1 = np.allclose(sum_array, ones, rtol=1e-03)
1577+
is_smaller_1 = np.amax(array) <= 1.0
1578+
is_larger_0 = np.amin(array) >= 0.0
1579+
1580+
return is_sum_1 and is_smaller_1 and is_larger_0
1581+
1582+
15651583
def pad_sequence_input(x: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
15661584
"""
15671585
Apply padding to a batch of 1-dimensional samples such that it has shape of (batch_size, max_length).

tests/metrics/privacy/test_membership_leakage.py

Lines changed: 28 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -35,12 +35,15 @@ def test_membership_leakage_decision_tree(art_warning, decision_tree_estimator,
3535
extra_classifier = decision_tree_estimator()
3636
(x_train, y_train), _ = get_iris_dataset
3737
prev = classifier.model.tree_
38-
leakage = PDTP(classifier, extra_classifier, x_train, y_train)
39-
logger.info("Average PDTP leakage: %.2f", (np.average(leakage)))
40-
logger.info("Max PDTP leakage: %.2f", (np.max(leakage)))
38+
avg_leakage, worse_leakage, std_dev = PDTP(classifier, extra_classifier, x_train, y_train)
39+
logger.info("Average PDTP leakage: %.2f", (np.average(avg_leakage)))
40+
logger.info("Max PDTP leakage: %.2f", (np.max(avg_leakage)))
4141
assert classifier.model.tree_ == prev
42-
assert np.all(leakage >= 1.0)
43-
assert leakage.shape[0] == x_train.shape[0]
42+
assert np.all(avg_leakage >= 1.0)
43+
assert np.all(worse_leakage >= avg_leakage)
44+
assert avg_leakage.shape[0] == x_train.shape[0]
45+
assert worse_leakage.shape[0] == x_train.shape[0]
46+
assert std_dev.shape[0] == x_train.shape[0]
4447
except ARTTestException as e:
4548
art_warning(e)
4649

@@ -51,32 +54,40 @@ def test_membership_leakage_tabular(art_warning, tabular_dl_estimator, get_iris_
5154
classifier = tabular_dl_estimator()
5255
extra_classifier = tabular_dl_estimator()
5356
(x_train, y_train), _ = get_iris_dataset
54-
leakage = PDTP(classifier, extra_classifier, x_train, y_train)
55-
logger.info("Average PDTP leakage: %.2f", (np.average(leakage)))
56-
logger.info("Max PDTP leakage: %.2f", (np.max(leakage)))
57-
assert np.all(leakage >= 1.0)
58-
assert leakage.shape[0] == x_train.shape[0]
57+
avg_leakage, worse_leakage, std_dev = PDTP(classifier, extra_classifier, x_train, y_train)
58+
logger.info("Average PDTP leakage: %.2f", (np.average(avg_leakage)))
59+
logger.info("Max PDTP leakage: %.2f", (np.max(avg_leakage)))
60+
assert np.all(avg_leakage >= 1.0)
61+
assert np.all(worse_leakage >= avg_leakage)
62+
assert avg_leakage.shape[0] == x_train.shape[0]
63+
assert worse_leakage.shape[0] == x_train.shape[0]
64+
assert std_dev.shape[0] == x_train.shape[0]
5965
except ARTTestException as e:
6066
art_warning(e)
6167

6268

63-
@pytest.mark.skip_framework("keras", "kerastf", "tensorflow1", "tensorflow2v1", "mxnet")
69+
@pytest.mark.skip_framework("scikitlearn", "keras", "kerastf", "tensorflow1", "tensorflow2v1", "mxnet")
6470
def test_membership_leakage_image(art_warning, image_dl_estimator, get_default_mnist_subset):
6571
try:
6672
classifier, _ = image_dl_estimator()
6773
extra_classifier, _ = image_dl_estimator()
6874
(x_train, y_train), _ = get_default_mnist_subset
6975
indexes = random.sample(range(x_train.shape[0]), 100)
70-
leakage = PDTP(classifier, extra_classifier, x_train, y_train, indexes=indexes, num_iter=1)
71-
logger.info("Average PDTP leakage: %.2f", (np.average(leakage)))
72-
logger.info("Max PDTP leakage: %.2f", (np.max(leakage)))
73-
assert np.all(leakage >= 1.0)
74-
assert leakage.shape[0] == len(indexes)
76+
avg_leakage, worse_leakage, std_dev = PDTP(
77+
classifier, extra_classifier, x_train, y_train, indexes=indexes, num_iter=1
78+
)
79+
logger.info("Average PDTP leakage: %.2f", (np.average(avg_leakage)))
80+
logger.info("Max PDTP leakage: %.2f", (np.max(avg_leakage)))
81+
assert np.all(avg_leakage >= 1.0)
82+
assert np.all(worse_leakage >= avg_leakage)
83+
assert avg_leakage.shape[0] == 100
84+
assert worse_leakage.shape[0] == 100
85+
assert std_dev.shape[0] == 100
7586
except ARTTestException as e:
7687
art_warning(e)
7788

7889

79-
@pytest.mark.skip_framework("keras", "kerastf", "tensorflow1", "mxnet")
90+
@pytest.mark.skip_framework("scikitlearn", "keras", "kerastf", "tensorflow1", "mxnet")
8091
def test_errors(art_warning, tabular_dl_estimator, get_iris_dataset, image_data_generator):
8192
try:
8293
classifier = tabular_dl_estimator()

0 commit comments

Comments
 (0)