From 02e2850dcc63a83d28e1402779bbbb77f9d89806 Mon Sep 17 00:00:00 2001 From: SandeepJabez Date: Mon, 4 Jul 2022 13:11:15 +0530 Subject: [PATCH 1/6] gaussianmechanism --- src/pydp/ml/mechanisms/sklearn_pipeline.py | 139 ++++++++++++++++++++- 1 file changed, 137 insertions(+), 2 deletions(-) diff --git a/src/pydp/ml/mechanisms/sklearn_pipeline.py b/src/pydp/ml/mechanisms/sklearn_pipeline.py index cfd45aa6..749f8199 100644 --- a/src/pydp/ml/mechanisms/sklearn_pipeline.py +++ b/src/pydp/ml/mechanisms/sklearn_pipeline.py @@ -7,7 +7,7 @@ import numpy as np -class LaplaceMechanism(BaseEstimator, TransformerMixin): +class LaplaceMechanism (BaseEstimator, TransformerMixin): """ An SKLearn Pipeline operator for applying differentially private noise addition using the laplace mechanism. @@ -132,4 +132,139 @@ def transform(self, X, y=None): return noised_array else: X = self.sensitivity_calculation( X) - return X \ No newline at end of file + return X + + + + + +# for gaussian +class GaussianMechanism(BaseEstimator, TransformerMixin): + """ + An SKLearn Pipeline operator for applying differentially private noise + addition using the gaussian mechanism. + + """ + + def __init__(self, delta=1.0, sensitivity=1, accountant=None): + + """ + Checks that all parameters of the mechanism have been initialised correctly, and that the mechanism is ready + to be used. + + Parameters + ---------- + delta : float or int + The value of delta for achieving :math:`\delta`-differential privacy with the mechanism. Must have + `delta > 0`. + + sensitivity : float or int + The sensitivity of the mechanism. Must satisfy `sensitivity` > 0. + + accountant : BudgetAccountant, optional + Accountant to keep track of privacy budget. + + + Attributes + ------- + delta + Privacy budget to calculate noise. + + sensitivty + Sensitivity of the mechanism to calculate noise. + + accountant + Accountant to keep track of privacy budget. + + Raises + ------ + TypeError + If delta is not a number, or sensitivity is not a number or a callable. + + ValueError + If delta less than 0, or sensitivty is a number but less than 0. + """ + + if not isinstance(delta, numbers.Number): + raise TypeError(f"Delta must be a number. Got type {type(delta)}.") + + if delta <= 0: + raise ValueError("Delta must be at least larger than 0.") + + self.delta = delta + + if not isinstance(sensitivity, numbers.Number): + if not callable(sensitivity): + raise TypeError(f"Sensitivity must be a number or callable. Got type {type(sensitivity)}.") + + + if isinstance(sensitivity, numbers.Number) and sensitivity <= 0: + raise ValueError("Sensitivity must be at least larger than 0.") + + self.sensitivity = sensitivity + self.accountant = BudgetAccountant.load_default(accountant) + + self.gaussian = None + + if not callable(sensitivity): + self.gaussian = Gaussian() + + def sensitivity_calculation(self, X): + """ + Perform local differential privacy by adding noise using Laplace mechanismto the dataset if the sensitivity + provided if a callable. + + + Parameters + ---------- + X : numpy.array + Datset in the form of a 2-dimensional numpy array. + + Returns + ------ + X : numpy.array + Original parameter X with differentially private noise added. + """ + + n_feature = X.shape[-1] + n_data = X.shape[0] + + + for data_idx in range(n_data): + self.accountant.check(self.delta, 0) + for feature_idx in range(n_feature): + + # Array with data point data_idx removed for feature_idx + feature = np.concatenate((X[:data_idx,feature_idx],X[data_idx + 1:,feature_idx])) + + # Calculate sensitivity + sensitivity_ = self.sensitivity(feature) + + # Initialized Gaussian mechanism instance + gaussian = Gaussian().set_delta(self.delta).set_sensitivity(sensitivity_) + + # Add noise to the data point that was removed + noised_value = gaussian.randomise(X[data_idx,feature_idx]) + + # Replaced data point in the dataset with noised version + X[data_idx,feature_idx] = noised_value + + self.accountant.spend(self.delta, 0) + return X + + def fit(self, X, y=None): + return self + + def transform(self, X, y=None): + if self.gaussian is not None: + self.gaussian.set_delta(self.delta).set_sensitivity(self.sensitivity) + vector_randomise = np.vectorize(self.gaussian.randomise) + noised_array = vector_randomise(X) + return noised_array + else: + X = self.sensitivity_calculation( X) + return X + + + + \ No newline at end of file From 312b521b7bc8a82fd13bba37b91f2580da6963fc Mon Sep 17 00:00:00 2001 From: SandeepJabez Date: Tue, 5 Jul 2022 21:13:03 +0530 Subject: [PATCH 2/6] Made some changes --- src/pydp/ml/mechanisms/sklearn_pipeline.py | 213 +++++++++++++++------ 1 file changed, 153 insertions(+), 60 deletions(-) diff --git a/src/pydp/ml/mechanisms/sklearn_pipeline.py b/src/pydp/ml/mechanisms/sklearn_pipeline.py index 749f8199..e475af7c 100644 --- a/src/pydp/ml/mechanisms/sklearn_pipeline.py +++ b/src/pydp/ml/mechanisms/sklearn_pipeline.py @@ -145,70 +145,81 @@ class GaussianMechanism(BaseEstimator, TransformerMixin): addition using the gaussian mechanism. """ - - def __init__(self, delta=1.0, sensitivity=1, accountant=None): - - """ - Checks that all parameters of the mechanism have been initialised correctly, and that the mechanism is ready - to be used. - - Parameters - ---------- - delta : float or int - The value of delta for achieving :math:`\delta`-differential privacy with the mechanism. Must have - `delta > 0`. - - sensitivity : float or int - The sensitivity of the mechanism. Must satisfy `sensitivity` > 0. - - accountant : BudgetAccountant, optional - Accountant to keep track of privacy budget. - - - Attributes - ------- - delta - Privacy budget to calculate noise. - - sensitivty - Sensitivity of the mechanism to calculate noise. - - accountant - Accountant to keep track of privacy budget. + """ + static std::unique_ptr build(double epsilon, double delta, + double l2_sensitivity) { + dp::GaussianMechanism::Builder builder; + builder.SetEpsilon(epsilon); + builder.SetDelta(delta); + builder.SetL2Sensitivity(l2_sensitivity); + return downcast_unique_ptr( + builder.Build().value()); + }; + + Gaussian depends on these parameters + """ + def __init__(self, epsilon=1.0, delta=1.0, sensitivity=1, accountant=None): + """ + Checks that all parameters of the mechanism have been initialised correctly, and that the mechanism is ready + to be used. + + Parameters + ---------- + epsilon : float or int + The value of epsilon for achieving :math:`\epsilon`-differential privacy with the mechanism. Must have + `epsilon > 0`. + + sensitivity : float or int + The sensitivity of the mechanism. Must satisfy `sensitivity` > 0. - Raises - ------ - TypeError - If delta is not a number, or sensitivity is not a number or a callable. - - ValueError - If delta less than 0, or sensitivty is a number but less than 0. - """ + accountant : BudgetAccountant, optional + Accountant to keep track of privacy budget. - if not isinstance(delta, numbers.Number): - raise TypeError(f"Delta must be a number. Got type {type(delta)}.") - - if delta <= 0: - raise ValueError("Delta must be at least larger than 0.") - - self.delta = delta - if not isinstance(sensitivity, numbers.Number): - if not callable(sensitivity): - raise TypeError(f"Sensitivity must be a number or callable. Got type {type(sensitivity)}.") - - - if isinstance(sensitivity, numbers.Number) and sensitivity <= 0: - raise ValueError("Sensitivity must be at least larger than 0.") + Attributes + ------- + epsilon + Privacy budget to calculate noise. - self.sensitivity = sensitivity - self.accountant = BudgetAccountant.load_default(accountant) + sensitivty + Sensitivity of the mechanism to calculate noise. + + accountant + Accountant to keep track of privacy budget. + + Raises + ------ + TypeError + If epsilon is not a number, or sensitivity is not a number or a callable. - self.gaussian = None + ValueError + If epsilon less than 0, or sensitivty is a number but less than 0. + """ + + if not isinstance(epsilon, numbers.Number): + raise TypeError(f"Epsilon must be a number. Got type {type(epsilon)}.") + if epsilon <= 0: + raise ValueError("Epsilon must be at least larger than 0.") + + self.epsilon = epsilon + + if not isinstance(sensitivity, numbers.Number): if not callable(sensitivity): + raise TypeError(f"Sensitivity must be a number or callable. Got type {type(sensitivity)}.") + + + if isinstance(sensitivity, numbers.Number) and sensitivity <= 0: + raise ValueError("Sensitivity must be at least larger than 0.") + + self.sensitivity = sensitivity + self.accountant = BudgetAccountant.load_default(accountant) + + self.gaussian = None + + if not callable(sensitivity): self.gaussian = Gaussian() - + def sensitivity_calculation(self, X): """ Perform local differential privacy by adding noise using Laplace mechanismto the dataset if the sensitivity @@ -231,7 +242,7 @@ def sensitivity_calculation(self, X): for data_idx in range(n_data): - self.accountant.check(self.delta, 0) + self.accountant.check(self.epsilon, 0) for feature_idx in range(n_feature): # Array with data point data_idx removed for feature_idx @@ -241,7 +252,7 @@ def sensitivity_calculation(self, X): sensitivity_ = self.sensitivity(feature) # Initialized Gaussian mechanism instance - gaussian = Gaussian().set_delta(self.delta).set_sensitivity(sensitivity_) + gaussian = Gaussian().set_epsilon(self.epsilon).set_sensitivity(sensitivity_) # Add noise to the data point that was removed noised_value = gaussian.randomise(X[data_idx,feature_idx]) @@ -249,7 +260,7 @@ def sensitivity_calculation(self, X): # Replaced data point in the dataset with noised version X[data_idx,feature_idx] = noised_value - self.accountant.spend(self.delta, 0) + self.accountant.spend(self.epsilon, 0) return X def fit(self, X, y=None): @@ -257,7 +268,7 @@ def fit(self, X, y=None): def transform(self, X, y=None): if self.gaussian is not None: - self.gaussian.set_delta(self.delta).set_sensitivity(self.sensitivity) + self.gaussian.set_epsilon(self.epsilon).set_sensitivity(self.sensitivity) vector_randomise = np.vectorize(self.gaussian.randomise) noised_array = vector_randomise(X) return noised_array @@ -265,6 +276,88 @@ def transform(self, X, y=None): X = self.sensitivity_calculation( X) return X + + + + +# geometric has lambda +def GeometricMechanism(BaseEstimator, TransformerMixin): + """ + An SKLearn Pipeline operator for applying differentially private noise + addition using the geometric mechanism. + + """ + + def __init__(self, lambda_=1.0, sensitivity=1, accountant=None): + + """ + Checks that all parameters of the mechanism have been initialised correctly, and that the mechanism is ready + to be used. + + Parameters + ---------- + lambda_ : float or int + The value of lambda for achieving :math:`\lambda`-differential privacy with the mechanism. Must have + `lambda > 0`. + + sensitivity : float or int + The sensitivity of the mechanism. Must satisfy `sensitivity` > 0. + + accountant : BudgetAccountant, optional + Accountant to keep track of privacy budget. + + + Attributes + ------- + lambda_ + Privacy budget to calculate noise. + + sensitivty + Sensitivity of the mechanism to calculate noise. + + accountant + Accountant to keep track of privacy budget. + + Raises + ------ + TypeError + If lambda_ is not a number, or sensitivity is not a number or a callable. + + ValueError + If lambda_ less than 0, or sensitivty is a number but less than 0. + """ + if not isinstance(lambda_, numbers.Number): + raise TypeError(f"Lambda must be a number. Got type {type(lambda_)}.") + + if lambda_ <= 0: + raise ValueError("Lambda must be at least larger than 0.") + + self.lambda_ = lambda_ + + if not isinstance(sensitivity, numbers.Number): + if not callable(sensitivity): + raise TypeError(f"Sensitivity must be a number or callable. Got type {type(sensitivity)}.") + + + if isinstance(sensitivity, numbers.Number) and sensitivity <= 0: + raise ValueError("Sensitivity must be at least larger than 0.") + + self.sensitivity = sensitivity + self.accountant = BudgetAccountant. + self.BaseEstimator = BaseEstimator + self.TransformerMixin = TransformerMixin + + def fit(self, X, y=None): + return self + def transform(self, X, y=None): + if self.geometric is not None: + self.geometric.set_delta(self.delta).set_sensitivity(self.sensitivity) + vector_randomise = np.vectorize(self.geometric.randomise) + noised_array = vector_randomise(X) + return noised_array + else: + X = self.sensitivity_calculation( X) + return X \ No newline at end of file From acd7140b36519531044c46f822f75a744c6958ba Mon Sep 17 00:00:00 2001 From: SandeepJabez Date: Sat, 16 Jul 2022 06:10:02 +0530 Subject: [PATCH 3/6] merge conflict --- src/pydp/ml/mechanisms/sklearn_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pydp/ml/mechanisms/sklearn_pipeline.py b/src/pydp/ml/mechanisms/sklearn_pipeline.py index e475af7c..a9c47908 100644 --- a/src/pydp/ml/mechanisms/sklearn_pipeline.py +++ b/src/pydp/ml/mechanisms/sklearn_pipeline.py @@ -7,7 +7,7 @@ import numpy as np -class LaplaceMechanism (BaseEstimator, TransformerMixin): +class LaplaceMechanism(BaseEstimator, TransformerMixin): """ An SKLearn Pipeline operator for applying differentially private noise addition using the laplace mechanism. From 7ad514dc36283f8405058cec156b1c1ffafc5d76 Mon Sep 17 00:00:00 2001 From: SandeepJabez Date: Sat, 16 Jul 2022 06:22:34 +0530 Subject: [PATCH 4/6] Removing conflicts --- src/pydp/ml/mechanisms/sklearn_pipeline.py | 175 +++++++++++++-------- 1 file changed, 111 insertions(+), 64 deletions(-) diff --git a/src/pydp/ml/mechanisms/sklearn_pipeline.py b/src/pydp/ml/mechanisms/sklearn_pipeline.py index a9c47908..bd42d3b9 100644 --- a/src/pydp/ml/mechanisms/sklearn_pipeline.py +++ b/src/pydp/ml/mechanisms/sklearn_pipeline.py @@ -9,132 +9,178 @@ class LaplaceMechanism(BaseEstimator, TransformerMixin): """ - An SKLearn Pipeline operator for applying differentially private noise - addition using the laplace mechanism. - Paper link: https://link.springer.com/content/pdf/10.1007/11681878_14.pdf + An SKLearn Pipeline operator for applying differentially private noise + addition using the laplace mechanism. + Paper link: https://link.springer.com/content/pdf/10.1007/11681878_14.pdf """ - - def __init__(self, epsilon=1.0, sensitivity=1, accountant=None): - + + def __init__( + self, + epsilon=1.0, + sensitivity=1, + scale=None, + cat_feat_idxs=None, + cat_sensitivity=None, + ): + """ Checks that all parameters of the mechanism have been initialised correctly, and that the mechanism is ready to be used. - Parameters ---------- epsilon : float or int The value of epsilon for achieving :math:`\epsilon`-differential privacy with the mechanism. Must have `epsilon > 0`. - sensitivity : float or int The sensitivity of the mechanism. Must satisfy `sensitivity` > 0. - - accountant : BudgetAccountant, optional - Accountant to keep track of privacy budget. - - + scale : float or int, optional + cat_feat_idxs : list or None, optional + List of integers identifying indicies of categorical features. + cat_feat_idxs : list or None, optional + List of integers identifying indicies of categorical features. + cat_feat_idxs : list or None, optional + List of integers identifying indicies of categorical features. + cat_feat_idxs : list or None, optional + List of integers identifying indicies of categorical features. Attributes ------- epsilon Privacy budget to calculate noise. - sensitivty Sensitivity of the mechanism to calculate noise. - - accountant - Accountant to keep track of privacy budget. - + scale + cat_feat_idxs + List of indicies that identifies categorical features. + cat_sensitivty + Sensitivity of the mechanism to calculate noise for categorical data. Raises ------ TypeError If epsilon is not a number, or sensitivity is not a number or a callable. - ValueError If epsilon less than 0, or sensitivty is a number but less than 0. """ - + if not isinstance(epsilon, numbers.Number): raise TypeError(f"Epsilon must be a number. Got type {type(epsilon)}.") if epsilon <= 0: raise ValueError("Epsilon must be at least larger than 0.") - + self.epsilon = epsilon - + if not isinstance(sensitivity, numbers.Number): if not callable(sensitivity): - raise TypeError(f"Sensitivity must be a number or callable. Got type {type(sensitivity)}.") + raise TypeError( + f"Sensitivity must be a number or callable. Got type {type(sensitivity)}." + ) - if isinstance(sensitivity, numbers.Number) and sensitivity <= 0: raise ValueError("Sensitivity must be at least larger than 0.") - + self.sensitivity = sensitivity - self.accountant = BudgetAccountant.load_default(accountant) - - self.laplace = None # If sensitivity is callable, set lapalace to None - - if not callable(sensitivity): - self.laplace = Laplace() - + + if scale is not None: + if not isinstance(scale, numbers.Number): + raise TypeError( + f"Sensitivity must be a int or float. Got type {type(sensitivity)}." + ) + + self.scale = scale + + if (cat_feat_idxs is not None and cat_sensitivity is None) or ( + cat_feat_idxs is None and cat_sensitivity is not None + ): + raise ValueError( + "cat_feat_idxs cannot be None if cat_sensitivity, and vice versa." + ) + + self.categorical_exists = ( + cat_feat_idxs is not None and cat_sensitivity is not None + ) + + if self.categorical_exists: + + if not isinstance(sensitivity, numbers.Number): + if not callable(sensitivity): + raise TypeError( + f"Sensitivity must be a number or callable. Got type {type(sensitivity)}." + ) + + if isinstance(sensitivity, numbers.Number) and sensitivity <= 0: + raise ValueError("Sensitivity must be at least larger than 0.") + + if len(cat_feat_idxs) == 0: + raise ValueError( + "At least 1 categorical feature index must be provided." + ) + + self.cat_feat_idxs = cat_feat_idxs + self.cat_sensitivity = cat_sensitivity + def sensitivity_calculation(self, X): """ - Perform local differential privacy by adding noise using Laplace mechanismto the dataset if the sensitivity + Perform local differential privacy by adding noise using Laplace mechanismto the dataset if the sensitivity provided if a callable. - - Parameters ---------- X : numpy.array Datset in the form of a 2-dimensional numpy array. - Returns ------ X : numpy.array Original parameter X with differentially private noise added. """ - + n_feature = X.shape[-1] n_data = X.shape[0] - for data_idx in range(n_data): - self.accountant.check(self.epsilon, 0) for feature_idx in range(n_feature): - + # Array with data point data_idx removed for feature_idx - feature = np.concatenate((X[:data_idx,feature_idx],X[data_idx + 1:,feature_idx])) - + feature = np.concatenate( + (X[:data_idx, feature_idx], X[data_idx + 1 :, feature_idx]) + ) + # Calculate sensitivity - sensitivity_ = self.sensitivity(feature) - + if self.categorical_exists and feature_idx in cat_feat_idxs: + if isinstance(self.cat_sensitivity, numbers.Number): + sensitivity_ = self.cat_sensitivity + print(sensitivity_) + else: + sensitivity_ = self.cat_sensitivity(feature) + print(sensitivity_) + + else: + if isinstance(self.sensitivity, numbers.Number): + sensitivity_ = self.sensitivity + else: + sensitivity_ = self.sensitivity(feature) + # Initialized Laplace mechanism instance - laplace = Laplace().set_epsilon(self.epsilon).set_sensitivity(sensitivity_) - + laplace = LaplaceDistribution( + epsilon=float(self.epsilon), sensitivity=float(sensitivity_) + ) + # Add noise to the data point that was removed - noised_value = laplace.randomise(X[data_idx,feature_idx]) - + if self.scale is not None: + noised_value = X[data_idx, feature_idx] - laplace.sample( + scale=float(self.scale) + ) + else: + noised_value = X[data_idx, feature_idx] - laplace.sample() + # Replaced data point in the dataset with noised version - X[data_idx,feature_idx] = noised_value - - self.accountant.spend(self.epsilon, 0) + X[data_idx, feature_idx] = noised_value return X - - + def fit(self, X, y=None): return self def transform(self, X, y=None): - if self.laplace is not None: - self.laplace.set_epsilon(self.epsilon).set_sensitivity(self.sensitivity) - vector_randomise = np.vectorize(self.laplace.randomise) - noised_array = vector_randomise(X) - return noised_array - else: - X = self.sensitivity_calculation( X) - return X - - + X = self.sensitivity_calculation(X) + return X @@ -360,4 +406,5 @@ def transform(self, X, y=None): else: X = self.sensitivity_calculation( X) return X - \ No newline at end of file + + From 679e30a4acff54b2d781bf625a615f0d1863da06 Mon Sep 17 00:00:00 2001 From: SandeepJabez Date: Thu, 21 Jul 2022 00:08:53 +0530 Subject: [PATCH 5/6] fixing part1 --- src/pydp/ml/mechanisms/sklearn_pipeline.py | 250 ++------------------- 1 file changed, 21 insertions(+), 229 deletions(-) diff --git a/src/pydp/ml/mechanisms/sklearn_pipeline.py b/src/pydp/ml/mechanisms/sklearn_pipeline.py index bd42d3b9..2ecc58ab 100644 --- a/src/pydp/ml/mechanisms/sklearn_pipeline.py +++ b/src/pydp/ml/mechanisms/sklearn_pipeline.py @@ -1,12 +1,12 @@ import numbers -from ..util.accountant import BudgetAccountant -from .laplace import Laplace +from pydp.distributions import LaplaceDistribution # type: ignore from sklearn.base import BaseEstimator, TransformerMixin import numpy as np + class LaplaceMechanism(BaseEstimator, TransformerMixin): """ An SKLearn Pipeline operator for applying differentially private noise @@ -26,37 +26,53 @@ def __init__( """ Checks that all parameters of the mechanism have been initialised correctly, and that the mechanism is ready to be used. + Parameters ---------- epsilon : float or int The value of epsilon for achieving :math:`\epsilon`-differential privacy with the mechanism. Must have `epsilon > 0`. + sensitivity : float or int The sensitivity of the mechanism. Must satisfy `sensitivity` > 0. + scale : float or int, optional + cat_feat_idxs : list or None, optional List of integers identifying indicies of categorical features. + cat_feat_idxs : list or None, optional List of integers identifying indicies of categorical features. + cat_feat_idxs : list or None, optional List of integers identifying indicies of categorical features. + cat_feat_idxs : list or None, optional List of integers identifying indicies of categorical features. + + Attributes ------- epsilon Privacy budget to calculate noise. + sensitivty Sensitivity of the mechanism to calculate noise. + scale + + cat_feat_idxs List of indicies that identifies categorical features. + cat_sensitivty Sensitivity of the mechanism to calculate noise for categorical data. + Raises ------ TypeError If epsilon is not a number, or sensitivity is not a number or a callable. + ValueError If epsilon less than 0, or sensitivty is a number but less than 0. """ @@ -122,10 +138,13 @@ def sensitivity_calculation(self, X): """ Perform local differential privacy by adding noise using Laplace mechanismto the dataset if the sensitivity provided if a callable. + + Parameters ---------- X : numpy.array Datset in the form of a 2-dimensional numpy array. + Returns ------ X : numpy.array @@ -181,230 +200,3 @@ def fit(self, X, y=None): def transform(self, X, y=None): X = self.sensitivity_calculation(X) return X - - - -# for gaussian -class GaussianMechanism(BaseEstimator, TransformerMixin): - """ - An SKLearn Pipeline operator for applying differentially private noise - addition using the gaussian mechanism. - - """ - """ - static std::unique_ptr build(double epsilon, double delta, - double l2_sensitivity) { - dp::GaussianMechanism::Builder builder; - builder.SetEpsilon(epsilon); - builder.SetDelta(delta); - builder.SetL2Sensitivity(l2_sensitivity); - return downcast_unique_ptr( - builder.Build().value()); - }; - - Gaussian depends on these parameters - """ - def __init__(self, epsilon=1.0, delta=1.0, sensitivity=1, accountant=None): - """ - Checks that all parameters of the mechanism have been initialised correctly, and that the mechanism is ready - to be used. - - Parameters - ---------- - epsilon : float or int - The value of epsilon for achieving :math:`\epsilon`-differential privacy with the mechanism. Must have - `epsilon > 0`. - - sensitivity : float or int - The sensitivity of the mechanism. Must satisfy `sensitivity` > 0. - - accountant : BudgetAccountant, optional - Accountant to keep track of privacy budget. - - - Attributes - ------- - epsilon - Privacy budget to calculate noise. - - sensitivty - Sensitivity of the mechanism to calculate noise. - - accountant - Accountant to keep track of privacy budget. - - Raises - ------ - TypeError - If epsilon is not a number, or sensitivity is not a number or a callable. - - ValueError - If epsilon less than 0, or sensitivty is a number but less than 0. - """ - - if not isinstance(epsilon, numbers.Number): - raise TypeError(f"Epsilon must be a number. Got type {type(epsilon)}.") - - if epsilon <= 0: - raise ValueError("Epsilon must be at least larger than 0.") - - self.epsilon = epsilon - - if not isinstance(sensitivity, numbers.Number): - if not callable(sensitivity): - raise TypeError(f"Sensitivity must be a number or callable. Got type {type(sensitivity)}.") - - - if isinstance(sensitivity, numbers.Number) and sensitivity <= 0: - raise ValueError("Sensitivity must be at least larger than 0.") - - self.sensitivity = sensitivity - self.accountant = BudgetAccountant.load_default(accountant) - - self.gaussian = None - - if not callable(sensitivity): - self.gaussian = Gaussian() - - def sensitivity_calculation(self, X): - """ - Perform local differential privacy by adding noise using Laplace mechanismto the dataset if the sensitivity - provided if a callable. - - - Parameters - ---------- - X : numpy.array - Datset in the form of a 2-dimensional numpy array. - - Returns - ------ - X : numpy.array - Original parameter X with differentially private noise added. - """ - - n_feature = X.shape[-1] - n_data = X.shape[0] - - - for data_idx in range(n_data): - self.accountant.check(self.epsilon, 0) - for feature_idx in range(n_feature): - - # Array with data point data_idx removed for feature_idx - feature = np.concatenate((X[:data_idx,feature_idx],X[data_idx + 1:,feature_idx])) - - # Calculate sensitivity - sensitivity_ = self.sensitivity(feature) - - # Initialized Gaussian mechanism instance - gaussian = Gaussian().set_epsilon(self.epsilon).set_sensitivity(sensitivity_) - - # Add noise to the data point that was removed - noised_value = gaussian.randomise(X[data_idx,feature_idx]) - - # Replaced data point in the dataset with noised version - X[data_idx,feature_idx] = noised_value - - self.accountant.spend(self.epsilon, 0) - return X - - def fit(self, X, y=None): - return self - - def transform(self, X, y=None): - if self.gaussian is not None: - self.gaussian.set_epsilon(self.epsilon).set_sensitivity(self.sensitivity) - vector_randomise = np.vectorize(self.gaussian.randomise) - noised_array = vector_randomise(X) - return noised_array - else: - X = self.sensitivity_calculation( X) - return X - - - - - -# geometric has lambda -def GeometricMechanism(BaseEstimator, TransformerMixin): - """ - An SKLearn Pipeline operator for applying differentially private noise - addition using the geometric mechanism. - - """ - - def __init__(self, lambda_=1.0, sensitivity=1, accountant=None): - - """ - Checks that all parameters of the mechanism have been initialised correctly, and that the mechanism is ready - to be used. - - Parameters - ---------- - lambda_ : float or int - The value of lambda for achieving :math:`\lambda`-differential privacy with the mechanism. Must have - `lambda > 0`. - - sensitivity : float or int - The sensitivity of the mechanism. Must satisfy `sensitivity` > 0. - - accountant : BudgetAccountant, optional - Accountant to keep track of privacy budget. - - - Attributes - ------- - lambda_ - Privacy budget to calculate noise. - - sensitivty - Sensitivity of the mechanism to calculate noise. - - accountant - Accountant to keep track of privacy budget. - - Raises - ------ - TypeError - If lambda_ is not a number, or sensitivity is not a number or a callable. - - ValueError - If lambda_ less than 0, or sensitivty is a number but less than 0. - """ - - if not isinstance(lambda_, numbers.Number): - raise TypeError(f"Lambda must be a number. Got type {type(lambda_)}.") - - if lambda_ <= 0: - raise ValueError("Lambda must be at least larger than 0.") - - self.lambda_ = lambda_ - - if not isinstance(sensitivity, numbers.Number): - if not callable(sensitivity): - raise TypeError(f"Sensitivity must be a number or callable. Got type {type(sensitivity)}.") - - - if isinstance(sensitivity, numbers.Number) and sensitivity <= 0: - raise ValueError("Sensitivity must be at least larger than 0.") - - self.sensitivity = sensitivity - self.accountant = BudgetAccountant. - self.BaseEstimator = BaseEstimator - self.TransformerMixin = TransformerMixin - - def fit(self, X, y=None): - return self - - def transform(self, X, y=None): - if self.geometric is not None: - self.geometric.set_delta(self.delta).set_sensitivity(self.sensitivity) - vector_randomise = np.vectorize(self.geometric.randomise) - noised_array = vector_randomise(X) - return noised_array - else: - X = self.sensitivity_calculation( X) - return X - - From 2209eb6e1c29630da8ecb7e9453eacbf1982b509 Mon Sep 17 00:00:00 2001 From: SandeepJabez Date: Thu, 21 Jul 2022 00:13:58 +0530 Subject: [PATCH 6/6] fixed errors --- src/pydp/ml/mechanisms/sklearn_pipeline.py | 226 +++++++++++++++++++++ 1 file changed, 226 insertions(+) diff --git a/src/pydp/ml/mechanisms/sklearn_pipeline.py b/src/pydp/ml/mechanisms/sklearn_pipeline.py index 2ecc58ab..c08efd58 100644 --- a/src/pydp/ml/mechanisms/sklearn_pipeline.py +++ b/src/pydp/ml/mechanisms/sklearn_pipeline.py @@ -200,3 +200,229 @@ def fit(self, X, y=None): def transform(self, X, y=None): X = self.sensitivity_calculation(X) return X + + + + +# for gaussian +class GaussianMechanism(BaseEstimator, TransformerMixin): + """ + An SKLearn Pipeline operator for applying differentially private noise + addition using the gaussian mechanism. + + """ + """ + static std::unique_ptr build(double epsilon, double delta, + double l2_sensitivity) { + dp::GaussianMechanism::Builder builder; + builder.SetEpsilon(epsilon); + builder.SetDelta(delta); + builder.SetL2Sensitivity(l2_sensitivity); + return downcast_unique_ptr( + builder.Build().value()); + }; + + Gaussian depends on these parameters + """ + def __init__(self, epsilon=1.0, delta=1.0, sensitivity=1, accountant=None): + """ + Checks that all parameters of the mechanism have been initialised correctly, and that the mechanism is ready + to be used. + + Parameters + ---------- + epsilon : float or int + The value of epsilon for achieving :math:`\epsilon`-differential privacy with the mechanism. Must have + `epsilon > 0`. + + sensitivity : float or int + The sensitivity of the mechanism. Must satisfy `sensitivity` > 0. + + accountant : BudgetAccountant, optional + Accountant to keep track of privacy budget. + + + Attributes + ------- + epsilon + Privacy budget to calculate noise. + + sensitivty + Sensitivity of the mechanism to calculate noise. + + accountant + Accountant to keep track of privacy budget. + + Raises + ------ + TypeError + If epsilon is not a number, or sensitivity is not a number or a callable. + + ValueError + If epsilon less than 0, or sensitivty is a number but less than 0. + """ + + if not isinstance(epsilon, numbers.Number): + raise TypeError(f"Epsilon must be a number. Got type {type(epsilon)}.") + + if epsilon <= 0: + raise ValueError("Epsilon must be at least larger than 0.") + + self.epsilon = epsilon + + if not isinstance(sensitivity, numbers.Number): + if not callable(sensitivity): + raise TypeError(f"Sensitivity must be a number or callable. Got type {type(sensitivity)}.") + + + if isinstance(sensitivity, numbers.Number) and sensitivity <= 0: + raise ValueError("Sensitivity must be at least larger than 0.") + + self.sensitivity = sensitivity + self.accountant = BudgetAccountant.load_default(accountant) + + self.gaussian = None + + if not callable(sensitivity): + self.gaussian = Gaussian() + + def sensitivity_calculation(self, X): + """ + Perform local differential privacy by adding noise using Laplace mechanismto the dataset if the sensitivity + provided if a callable. + + + Parameters + ---------- + X : numpy.array + Datset in the form of a 2-dimensional numpy array. + + Returns + ------ + X : numpy.array + Original parameter X with differentially private noise added. + """ + + n_feature = X.shape[-1] + n_data = X.shape[0] + + + for data_idx in range(n_data): + self.accountant.check(self.epsilon, 0) + for feature_idx in range(n_feature): + + # Array with data point data_idx removed for feature_idx + feature = np.concatenate((X[:data_idx,feature_idx],X[data_idx + 1:,feature_idx])) + + # Calculate sensitivity + sensitivity_ = self.sensitivity(feature) + + # Initialized Gaussian mechanism instance + gaussian = Gaussian().set_epsilon(self.epsilon).set_sensitivity(sensitivity_) + + # Add noise to the data point that was removed + noised_value = gaussian.randomise(X[data_idx,feature_idx]) + + # Replaced data point in the dataset with noised version + X[data_idx,feature_idx] = noised_value + + self.accountant.spend(self.epsilon, 0) + return X + + def fit(self, X, y=None): + return self + + def transform(self, X, y=None): + if self.gaussian is not None: + self.gaussian.set_epsilon(self.epsilon).set_sensitivity(self.sensitivity) + vector_randomise = np.vectorize(self.gaussian.randomise) + noised_array = vector_randomise(X) + return noised_array + else: + X = self.sensitivity_calculation( X) + return X + + + + + +# geometric has lambda +def GeometricMechanism(BaseEstimator, TransformerMixin): + """ + An SKLearn Pipeline operator for applying differentially private noise + addition using the geometric mechanism. + + """ + + def __init__(self, lambda_=1.0, sensitivity=1, accountant=None): + + """ + Checks that all parameters of the mechanism have been initialised correctly, and that the mechanism is ready + to be used. + + Parameters + ---------- + lambda_ : float or int + The value of lambda for achieving :math:`\lambda`-differential privacy with the mechanism. Must have + `lambda > 0`. + + sensitivity : float or int + The sensitivity of the mechanism. Must satisfy `sensitivity` > 0. + + accountant : BudgetAccountant, optional + Accountant to keep track of privacy budget. + + + Attributes + ------- + lambda_ + Privacy budget to calculate noise. + + sensitivty + Sensitivity of the mechanism to calculate noise. + + accountant + Accountant to keep track of privacy budget. + + Raises + ------ + TypeError + If lambda_ is not a number, or sensitivity is not a number or a callable. + + ValueError + If lambda_ less than 0, or sensitivty is a number but less than 0. + """ + + if not isinstance(lambda_, numbers.Number): + raise TypeError(f"Lambda must be a number. Got type {type(lambda_)}.") + + if lambda_ <= 0: + raise ValueError("Lambda must be at least larger than 0.") + + self.lambda_ = lambda_ + + if not isinstance(sensitivity, numbers.Number): + if not callable(sensitivity): + raise TypeError(f"Sensitivity must be a number or callable. Got type {type(sensitivity)}.") + + + if isinstance(sensitivity, numbers.Number) and sensitivity <= 0: + raise ValueError("Sensitivity must be at least larger than 0.") + + self.sensitivity = sensitivity + self.accountant = BudgetAccountant. + self.BaseEstimator = BaseEstimator + self.TransformerMixin = TransformerMixin + + def fit(self, X, y=None): + return self + + def transform(self, X, y=None): + if self.geometric is not None: + self.geometric.set_delta(self.delta).set_sensitivity(self.sensitivity) + vector_randomise = np.vectorize(self.geometric.randomise) + noised_array = vector_randomise(X) + return noised_array + else: + X = self.sensitivity_calculation( X) + return X \ No newline at end of file