37
37
logger = logging .getLogger (__name__ )
38
38
tf .get_logger ().setLevel (logging .WARN )
39
39
40
+
40
41
def _encode_labels (y : np .array ) -> (np .array , set , np .array , dict ):
41
42
"""
42
43
Given the target column, it generates the label encoding and the reverse mapping to use in the classification process
@@ -51,10 +52,12 @@ def _encode_labels(y: np.array) -> (np.array, set, np.array, dict):
51
52
unique_classes = set (reverse_mapping .values ())
52
53
return y_encoded , unique_classes , label_mapping , reverse_mapping
53
54
55
+
54
56
@tf.function(reduce_retracing=True)
def _calculate_centroid_tf(features):
    """
    Compute the centroid of the given features as their mean along axis 0.

    :param features: tensor-like collection of feature representations
    :return: tensor holding the mean of ``features`` taken over axis 0
    """
    # Averaging over the first axis collapses it and keeps the remaining dimensions.
    centroid = tf.reduce_mean(features, axis=0)
    return centroid
57
59
60
+
58
61
def _calculate_centroid (selected_indices : np .ndarray , features : np .array ) -> np .ndarray :
59
62
"""
60
63
Returns the centroid of all data within a specific cluster that is classified as a specific class label
@@ -68,6 +71,7 @@ def _calculate_centroid(selected_indices: np.ndarray, features: np.array) -> np.
68
71
centroid = _calculate_centroid_tf (features_tf )
69
72
return centroid .numpy ()
70
73
74
+
71
75
def _class_clustering (y : np .array , features : np .array , label : any , clusterer : ClusterMixin ) -> (np .array , np .array ):
72
76
"""
73
77
Given a class label, it clusters all the feature representations that map to that class
@@ -83,10 +87,12 @@ def _class_clustering(y: np.array, features: np.array, label: any, clusterer: Cl
83
87
cluster_labels = clusterer .fit_predict (selected_features )
84
88
return cluster_labels , selected_indices
85
89
90
+
86
91
@tf.function
def _calculate_features(feature_representation_model, x):
    """
    Run the feature representation model on ``x`` with training behaviour disabled.

    :param feature_representation_model: callable model that accepts a ``training`` keyword argument
    :param x: input forwarded unchanged to the model
    :return: the output of the model for ``x`` when called with ``training=False``
    """
    # training=False is passed explicitly so the model is evaluated in inference mode.
    extracted_features = feature_representation_model(x, training=False)
    return extracted_features
89
94
95
+
90
96
def _feature_extraction (x_train : np .array , feature_representation_model : Model ) -> np .ndarray :
91
97
"""
92
98
Extract features from the model using the feature representation sub model.
@@ -120,7 +126,9 @@ def _feature_extraction(x_train: np.array, feature_representation_model: Model)
120
126
return features .numpy ()
121
127
122
128
123
- def _cluster_classes (y_train : np .array , unique_classes : set [int ], features : np .array , clusterer : ClusterMixin ) -> (np .array , dict ):
129
+ def _cluster_classes (
130
+ y_train : np .array , unique_classes : set [int ], features : np .array , clusterer : ClusterMixin
131
+ ) -> (np .array , dict ):
124
132
"""
125
133
Clusters all the classes in the given dataset into uniquely identifiable clusters.
126
134
@@ -131,7 +139,7 @@ def _cluster_classes(y_train: np.array, unique_classes: set[int], features: np.a
131
139
# clustering runs by classes
132
140
logging .info ("Clustering classes..." )
133
141
used_cluster_labels = 0
134
- cluster_class_mapping = dict ()
142
+ cluster_class_mapping = dict ()
135
143
class_cluster_labels = np .full (len (y_train ), - 1 )
136
144
137
145
logging .info (f"Unique classes are: { unique_classes } " )
@@ -145,8 +153,8 @@ def _cluster_classes(y_train: np.array, unique_classes: set[int], features: np.a
145
153
class_cluster_labels [selected_indices ] = cluster_labels
146
154
147
155
# the class (label) corresponding to the cluster is saved for centroid deviation calculation
148
- for l in np .unique (cluster_labels [cluster_labels != - 1 ]):
149
- cluster_class_mapping [l ] = class_label
156
+ for label in np .unique (cluster_labels [cluster_labels != - 1 ]):
157
+ cluster_class_mapping [label ] = class_label
150
158
151
159
return class_cluster_labels , cluster_class_mapping
152
160
@@ -170,7 +178,7 @@ class ClusteringCentroidAnalysis(PoisonFilteringDefence):
170
178
"final_feature_layer_name" ,
171
179
"misclassification_threshold" ,
172
180
"reducer" ,
173
- "clsuterer"
181
+ "clsuterer" ,
174
182
]
175
183
valid_clustering = ["DBSCAN" ]
176
184
valid_reduce = ["UMAP" ]
@@ -184,7 +192,6 @@ def _get_benign_data(self) -> (np.ndarray, np.ndarray):
184
192
if len (self .benign_indices ) == 0 :
185
193
raise ValueError (f"Benign indices passed ({ len (self .benign_indices )} ) are not enough to run the algorithm" )
186
194
187
-
188
195
return self .x_train [self .benign_indices ], self .y_train [self .benign_indices ]
189
196
190
197
def _extract_submodels (self , final_feature_layer_name : str ) -> (Model , Model ):
@@ -204,18 +211,21 @@ def _extract_submodels(self, final_feature_layer_name: str) -> (Model, Model):
204
211
except ValueError :
205
212
raise ValueError (f"Layer with name '{ final_feature_layer_name } ' not found in the model." )
206
213
207
- if not hasattr (final_feature_layer , 'activation' ) or final_feature_layer .activation != tf .keras .activations .relu :
214
+ if (
215
+ not hasattr (final_feature_layer , "activation" )
216
+ or final_feature_layer .activation != tf .keras .activations .relu
217
+ ):
208
218
warnings .warn (f"Final feature layer '{ final_feature_layer_name } ' must have a ReLU activation." , UserWarning )
209
219
210
220
# Create a feature representation submodel with weight sharing
211
221
feature_representation_model = Model (
212
222
inputs = keras_model .inputs ,
213
223
outputs = keras_model .get_layer (final_feature_layer_name ).output ,
214
- name = "feature_representation_model"
224
+ name = "feature_representation_model" ,
215
225
)
216
226
217
227
final_feature_layer_index = keras_model .layers .index (final_feature_layer )
218
- classifier_submodel_layers = keras_model .layers [final_feature_layer_index + 1 :]
228
+ classifier_submodel_layers = keras_model .layers [final_feature_layer_index + 1 :]
219
229
220
230
# Create the classifier submodel
221
231
classifying_submodel = Sequential (classifier_submodel_layers , name = "classifying_submodel" )
@@ -245,15 +255,15 @@ def get_clusters(self) -> np.array:
245
255
return result
246
256
247
257
def __init__ (
248
- self ,
249
- classifier : "CLASSIFIER_TYPE" ,
250
- x_train : np .ndarray ,
251
- y_train : np .ndarray ,
252
- benign_indices : np .array ,
253
- final_feature_layer_name : str ,
254
- misclassification_threshold : float ,
255
- reducer = UMAP (n_neighbors = 5 , min_dist = 0 ),
256
- clusterer = DBSCAN (eps = 0.8 , min_samples = 20 )
258
+ self ,
259
+ classifier : "CLASSIFIER_TYPE" ,
260
+ x_train : np .ndarray ,
261
+ y_train : np .ndarray ,
262
+ benign_indices : np .array ,
263
+ final_feature_layer_name : str ,
264
+ misclassification_threshold : float ,
265
+ reducer = UMAP (n_neighbors = 5 , min_dist = 0 ),
266
+ clusterer = DBSCAN (eps = 0.8 , min_samples = 20 ),
257
267
):
258
268
"""
259
269
Creates a :class: `ClusteringCentroidAnalysis` object for the given classifier
@@ -298,13 +308,11 @@ def evaluate_defence(self, is_clean: np.ndarray, **kwargs) -> str:
298
308
299
309
# Create evaluator and analyze results
300
310
errors_by_class , confusion_matrix_json = evaluator .analyze_correctness (
301
- assigned_clean_by_class = assigned_clean_by_class ,
302
- is_clean_by_class = is_clean_by_class
311
+ assigned_clean_by_class = assigned_clean_by_class , is_clean_by_class = is_clean_by_class
303
312
)
304
313
305
314
return confusion_matrix_json
306
315
307
-
308
316
def _calculate_misclassification_rate (self , class_label : int , deviation : np .array ) -> np .float64 :
309
317
"""
310
318
Calculate the misclassification rate when applying a deviation to other classes.
@@ -324,10 +332,12 @@ def _calculate_misclassification_rate(self, class_label: int, deviation: np.arra
324
332
sample_features = self .feature_representation_model .predict (sample_data )
325
333
feature_shape = sample_features .shape [1 :]
326
334
327
- @tf .function (input_signature = [
328
- tf .TensorSpec (shape = [None , * feature_shape ], dtype = tf .float32 ),
329
- tf .TensorSpec (shape = deviation .shape , dtype = tf .float32 )
330
- ])
335
+ @tf .function (
336
+ input_signature = [
337
+ tf .TensorSpec (shape = [None , * feature_shape ], dtype = tf .float32 ),
338
+ tf .TensorSpec (shape = deviation .shape , dtype = tf .float32 ),
339
+ ]
340
+ )
331
341
def predict_with_deviation (features , deviation ):
332
342
# Add deviation to features and pass through ReLU to keep them in the latent space
333
343
deviated_features = tf .nn .relu (features + deviation )
@@ -390,11 +400,12 @@ def predict_with_deviation(features, deviation):
390
400
return np .float64 (0.0 )
391
401
392
402
all_f_vectors_np = np .concatenate (all_features , axis = 0 )
393
- logger .info (f"MR --> { class_label } , |f| = { np .linalg .norm (np .mean (all_f_vectors_np , axis = 0 ))} : { misclassified_elements } / { total_elements } = { np .float64 (misclassified_elements ) / np .float64 (total_elements )} " )
403
+ logger .info (
404
+ f"MR --> { class_label } , |f| = { np .linalg .norm (np .mean (all_f_vectors_np , axis = 0 ))} : { misclassified_elements } / { total_elements } = { np .float64 (misclassified_elements ) / np .float64 (total_elements )} "
405
+ )
394
406
395
407
return np .float64 (misclassified_elements ) / np .float64 (total_elements )
396
408
397
-
398
409
def detect_poison (self , ** kwargs ) -> (dict , list [int ]):
399
410
400
411
# saves important information about the algorithm execution for further analysis
@@ -407,14 +418,13 @@ def detect_poison(self, **kwargs) -> (dict, list[int]):
407
418
# FIXME: temporary fix to test other layers
408
419
if len (self .features .shape ) > 2 :
409
420
num_samples = self .features .shape [0 ]
410
- self .features = self .features .reshape (num_samples , - 1 ) # Flattening
421
+ self .features = self .features .reshape (num_samples , - 1 ) # Flattening
411
422
412
423
self .features_reduced = self .reducer .fit_transform (self .features )
413
424
414
- self .class_cluster_labels , self .cluster_class_mapping = _cluster_classes (self .y_train ,
415
- self .unique_classes ,
416
- self .features_reduced ,
417
- self .clusterer )
425
+ self .class_cluster_labels , self .cluster_class_mapping = _cluster_classes (
426
+ self .y_train , self .unique_classes , self .features_reduced , self .clusterer
427
+ )
418
428
419
429
# outliers are poisoned
420
430
outlier_indices = np .where (self .class_cluster_labels == - 1 )[0 ]
@@ -443,8 +453,7 @@ def detect_poison(self, **kwargs) -> (dict, list[int]):
443
453
# for each target class
444
454
for class_label in self .unique_classes :
445
455
benign_class_indices = np .intersect1d (self .benign_indices , np .where (self .y_train == class_label )[0 ])
446
- benign_centroids [class_label ] = _calculate_centroid (benign_class_indices ,
447
- self .features )
456
+ benign_centroids [class_label ] = _calculate_centroid (benign_class_indices , self .features )
448
457
449
458
logging .info ("Calculating misclassification rates..." )
450
459
misclassification_rates = dict ()
@@ -457,19 +466,22 @@ def detect_poison(self, **kwargs) -> (dict, list[int]):
457
466
# MR^k_i
458
467
# with unique cluster labels for each cluster in each clustering run, the label already maps to a target class
459
468
misclassification_rates [cluster_label ] = self ._calculate_misclassification_rate (class_label , deviation )
460
- logging .info (f"MR (k={ cluster_label } , i={ class_label } , |d|={ np .linalg .norm (deviation )} ) = { misclassification_rates [cluster_label ]} " )
469
+ logging .info (
470
+ f"MR (k={ cluster_label } , i={ class_label } , |d|={ np .linalg .norm (deviation )} ) = { misclassification_rates [cluster_label ]} "
471
+ )
461
472
462
473
report ["cluster_data" ][cluster_label ]["centroid_l2" ] = np .linalg .norm (real_centroids [cluster_label ])
463
474
report ["cluster_data" ][cluster_label ]["deviation_l2" ] = np .linalg .norm (deviation )
464
475
report ["cluster_data" ][cluster_label ]["class" ] = class_label
465
476
report ["cluster_data" ][cluster_label ]["misclassification_rate" ] = misclassification_rates [cluster_label ]
466
477
467
-
468
478
logging .info ("Evaluating cluster misclassification..." )
469
479
for cluster_label , mr in misclassification_rates .items ():
470
480
if mr >= 1 - self .misclassification_threshold :
471
481
cluster_indices = np .where (self .class_cluster_labels == cluster_label )[0 ]
472
482
self .is_clean [cluster_indices ] = 0
473
- logging .info (f"Cluster k={ cluster_label } i={ self .cluster_class_mapping [cluster_label ]} considered poison ({ misclassification_rates [cluster_label ]} >= { 1 - self .misclassification_threshold } )" )
483
+ logging .info (
484
+ f"Cluster k={ cluster_label } i={ self .cluster_class_mapping [cluster_label ]} considered poison ({ misclassification_rates [cluster_label ]} >= { 1 - self .misclassification_threshold } )"
485
+ )
474
486
475
487
return report , self .is_clean .copy ()
0 commit comments