Trusted-AI
diff --git a/Dockerfile b/Dockerfile
Lines changed: 1 addition & 1 deletion
diff --git a/art/defences/detector/poison/activation_defence.py b/art/defences/detector/poison/activation_defence.py
Lines changed: 4 additions & 4 deletions
diff --git a/art/defences/detector/poison/clustering_analyzer.py b/art/defences/detector/poison/clustering_analyzer.py
Lines changed: 6 additions & 1 deletion
diff --git a/art/defences/detector/poison/clustering_centroid_analysis.py b/art/defences/detector/poison/clustering_centroid_analysis.py
Lines changed: 49 additions & 37 deletions
@@ -9,7 +9,7 @@ RUN pip3 install tensorflow==2.9.1 keras==2.9.0 numpy==1.22.4 scipy==1.8.1 matpl
                  resampy==0.3.1 ffmpeg-python==0.2.0 cma==3.2.2 pandas==1.4.3 h5py==3.7.0 tensorflow-addons==0.17.1 \
                  torch==1.12.0 torchaudio==0.12.0 torchvision==0.13.0 catboost==1.0.6 GPy==1.10.0 \
                  lightgbm==3.3.2 xgboost==1.6.1 kornia==0.6.6 lief==0.12.1 pytest==7.1.2 pytest-pep8==1.0.6 \
-                 pytest-mock==3.8.2 requests==2.28.1 umap-learn==0.5.7
+                 pytest-mock==3.8.2 requests==2.28.1 umap-learn==0.5.7 psutil==7.0.0
 
 RUN apt-get -y install ffmpeg libavcodec-extra vim git
 
 
@@ -325,12 +325,12 @@ def analyze_clusters(self, **kwargs) -> tuple[dict[str, Any], np.ndarray]:
             self.assigned_clean_by_class, self.poisonous_clusters, report = analyzer(self.clusters_by_class)
         elif analysis_type == ClusterAnalysisType.DISTANCE:
             self.assigned_clean_by_class, self.poisonous_clusters, report = analyzer(
-                self.clusters_by_class,
-                separated_activations=self.red_activations_by_class)
+                self.clusters_by_class, separated_activations=self.red_activations_by_class
+            )
         elif analysis_type == ClusterAnalysisType.SILHOUETTE_SCORES:
             self.assigned_clean_by_class, self.poisonous_clusters, report = analyzer(
-                self.clusters_by_class,
-                reduced_activations_by_class=self.red_activations_by_class)
+                self.clusters_by_class, reduced_activations_by_class=self.red_activations_by_class
+            )
         else:
             raise ValueError("Unsupported cluster analysis technique " + analysis_type.value)
 
 
@@ -32,6 +32,7 @@
 Class for all methodologies implemented to analyze clusters and determine whether they are poisonous.
 """
 
+
 @unique
 class ClusterAnalysisType(Enum):
     SMALLER = "smaller"
@@ -52,7 +53,7 @@ def get_cluster_analyzer(cluster_analysis_type: ClusterAnalysisType):
         ClusterAnalysisType.SMALLER: analyze_by_size,
         ClusterAnalysisType.RELATIVE_SIZE: analyze_by_relative_size,
         ClusterAnalysisType.DISTANCE: analyze_by_distance,
-        ClusterAnalysisType.SILHOUETTE_SCORES: analyze_by_silhouette_score
+        ClusterAnalysisType.SILHOUETTE_SCORES: analyze_by_silhouette_score,
     }
 
     if cluster_analysis_type not in analyzers:
@@ -75,6 +76,7 @@ def assign_class(clusters: np.ndarray, clean_clusters: np.ndarray, poison_cluste
     assigned_clean[np.isin(clusters, poison_clusters)] = 0
     return assigned_clean
 
+
 def analyze_by_size(separated_clusters: list[np.ndarray]) -> tuple[np.ndarray, np.ndarray, dict[str, int]]:
     """
     Designates as poisonous the cluster with fewer items on it.
@@ -127,6 +129,7 @@ def analyze_by_size(separated_clusters: list[np.ndarray]) -> tuple[np.ndarray, n
     report["suspicious_clusters"] = report["suspicious_clusters"] + np.sum(summary_poison_clusters)
     return np.asarray(all_assigned_clean, dtype=object), summary_poison_clusters, report
 
+
 def analyze_by_distance(
     separated_clusters: list[np.ndarray],
     separated_activations: list[np.ndarray],
@@ -215,6 +218,7 @@ def analyze_by_distance(
     all_assigned_clean_array = np.asarray(all_assigned_clean, dtype=object)
     return all_assigned_clean_array, summary_poison_clusters, report
 
+
 def analyze_by_relative_size(
     separated_clusters: list[np.ndarray],
     size_threshold: float = 0.35,
@@ -278,6 +282,7 @@ def analyze_by_relative_size(
     report["suspicious_clusters"] = report["suspicious_clusters"] + np.sum(summary_poison_clusters).item()
     return np.asarray(all_assigned_clean, dtype=object), summary_poison_clusters, report
 
+
 def analyze_by_silhouette_score(
     separated_clusters: list,
     reduced_activations_by_class: list,
 
@@ -37,6 +37,7 @@
 logger = logging.getLogger(__name__)
 tf.get_logger().setLevel(logging.WARN)
 
+
 def _encode_labels(y: np.array) -> (np.array, set, np.array, dict):
     """
     Given the target column, it generates the label encoding and the reverse mapping to use in the classification process
@@ -51,10 +52,12 @@ def _encode_labels(y: np.array) -> (np.array, set, np.array, dict):
     unique_classes = set(reverse_mapping.values())
     return y_encoded, unique_classes, label_mapping, reverse_mapping
 
+
 @tf.function(reduce_retracing=True)
 def _calculate_centroid_tf(features):
     return tf.reduce_mean(features, axis=0)
 
+
 def _calculate_centroid(selected_indices: np.ndarray, features: np.array) -> np.ndarray:
     """
     Returns the centroid of all data within a specific cluster that is classified as a specific class label
@@ -68,6 +71,7 @@ def _calculate_centroid(selected_indices: np.ndarray, features: np.array) -> np.
     centroid = _calculate_centroid_tf(features_tf)
     return centroid.numpy()
 
+
 def _class_clustering(y: np.array, features: np.array, label: any, clusterer: ClusterMixin) -> (np.array, np.array):
     """
     Given a class label, it clusters all the feature representations that map to that class
@@ -83,10 +87,12 @@ def _class_clustering(y: np.array, features: np.array, label: any, clusterer: Cl
     cluster_labels = clusterer.fit_predict(selected_features)
     return cluster_labels, selected_indices
 
+
 @tf.function
 def _calculate_features(feature_representation_model, x):
     return feature_representation_model(x, training=False)
 
+
 def _feature_extraction(x_train: np.array, feature_representation_model: Model) -> np.ndarray:
     """
     Extract features from the model using the feature representation sub model.
@@ -120,7 +126,9 @@ def _feature_extraction(x_train: np.array, feature_representation_model: Model)
     return features.numpy()
 
 
-def _cluster_classes(y_train: np.array, unique_classes: set[int], features: np.array, clusterer: ClusterMixin) -> (np.array, dict):
+def _cluster_classes(
+    y_train: np.array, unique_classes: set[int], features: np.array, clusterer: ClusterMixin
+) -> (np.array, dict):
     """
     Clusters all the classes in the given dataset into uniquely identifiable clusters.
 
@@ -131,7 +139,7 @@ def _cluster_classes(y_train: np.array, unique_classes: set[int], features: np.a
     # clustering runs by classes
     logging.info("Clustering classes...")
     used_cluster_labels = 0
-    cluster_class_mapping  = dict()
+    cluster_class_mapping = dict()
     class_cluster_labels = np.full(len(y_train), -1)
 
     logging.info(f"Unique classes are: {unique_classes}")
@@ -145,8 +153,8 @@ def _cluster_classes(y_train: np.array, unique_classes: set[int], features: np.a
         class_cluster_labels[selected_indices] = cluster_labels
 
         # the class (label) corresponding to the cluster is saved for centroid deviation calculation
-        for l in np.unique(cluster_labels[cluster_labels != -1]):
-            cluster_class_mapping[l] = class_label
+        for label in np.unique(cluster_labels[cluster_labels != -1]):
+            cluster_class_mapping[label] = class_label
 
     return class_cluster_labels, cluster_class_mapping
 
@@ -170,7 +178,7 @@ class ClusteringCentroidAnalysis(PoisonFilteringDefence):
         "final_feature_layer_name",
         "misclassification_threshold",
         "reducer",
-        "clsuterer"
+        "clsuterer",
     ]
     valid_clustering = ["DBSCAN"]
     valid_reduce = ["UMAP"]
@@ -184,7 +192,6 @@ def _get_benign_data(self) -> (np.ndarray, np.ndarray):
         if len(self.benign_indices) == 0:
             raise ValueError(f"Benign indices passed ({len(self.benign_indices)}) are not enough to run the algorithm")
 
-
         return self.x_train[self.benign_indices], self.y_train[self.benign_indices]
 
     def _extract_submodels(self, final_feature_layer_name: str) -> (Model, Model):
@@ -204,18 +211,21 @@ def _extract_submodels(self, final_feature_layer_name: str) -> (Model, Model):
         except ValueError:
             raise ValueError(f"Layer with name '{final_feature_layer_name}' not found in the model.")
 
-        if not hasattr(final_feature_layer, 'activation') or final_feature_layer.activation != tf.keras.activations.relu:
+        if (
+            not hasattr(final_feature_layer, "activation")
+            or final_feature_layer.activation != tf.keras.activations.relu
+        ):
             warnings.warn(f"Final feature layer '{final_feature_layer_name}' must have a ReLU activation.", UserWarning)
 
         # Create a feature representation submodel with weight sharing
         feature_representation_model = Model(
             inputs=keras_model.inputs,
             outputs=keras_model.get_layer(final_feature_layer_name).output,
-            name="feature_representation_model"
+            name="feature_representation_model",
         )
 
         final_feature_layer_index = keras_model.layers.index(final_feature_layer)
-        classifier_submodel_layers = keras_model.layers[final_feature_layer_index + 1:]
+        classifier_submodel_layers = keras_model.layers[final_feature_layer_index + 1 :]
 
         # Create the classifier submodel
         classifying_submodel = Sequential(classifier_submodel_layers, name="classifying_submodel")
@@ -245,15 +255,15 @@ def get_clusters(self) -> np.array:
         return result
 
     def __init__(
-            self,
-            classifier: "CLASSIFIER_TYPE",
-            x_train: np.ndarray,
-            y_train: np.ndarray,
-            benign_indices: np.array,
-            final_feature_layer_name: str,
-            misclassification_threshold: float,
-            reducer = UMAP(n_neighbors=5, min_dist=0),
-            clusterer = DBSCAN(eps=0.8, min_samples=20)
+        self,
+        classifier: "CLASSIFIER_TYPE",
+        x_train: np.ndarray,
+        y_train: np.ndarray,
+        benign_indices: np.array,
+        final_feature_layer_name: str,
+        misclassification_threshold: float,
+        reducer=UMAP(n_neighbors=5, min_dist=0),
+        clusterer=DBSCAN(eps=0.8, min_samples=20),
     ):
         """
         Creates a :class: `ClusteringCentroidAnalysis` object for the given classifier
@@ -298,13 +308,11 @@ def evaluate_defence(self, is_clean: np.ndarray, **kwargs) -> str:
 
         # Create evaluator and analyze results
         errors_by_class, confusion_matrix_json = evaluator.analyze_correctness(
-            assigned_clean_by_class=assigned_clean_by_class,
-            is_clean_by_class=is_clean_by_class
+            assigned_clean_by_class=assigned_clean_by_class, is_clean_by_class=is_clean_by_class
         )
 
         return confusion_matrix_json
 
-
     def _calculate_misclassification_rate(self, class_label: int, deviation: np.array) -> np.float64:
         """
         Calculate the misclassification rate when applying a deviation to other classes.
@@ -324,10 +332,12 @@ def _calculate_misclassification_rate(self, class_label: int, deviation: np.arra
         sample_features = self.feature_representation_model.predict(sample_data)
         feature_shape = sample_features.shape[1:]
 
-        @tf.function(input_signature=[
-            tf.TensorSpec(shape=[None, *feature_shape], dtype=tf.float32),
-            tf.TensorSpec(shape=deviation.shape, dtype=tf.float32)
-        ])
+        @tf.function(
+            input_signature=[
+                tf.TensorSpec(shape=[None, *feature_shape], dtype=tf.float32),
+                tf.TensorSpec(shape=deviation.shape, dtype=tf.float32),
+            ]
+        )
         def predict_with_deviation(features, deviation):
             # Add deviation to features and pass through ReLu to keep in latent space
             deviated_features = tf.nn.relu(features + deviation)
@@ -390,11 +400,12 @@ def predict_with_deviation(features, deviation):
             return np.float64(0.0)
 
         all_f_vectors_np = np.concatenate(all_features, axis=0)
-        logger.info(f"MR --> {class_label} , |f| = {np.linalg.norm(np.mean(all_f_vectors_np, axis=0))}: {misclassified_elements} / {total_elements} = {np.float64(misclassified_elements) / np.float64(total_elements)}")
+        logger.info(
+            f"MR --> {class_label} , |f| = {np.linalg.norm(np.mean(all_f_vectors_np, axis=0))}: {misclassified_elements} / {total_elements} = {np.float64(misclassified_elements) / np.float64(total_elements)}"
+        )
 
         return np.float64(misclassified_elements) / np.float64(total_elements)
 
-
     def detect_poison(self, **kwargs) -> (dict, list[int]):
 
         # saves important information about the algorithm execution for further analysis
@@ -407,14 +418,13 @@ def detect_poison(self, **kwargs) -> (dict, list[int]):
         # FIXME: temporal fix to test other layers
         if len(self.features.shape) > 2:
             num_samples = self.features.shape[0]
-            self.features = self.features.reshape(num_samples, -1) # Flattening
+            self.features = self.features.reshape(num_samples, -1)  # Flattening
 
         self.features_reduced = self.reducer.fit_transform(self.features)
 
-        self.class_cluster_labels, self.cluster_class_mapping = _cluster_classes(self.y_train,
-                                                                       self.unique_classes,
-                                                                       self.features_reduced,
-                                                                       self.clusterer)
+        self.class_cluster_labels, self.cluster_class_mapping = _cluster_classes(
+            self.y_train, self.unique_classes, self.features_reduced, self.clusterer
+        )
 
         # outliers are poisoned
         outlier_indices = np.where(self.class_cluster_labels == -1)[0]
@@ -443,8 +453,7 @@ def detect_poison(self, **kwargs) -> (dict, list[int]):
         # for each target class
         for class_label in self.unique_classes:
             benign_class_indices = np.intersect1d(self.benign_indices, np.where(self.y_train == class_label)[0])
-            benign_centroids[class_label] = _calculate_centroid(benign_class_indices,
-                                                                self.features)
+            benign_centroids[class_label] = _calculate_centroid(benign_class_indices, self.features)
 
         logging.info("Calculating misclassification rates...")
         misclassification_rates = dict()
@@ -457,19 +466,22 @@ def detect_poison(self, **kwargs) -> (dict, list[int]):
             # MR^k_i
             # with unique cluster labels for each cluster in each clustering run, the label already maps to a target class
             misclassification_rates[cluster_label] = self._calculate_misclassification_rate(class_label, deviation)
-            logging.info(f"MR (k={cluster_label}, i={class_label}, |d|={np.linalg.norm(deviation)}) = {misclassification_rates[cluster_label]}")
+            logging.info(
+                f"MR (k={cluster_label}, i={class_label}, |d|={np.linalg.norm(deviation)}) = {misclassification_rates[cluster_label]}"
+            )
 
             report["cluster_data"][cluster_label]["centroid_l2"] = np.linalg.norm(real_centroids[cluster_label])
             report["cluster_data"][cluster_label]["deviation_l2"] = np.linalg.norm(deviation)
             report["cluster_data"][cluster_label]["class"] = class_label
             report["cluster_data"][cluster_label]["misclassification_rate"] = misclassification_rates[cluster_label]
 
-
         logging.info("Evaluating cluster misclassification...")
         for cluster_label, mr in misclassification_rates.items():
             if mr >= 1 - self.misclassification_threshold:
                 cluster_indices = np.where(self.class_cluster_labels == cluster_label)[0]
                 self.is_clean[cluster_indices] = 0
-                logging.info(f"Cluster k={cluster_label} i={self.cluster_class_mapping[cluster_label]} considered poison ({misclassification_rates[cluster_label]} >= {1 - self.misclassification_threshold})")
+                logging.info(
+                    f"Cluster k={cluster_label} i={self.cluster_class_mapping[cluster_label]} considered poison ({misclassification_rates[cluster_label]} >= {1 - self.misclassification_threshold})"
+                )
 
         return report, self.is_clean.copy()