@@ -19,10 +19,11 @@
 
 import logging
 import warnings
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Tuple, Union, Dict, Any, List
 
 import numpy as np
 import tensorflow as tf
+from numpy._typing import _64Bit
 from sklearn.base import ClusterMixin
 from sklearn.cluster import DBSCAN
 from tensorflow.keras import Model, Sequential
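A quick aside on the typing switch these imports enable (illustrative only, not part of the patch): np.array is a factory function rather than a type, and a parenthesized pair in a return annotation is just a runtime tuple, so static checkers flag both. The signatures below therefore move to np.ndarray and typing.Tuple, as in this minimal sketch with made-up function names:

# Minimal sketch, assuming mypy-style checking; the function names are illustrative.
import numpy as np
from typing import Tuple

def before(y: np.array) -> (np.ndarray, dict):  # flagged: np.array is a function, (A, B) is a tuple object
    ...

def after(y: np.ndarray) -> Tuple[np.ndarray, dict]:  # the style this PR converts signatures to
    ...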
@@ -38,11 +39,11 @@
 tf.get_logger().setLevel(logging.WARN)
 
 
-def _encode_labels(y: np.array) -> (np.array, set, np.array, dict):
+def _encode_labels(y: np.ndarray) -> Tuple[np.ndarray, set, np.ndarray, dict]:
     """
     Given the target column, it generates the label encoding and the reverse mapping to use in the classification process
 
-    :param y: 1D np.array with single values that represent the different classes
+    :param y: 1D np.ndarray with single values that represent the different classes
     :return: (y_encoded, unique_classes, label_mapping, reverse_mapping) encoded column, set of unique classes,
     mapping from class to numeric label, and mapping from numeric label to class
     """
@@ -58,7 +59,7 @@ def _calculate_centroid_tf(features):
     return tf.reduce_mean(features, axis=0)
 
 
-def _calculate_centroid(selected_indices: np.ndarray, features: np.array) -> np.ndarray:
+def _calculate_centroid(selected_indices: np.ndarray, features: np.ndarray) -> np.ndarray:
     """
     Returns the centroid of all data within a specific cluster that is classified as a specific class label
 
@@ -72,7 +73,9 @@ def _calculate_centroid(selected_indices: np.ndarray, features: np.array) -> np.
     return centroid.numpy()
 
 
-def _class_clustering(y: np.array, features: np.array, label: any, clusterer: ClusterMixin) -> (np.array, np.array):
+def _class_clustering(
+    y: np.ndarray, features: np.ndarray, label: Union[int, str], clusterer: ClusterMixin
+) -> Tuple[np.ndarray, np.ndarray]:
     """
     Given a class label, it clusters all the feature representations that map to that class
 
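For readers skimming the hunk above: a minimal sketch of what a per-class clustering helper with this signature plausibly does. The real body is outside the hunk, so treat this as an assumption rather than the project's implementation.

# Hypothetical stand-in for _class_clustering; not code from this diff.
import numpy as np
from sklearn.cluster import DBSCAN

def class_clustering_sketch(y, features, label, clusterer=None):
    clusterer = clusterer or DBSCAN()
    selected_indices = np.where(y == label)[0]              # rows mapped to this class
    cluster_labels = clusterer.fit_predict(features[selected_indices])
    return selected_indices, cluster_labels                 # Tuple[np.ndarray, np.ndarray]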
@@ -93,7 +96,7 @@ def _calculate_features(feature_representation_model, x):
     return feature_representation_model(x, training=False)
 
 
-def _feature_extraction(x_train: np.array, feature_representation_model: Model) -> np.ndarray:
+def _feature_extraction(x_train: np.ndarray, feature_representation_model: Model) -> np.ndarray:
     """
     Extract features from the model using the feature representation sub model.
 
@@ -108,7 +111,7 @@ def _feature_extraction(x_train: np.array, feature_representation_model: Model)
     # Process in batches to avoid memory issues
     batch_size = 256
     num_batches = int(np.ceil(len(data) / batch_size))
-    features = []
+    features: List[tf.Tensor] = []
 
     for i in range(num_batches):
         start_idx = i * batch_size
@@ -118,17 +121,18 @@ def _feature_extraction(x_train: np.array, feature_representation_model: Model)
         features.append(batch_features)
 
     # Concatenate all batches
+    final_features_tensor: tf.Tensor
     if len(features) > 1:
-        features = tf.concat(features, axis=0)
+        final_features_tensor = tf.concat(features, axis=0)
     else:
-        features = features[0]
+        final_features_tensor = features[0]
 
-    return features.numpy()
+    return final_features_tensor.numpy()
 
 
 def _cluster_classes(
-    y_train: np.array, unique_classes: set[int], features: np.array, clusterer: ClusterMixin
-) -> (np.array, dict):
+    y_train: np.ndarray, unique_classes: set[int], features: np.ndarray, clusterer: ClusterMixin
+) -> Tuple[np.ndarray, dict]:
     """
     Clusters all the classes in the given dataset into uniquely identifiable clusters.
 
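The hunk above only renames the accumulator so the variable keeps a single static type; for context, a condensed sketch of the batch-and-concatenate pattern it annotates, pieced together from the visible lines (model and data are placeholders, not the project's names):

# Condensed sketch of the batched feature-extraction pattern; illustrative only.
from typing import List
import numpy as np
import tensorflow as tf

def extract_features_sketch(model: tf.keras.Model, data: np.ndarray, batch_size: int = 256) -> np.ndarray:
    features: List[tf.Tensor] = []
    num_batches = int(np.ceil(len(data) / batch_size))
    for i in range(num_batches):
        batch = data[i * batch_size : (i + 1) * batch_size]
        features.append(model(batch, training=False))  # forward pass without training-time behaviour
    final = tf.concat(features, axis=0) if len(features) > 1 else features[0]
    return final.numpy()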
@@ -183,7 +187,7 @@ class ClusteringCentroidAnalysis(PoisonFilteringDefence):
     valid_clustering = ["DBSCAN"]
     valid_reduce = ["UMAP"]
 
-    def _get_benign_data(self) -> (np.ndarray, np.ndarray):
+    def _get_benign_data(self) -> Tuple[np.ndarray, np.ndarray]:
         """
         Retrieves the benign data from the training data using benign indices
 
@@ -194,7 +198,7 @@ def _get_benign_data(self) -> (np.ndarray, np.ndarray):
 
         return self.x_train[self.benign_indices], self.y_train[self.benign_indices]
 
-    def _extract_submodels(self, final_feature_layer_name: str) -> (Model, Model):
+    def _extract_submodels(self, final_feature_layer_name: str) -> Tuple[Model, Model]:
         """
         Extracts the feature representation and final classifier submodels from the original classifier.
         Composition of both models should result in the original model
@@ -204,7 +208,7 @@ def _extract_submodels(self, final_feature_layer_name: str) -> (Model, Model):
         :return: (feature_representation_submodel, classifying_submodel)
         """
         logging.info("Extracting submodels...")
-        keras_model = self.classifier.model
+        keras_model: Model = self.classifier.model
 
         try:
             final_feature_layer = keras_model.get_layer(name=final_feature_layer_name)
@@ -236,9 +240,9 @@ def _extract_submodels(self, final_feature_layer_name: str) -> (Model, Model):
 
         return feature_representation_model, classifying_submodel
 
-    def get_clusters(self) -> np.array:
+    def get_clusters(self) -> np.ndarray:
         """
-        :return: np.array with m+1 columns, where m is dimensionality of the dimensionality reducer's output.
+        :return: np.ndarray with m+1 columns, where m is dimensionality of the dimensionality reducer's output.
         m columns are used for feature representations and the last column is used for cluster label.
         """
         # Ensure features have been reduced and clustering has been performed
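To make the documented m+1 column layout concrete, a tiny illustration of the shape get_clusters() is described as returning (assumed from the docstring, not code in this patch):

# Illustrative layout only; the reduced features and labels here are random placeholders.
import numpy as np

reduced_features = np.random.rand(10, 2)             # m = 2 columns from the dimensionality reducer
cluster_labels = np.random.randint(0, 3, size=10)    # one cluster label per sample
clusters = np.column_stack([reduced_features, cluster_labels])  # shape (10, m + 1)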
@@ -259,7 +263,7 @@ def __init__(
         classifier: "CLASSIFIER_TYPE",
         x_train: np.ndarray,
         y_train: np.ndarray,
-        benign_indices: np.array,
+        benign_indices: np.ndarray,
         final_feature_layer_name: str,
         misclassification_threshold: float,
         reducer=UMAP(n_neighbors=5, min_dist=0),
@@ -313,7 +317,7 @@ def evaluate_defence(self, is_clean: np.ndarray, **kwargs) -> str:
 
         return confusion_matrix_json
 
-    def _calculate_misclassification_rate(self, class_label: int, deviation: np.array) -> np.float64:
+    def _calculate_misclassification_rate(self, class_label: int, deviation: np.ndarray) -> np.float64:
         """
         Calculate the misclassification rate when applying a deviation to other classes.
 
@@ -406,12 +410,12 @@ def predict_with_deviation(features, deviation):
 
         return np.float64(misclassified_elements) / np.float64(total_elements)
 
-    def detect_poison(self, **kwargs) -> (dict, list[int]):
+    def detect_poison(self, **kwargs) -> Tuple[dict, List[int]]:
 
         # saves important information about the algorithm execution for further analysis
-        report = dict()
+        report: Dict[str, Any] = dict()
 
-        self.is_clean = np.ones(len(self.y_train))
+        self.is_clean_np = np.ones(len(self.y_train))
 
         self.features = _feature_extraction(self.x_train, self.feature_representation_model)
 
@@ -428,7 +432,7 @@ def detect_poison(self, **kwargs) -> (dict, list[int]):
 
         # outliers are poisoned
         outlier_indices = np.where(self.class_cluster_labels == -1)[0]
-        self.is_clean[outlier_indices] = 0
+        self.is_clean_np[outlier_indices] = 0
 
         # cluster labels are saved in the report
         report["cluster_labels"] = self.get_clusters()
@@ -479,9 +483,11 @@ def detect_poison(self, **kwargs) -> (dict, list[int]):
         for cluster_label, mr in misclassification_rates.items():
             if mr >= 1 - self.misclassification_threshold:
                 cluster_indices = np.where(self.class_cluster_labels == cluster_label)[0]
-                self.is_clean[cluster_indices] = 0
+                self.is_clean_np[cluster_indices] = 0
                 logging.info(
                     f"Cluster k={cluster_label} i={self.cluster_class_mapping[cluster_label]} considered poison ({misclassification_rates[cluster_label]} >= {1 - self.misclassification_threshold})"
                 )
 
+        # Forced conversion for interface consistency
+        self.is_clean: List[int] = self.is_clean_np.tolist()
         return report, self.is_clean.copy()
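With the return annotation tightened to Tuple[dict, List[int]] and is_clean converted to a plain list before being returned, callers can rely on list semantics. A hedged usage sketch (defence stands for an already-configured ClusteringCentroidAnalysis instance; it is a placeholder, not code from this PR):

# Usage sketch only; 'defence' is assumed to be a constructed ClusteringCentroidAnalysis.
report, is_clean = defence.detect_poison()
poisoned_indices = [i for i, flag in enumerate(is_clean) if flag == 0]
print(f"{len(poisoned_indices)} samples flagged as poison")
print(report["cluster_labels"].shape)  # (n_samples, m + 1), per the get_clusters docstring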