Skip to content

Commit 2ec058c

Browse files
committed
MyPy fixes
Signed-off-by: Álvaro Bacca Peña <[email protected]>
1 parent 8212437 commit 2ec058c

File tree

3 files changed: 47 additions and 35 deletions.

art/defences/detector/poison/clustering_centroid_analysis.py

Lines changed: 30 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -19,10 +19,11 @@
1919

2020
import logging
2121
import warnings
22-
from typing import TYPE_CHECKING
22+
from typing import TYPE_CHECKING, Tuple, Union, Dict, Any, List
2323

2424
import numpy as np
2525
import tensorflow as tf
26+
from numpy._typing import _64Bit
2627
from sklearn.base import ClusterMixin
2728
from sklearn.cluster import DBSCAN
2829
from tensorflow.keras import Model, Sequential
@@ -38,11 +39,11 @@
3839
tf.get_logger().setLevel(logging.WARN)
3940

4041

41-
def _encode_labels(y: np.array) -> (np.array, set, np.array, dict):
42+
def _encode_labels(y: np.ndarray) -> Tuple[np.ndarray, set, np.ndarray, dict]:
4243
"""
4344
Given the target column, it generates the label encoding and the reverse mapping to use in the classification process
4445
45-
:param y: 1D np.array with single values that represent the different classes
46+
:param y: 1D np.ndarray with single values that represent the different classes
4647
:return: (y_encoded, unique_classes, label_mapping, reverse_mapping) encoded column, set of unique classes,
4748
mapping from class to numeric label, and mapping from numeric label to class
4849
"""
@@ -58,7 +59,7 @@ def _calculate_centroid_tf(features):
5859
return tf.reduce_mean(features, axis=0)
5960

6061

61-
def _calculate_centroid(selected_indices: np.ndarray, features: np.array) -> np.ndarray:
62+
def _calculate_centroid(selected_indices: np.ndarray, features: np.ndarray) -> np.ndarray:
6263
"""
6364
Returns the centroid of all data within a specific cluster that is classified as a specific class label
6465
@@ -72,7 +73,9 @@ def _calculate_centroid(selected_indices: np.ndarray, features: np.array) -> np.
7273
return centroid.numpy()
7374

7475

75-
def _class_clustering(y: np.array, features: np.array, label: any, clusterer: ClusterMixin) -> (np.array, np.array):
76+
def _class_clustering(
77+
y: np.ndarray, features: np.ndarray, label: Union[int, str], clusterer: ClusterMixin
78+
) -> Tuple[np.ndarray, np.ndarray]:
7679
"""
7780
Given a class label, it clusters all the feature representations that map to that class
7881
@@ -93,7 +96,7 @@ def _calculate_features(feature_representation_model, x):
9396
return feature_representation_model(x, training=False)
9497

9598

96-
def _feature_extraction(x_train: np.array, feature_representation_model: Model) -> np.ndarray:
99+
def _feature_extraction(x_train: np.ndarray, feature_representation_model: Model) -> np.ndarray:
97100
"""
98101
Extract features from the model using the feature representation sub model.
99102
@@ -108,7 +111,7 @@ def _feature_extraction(x_train: np.array, feature_representation_model: Model)
108111
# Process in batches to avoid memory issues
109112
batch_size = 256
110113
num_batches = int(np.ceil(len(data) / batch_size))
111-
features = []
114+
features: List[tf.Tensor] = []
112115

113116
for i in range(num_batches):
114117
start_idx = i * batch_size
@@ -118,17 +121,18 @@ def _feature_extraction(x_train: np.array, feature_representation_model: Model)
118121
features.append(batch_features)
119122

120123
# Concatenate all batches
124+
final_features_tensor: tf.Tensor
121125
if len(features) > 1:
122-
features = tf.concat(features, axis=0)
126+
final_features_tensor = tf.concat(features, axis=0)
123127
else:
124-
features = features[0]
128+
final_features_tensor = features[0]
125129

126-
return features.numpy()
130+
return final_features_tensor.numpy()
127131

128132

129133
def _cluster_classes(
130-
y_train: np.array, unique_classes: set[int], features: np.array, clusterer: ClusterMixin
131-
) -> (np.array, dict):
134+
y_train: np.ndarray, unique_classes: set[int], features: np.ndarray, clusterer: ClusterMixin
135+
) -> Tuple[np.ndarray, dict]:
132136
"""
133137
Clusters all the classes in the given dataset into uniquely identifiable clusters.
134138
@@ -183,7 +187,7 @@ class ClusteringCentroidAnalysis(PoisonFilteringDefence):
183187
valid_clustering = ["DBSCAN"]
184188
valid_reduce = ["UMAP"]
185189

186-
def _get_benign_data(self) -> (np.ndarray, np.ndarray):
190+
def _get_benign_data(self) -> Tuple[np.ndarray, np.ndarray]:
187191
"""
188192
Retrieves the benign data from the training data using benign indices
189193
@@ -194,7 +198,7 @@ def _get_benign_data(self) -> (np.ndarray, np.ndarray):
194198

195199
return self.x_train[self.benign_indices], self.y_train[self.benign_indices]
196200

197-
def _extract_submodels(self, final_feature_layer_name: str) -> (Model, Model):
201+
def _extract_submodels(self, final_feature_layer_name: str) -> Tuple[Model, Model]:
198202
"""
199203
Extracts the feature representation and final classifier submodels from the original classifier.
200204
Composition of both models should result in the original model
@@ -204,7 +208,7 @@ def _extract_submodels(self, final_feature_layer_name: str) -> (Model, Model):
204208
:return: (feature_representation_submodel, classifying_submodel)
205209
"""
206210
logging.info("Extracting submodels...")
207-
keras_model = self.classifier.model
211+
keras_model: Model = self.classifier.model
208212

209213
try:
210214
final_feature_layer = keras_model.get_layer(name=final_feature_layer_name)
@@ -236,9 +240,9 @@ def _extract_submodels(self, final_feature_layer_name: str) -> (Model, Model):
236240

237241
return feature_representation_model, classifying_submodel
238242

239-
def get_clusters(self) -> np.array:
243+
def get_clusters(self) -> np.ndarray:
240244
"""
241-
:return: np.array with m+1 columns, where m is dimensionality of the dimensionality reducer's output.
245+
:return: np.ndarray with m+1 columns, where m is dimensionality of the dimensionality reducer's output.
242246
m columns are used for feature representations and the last column is used for cluster label.
243247
"""
244248
# Ensure features have been reduced and clustering has been performed
@@ -259,7 +263,7 @@ def __init__(
259263
classifier: "CLASSIFIER_TYPE",
260264
x_train: np.ndarray,
261265
y_train: np.ndarray,
262-
benign_indices: np.array,
266+
benign_indices: np.ndarray,
263267
final_feature_layer_name: str,
264268
misclassification_threshold: float,
265269
reducer=UMAP(n_neighbors=5, min_dist=0),
@@ -313,7 +317,7 @@ def evaluate_defence(self, is_clean: np.ndarray, **kwargs) -> str:
313317

314318
return confusion_matrix_json
315319

316-
def _calculate_misclassification_rate(self, class_label: int, deviation: np.array) -> np.float64:
320+
def _calculate_misclassification_rate(self, class_label: int, deviation: np.ndarray) -> np.float64:
317321
"""
318322
Calculate the misclassification rate when applying a deviation to other classes.
319323
@@ -406,12 +410,12 @@ def predict_with_deviation(features, deviation):
406410

407411
return np.float64(misclassified_elements) / np.float64(total_elements)
408412

409-
def detect_poison(self, **kwargs) -> (dict, list[int]):
413+
def detect_poison(self, **kwargs) -> Tuple[dict, List[int]]:
410414

411415
# saves important information about the algorithm execution for further analysis
412-
report = dict()
416+
report: Dict[str, Any] = dict()
413417

414-
self.is_clean = np.ones(len(self.y_train))
418+
self.is_clean_np = np.ones(len(self.y_train))
415419

416420
self.features = _feature_extraction(self.x_train, self.feature_representation_model)
417421

@@ -428,7 +432,7 @@ def detect_poison(self, **kwargs) -> (dict, list[int]):
428432

429433
# outliers are poisoned
430434
outlier_indices = np.where(self.class_cluster_labels == -1)[0]
431-
self.is_clean[outlier_indices] = 0
435+
self.is_clean_np[outlier_indices] = 0
432436

433437
# cluster labels are saved in the report
434438
report["cluster_labels"] = self.get_clusters()
@@ -479,9 +483,11 @@ def detect_poison(self, **kwargs) -> (dict, list[int]):
479483
for cluster_label, mr in misclassification_rates.items():
480484
if mr >= 1 - self.misclassification_threshold:
481485
cluster_indices = np.where(self.class_cluster_labels == cluster_label)[0]
482-
self.is_clean[cluster_indices] = 0
486+
self.is_clean_np[cluster_indices] = 0
483487
logging.info(
484488
f"Cluster k={cluster_label} i={self.cluster_class_mapping[cluster_label]} considered poison ({misclassification_rates[cluster_label]} >= {1 - self.misclassification_threshold})"
485489
)
486490

491+
# Forced conversion for interface consistency
492+
self.is_clean: List[int] = self.is_clean_np.tolist()
487493
return report, self.is_clean.copy()

art/performance_monitor.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66
import threading
77
import time
88
from datetime import datetime
9-
from typing import Dict, List, Optional, Any
9+
from typing import Dict, List, Optional, Any, Union
1010

1111
import numpy as np
1212
import pandas as pd
@@ -63,8 +63,8 @@ def __init__(self, interval: float = 0.1) -> None:
6363
self.cpu_percentages: List[float] = []
6464
self.memory_usages: List[float] = []
6565
self.timestamps: List[float] = []
66-
self.gpu_usages: List[float] = []
67-
self.gpu_memories: List[float] = []
66+
self.gpu_usages: List[Any] = []
67+
self.gpu_memories: List[Any] = []
6868
self.stop_flag = False
6969
self.process = psutil.Process(os.getpid())
7070

@@ -238,7 +238,7 @@ def plot_results(self, title: Optional[str] = None) -> Optional[Any]:
238238
axes[3].set_ylabel("GPU Memory (MB)")
239239
axes[3].grid(True)
240240

241-
plt.tight_layout(rect=[0, 0, 1, 0.95])
241+
plt.tight_layout(rect=(0.0, 0.0, 1.0, 0.95))
242242
return fig
243243

244244

@@ -259,8 +259,8 @@ def __init__(self, task_name: str = "Task", plot: bool = False, save_data: bool
259259
self.plot = plot
260260
self.save_data = save_data
261261
self.monitor = ResourceMonitor()
262-
self.start_time = 0
263-
self.end_time = 0
262+
self.start_time: float = 0
263+
self.end_time: float = 0
264264

265265
def __enter__(self) -> "PerformanceTimer":
266266
"""Start monitoring when entering the context."""

tests/defences/detector/poison/test_clustering_centroid_analysis.py

Lines changed: 11 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1024,8 +1024,10 @@ def test_detect_poison_all_benign(self):
10241024
):
10251025
report, is_clean = defence.detect_poison()
10261026

1027+
is_clean_np = np.array(is_clean, dtype=np.int_)
1028+
10271029
self.assertIsInstance(report, dict)
1028-
self.assertEqual(len(self.y_train), len(is_clean[is_clean == 1]))
1030+
self.assertEqual(len(self.y_train), len(is_clean_np[is_clean_np == 1]))
10291031
# In the all-benign case, no samples should be marked as poisoned
10301032
self.assertEqual(np.sum(is_clean), len(self.y_train))
10311033

@@ -1093,8 +1095,10 @@ def mock_misclass_rate(class_label, deviation):
10931095
):
10941096
report, is_clean = defence.detect_poison()
10951097

1098+
is_clean_np = np.array(is_clean, dtype=np.int_)
1099+
10961100
# all elements in class 0 are poisoned. No outliers --> all poisoned elements are class 0
1097-
np.testing.assert_equal(np.where(is_clean == 0), np.where(self.y_train == 0))
1101+
np.testing.assert_equal(np.where(is_clean_np == 0), np.where(self.y_train == 0))
10981102

10991103
def test_detect_poison_both_mechanisms(self):
11001104
"""
@@ -1129,12 +1133,14 @@ def mock_misclass_rate(class_label, deviation):
11291133
):
11301134
report, is_clean = defence.detect_poison()
11311135

1136+
is_clean_np = np.array(is_clean, dtype=np.int_)
1137+
11321138
self.assertIsInstance(report, dict)
11331139
# all elements in class 1 are poisoned
1134-
self.assertGreater(len(is_clean[is_clean == 0]), len(self.y_train[self.y_train == 1]))
1135-
self.assertTrue(np.all(is_clean[np.where(self.y_train == 1)] == 0))
1140+
self.assertGreater(len(is_clean_np[is_clean_np == 0]), len(self.y_train[self.y_train == 1]))
1141+
self.assertTrue(np.all(is_clean_np[np.where(self.y_train == 1)] == 0))
11361142
# most elements in class 0 are detected as clean. Poisoned ones are outliers. FIXME: can I make this more robust?
1137-
self.assertLess(np.mean(self.y_train[np.where(is_clean == 1)]), 0.2)
1143+
self.assertLess(np.mean(self.y_train[np.where(is_clean_np == 1)]), 0.2)
11381144

11391145

11401146
class TestEvaluateDefence(unittest.TestCase):

0 commit comments

Comments
 (0)