Trusted-AI
diff --git a/art/attacks/extraction/functionally_equivalent_extraction.py b/art/attacks/extraction/functionally_equivalent_extraction.py
Lines changed: 52 additions & 39 deletions
diff --git a/art/attacks/poisoning/adversarial_embedding_attack.py b/art/attacks/poisoning/adversarial_embedding_attack.py
Lines changed: 50 additions & 43 deletions
diff --git a/art/attacks/poisoning/feature_collision_attack.py b/art/attacks/poisoning/feature_collision_attack.py
Lines changed: 3 additions & 28 deletions
diff --git a/art/attacks/poisoning/gradient_matching_attack.py b/art/attacks/poisoning/gradient_matching_attack.py
Lines changed: 1 addition & 1 deletion
diff --git a/art/estimators/classification/keras.py b/art/estimators/classification/keras.py
Lines changed: 19 additions & 12 deletions
@@ -110,9 +110,9 @@ def extract(
         :param rel_diff_slope: Relative slope difference at critical points.
         :param rel_diff_value: Relative value difference at critical points.
         :param delta_init_value: Initial delta of weight value search.
-        :param delta_value_max: Maximum delta  of weight value search.
+        :param delta_value_max: Maximum delta of weight value search.
         :param d2_min: Minimum acceptable value of sum of absolute second derivatives.
-        :param d_step:  Step size of delta increase.
+        :param d_step: Step size of delta increase.
         :param delta_sign: Delta of weight sign search.
         :param unit_vector_scale: Multiplicative scale of the unit vector `e_j`.
         :param ftol: Tolerance for termination by the change of the cost function.
@@ -428,84 +428,96 @@ def f_w_1_b_1(w_1_b_1_i):
 
 # pylint: disable=invalid-name
 if __name__ == "__main__":
+    import os
+    import numpy as np
     import tensorflow as tf
 
-    tf.compat.v1.disable_eager_execution()
-    tf.keras.backend.set_floatx("float64")
-
-    from tensorflow.keras.datasets import mnist
-    from tensorflow.keras.models import Sequential
-    from tensorflow.keras.layers import Dense
+    from keras.models import Sequential, load_model
+    from keras.layers import Dense, Input
+    from keras.losses import CategoricalCrossentropy
+    from keras.optimizers import Adam
+    from keras.utils import to_categorical
+    from keras.datasets import mnist
 
+    # Keras 3.10+ runs in eager mode by default (do NOT disable it!)
+    tf.keras.backend.set_floatx("float64")
     np.random.seed(1)
-    number_neurons = 16
-    batch_size = 128
+
+    # Hyperparameters
+    number_neurons = 4
+    batch_size = 10
     number_classes = 10
-    epochs = 10
+    epochs = 100
     img_rows = 28
     img_cols = 28
     number_channels = 1
 
+    # Load and reshape data
     (x_train, y_train), (x_test, y_test) = mnist.load_data()
-    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, number_channels)
-    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, number_channels)
-    input_shape = (number_channels * img_rows * img_cols,)
-
-    x_train = x_train.reshape((x_train.shape[0], number_channels * img_rows * img_cols)).astype("float64")
-    x_test = x_test.reshape((x_test.shape[0], number_channels * img_rows * img_cols)).astype("float64")
+    x_train = x_train.reshape((x_train.shape[0], -1)).astype("float64")  # shape = (60000, 784)
+    x_test = x_test.reshape((x_test.shape[0], -1)).astype("float64")  # shape = (10000, 784)
 
+    # Standardize
     mean = np.mean(x_train)
     std = np.std(x_train)
-
     x_train = (x_train - mean) / std
     x_test = (x_test - mean) / std
 
-    y_train = tf.keras.utils.to_categorical(y_train, number_classes)
-    y_test = tf.keras.utils.to_categorical(y_test, number_classes)
+    # One-hot encode
+    y_train = to_categorical(y_train, number_classes)
+    y_test = to_categorical(y_test, number_classes)
 
-    if os.path.isfile("./model.h5"):
-        model = tf.keras.models.load_model("./model.h5")
-    else:
-        model = Sequential()
-        model.add(Dense(number_neurons, activation="relu", input_shape=input_shape))
-        model.add(Dense(number_classes, activation="linear"))
+    # Define input shape
+    input_shape = (784,)
 
+    # Load or create model
+    if os.path.isfile("./model.keras"):
+        model = load_model("./model.keras", compile=False)
         model.compile(
-            loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
-            optimizer=tf.keras.optimizers.Adam(
-                learning_rate=0.0001,
-            ),
-            metrics=["accuracy"],
+            loss=CategoricalCrossentropy(from_logits=True), optimizer=Adam(learning_rate=0.0001), metrics=["accuracy"]
+        )
+    else:
+        model = Sequential(
+            [
+                Input(shape=input_shape),
+                Dense(number_neurons, activation="relu"),
+                Dense(number_classes, activation="linear"),
+            ]
+        )
+        model.compile(
+            loss=CategoricalCrossentropy(from_logits=True), optimizer=Adam(learning_rate=0.001), metrics=["accuracy"]
         )
-
         model.fit(
-            x_train,
-            y_train,
+            x_train[0:100],
+            y_train[0:100],
             batch_size=batch_size,
             epochs=epochs,
             verbose=1,
             validation_data=(x_test, y_test),
         )
+        model.save("./model.keras")
 
-        model.save("./model.h5")
-
+    # Evaluate target model
     score_target = model.evaluate(x_test, y_test, verbose=0)
 
+    # Wrap with ART
     target_classifier = KerasClassifier(model=model, use_logits=True, clip_values=(0, 1))
 
+    # Run Functionally Equivalent Extraction
     fee = FunctionallyEquivalentExtraction(classifier=target_classifier, num_neurons=number_neurons)  # type: ignore
     bbc = fee.extract(x_test[0:100])
 
+    # Predictions
     y_test_predicted_extracted = bbc.predict(x_test)
     y_test_predicted_target = target_classifier.predict(x_test)
 
+    # Metrics
     print("Target model - Test accuracy:", score_target[1])
     print(
         "Extracted model - Test accuracy:",
-        np.sum(np.argmax(y_test_predicted_extracted, axis=1) == np.argmax(y_test, axis=1)) / y_test.shape[0],
+        np.mean(np.argmax(y_test_predicted_extracted, axis=1) == np.argmax(y_test, axis=1)),
     )
     print(
         "Extracted model - Test Fidelity:",
-        np.sum(np.argmax(y_test_predicted_extracted, axis=1) == np.argmax(y_test_predicted_target, axis=1))
-        / y_test_predicted_target.shape[0],
+        np.mean(np.argmax(y_test_predicted_extracted, axis=1) == np.argmax(y_test_predicted_target, axis=1)),
     )
@@ -104,60 +104,65 @@ def __init__(
         self._check_params()
 
         if isinstance(self.estimator, KerasClassifier):
-            using_tf_keras = "tensorflow.python.keras" in str(type(self.estimator.model))
-            if using_tf_keras:  # pragma: no cover
-                from tensorflow.keras.models import Model, clone_model
-                from tensorflow.keras.layers import (
-                    GaussianNoise,
-                    Dense,
-                    BatchNormalization,
-                    LeakyReLU,
-                )
-                from tensorflow.keras.optimizers import Adam
-
-                opt = Adam(learning_rate=self.learning_rate)
-
-            else:
-                from keras import Model
-                from keras.models import clone_model
-                from keras.layers import GaussianNoise, Dense, BatchNormalization, LeakyReLU
 
-                try:
-                    from keras.optimizers import Adam
-
-                    opt = Adam(learning_rate=self.learning_rate)
-                except ImportError:
-                    from keras.optimizers import adam_v2
+            from keras.models import Model, clone_model
+            from keras.layers import (
+                GaussianNoise,
+                Dense,
+                BatchNormalization,
+                LeakyReLU,
+                Input,
+                Flatten,
+            )
+            from keras.optimizers import Adam
+            import keras
 
-                    opt = adam_v2.Adam(learning_rate=self.learning_rate)
+            opt = Adam(learning_rate=self.learning_rate)
 
+            # Clone and build model
             if clone:
-                self.orig_model = clone_model(self.estimator.model, input_tensors=self.estimator.model.inputs)
+                self.orig_model = clone_model(self.estimator.model)
+                self.orig_model.set_weights(self.estimator.model.get_weights())
             else:
                 self.orig_model = self.estimator.model
+
+            # Ensure model is built (important for Sequential models)
+            if not self.orig_model.built:
+                # ART estimators report `input_shape` per sample (no batch axis), so only prepend the batch dimension
+                dummy_input_shape = (None, *self.estimator.input_shape)
+                self.orig_model.build(dummy_input_shape)
+
+            # Access model input/output (safe for Functional & Sequential)
             model_input = self.orig_model.inputs
             init_model_output = self.orig_model(model_input)
 
-            # Extracting feature tensor
+            # Extract feature layer output
             if isinstance(self.feature_layer, int):
                 feature_layer_tensor = self.orig_model.layers[self.feature_layer].output
             else:
-                feature_layer_tensor = self.orig_model.get_layer(name=feature_layer).output
-            feature_layer_output = Model(inputs=[model_input], outputs=[feature_layer_tensor])
-
-            # Architecture for discriminator
-            discriminator_input = feature_layer_output(model_input)
-            discriminator_input = GaussianNoise(stddev=1)(discriminator_input)
-            dense_layer_1 = Dense(self.discriminator_layer_1)(discriminator_input)
-            norm_1_layer = BatchNormalization()(dense_layer_1)
-            leaky_layer_1 = LeakyReLU(alpha=0.2)(norm_1_layer)
-            dense_layer_2 = Dense(self.discriminator_layer_2)(leaky_layer_1)
-            norm_2_layer = BatchNormalization()(dense_layer_2)
-            leaky_layer_2 = LeakyReLU(alpha=0.2)(norm_2_layer)
-            backdoor_detect = Dense(2, activation="softmax", name="backdoor_detect")(leaky_layer_2)
-
-            # Creating embedded model
-            self.embed_model = Model(inputs=self.orig_model.inputs, outputs=[init_model_output, backdoor_detect])
+                feature_layer_tensor = self.orig_model.get_layer(name=self.feature_layer).output
+
+            feature_extractor = Model(inputs=model_input, outputs=feature_layer_tensor)
+
+            # Discriminator architecture
+            discriminator_input = feature_extractor(model_input)
+            if len(discriminator_input.shape) > 2:
+                discriminator_input = Flatten()(discriminator_input)
+
+            discriminator_input = GaussianNoise(stddev=1.0)(discriminator_input)
+
+            x = Dense(self.discriminator_layer_1)(discriminator_input)
+            x = BatchNormalization()(x)
+            x = LeakyReLU(negative_slope=0.2)(x)
+
+            x = Dense(self.discriminator_layer_2)(x)
+            x = BatchNormalization()(x)
+            x = LeakyReLU(negative_slope=0.2)(x)
+
+            backdoor_detect = Dense(2, activation="softmax", name="backdoor_detect")(x)
+
+            # Final embedded model
+            self.embed_model = Model(inputs=model_input, outputs=[init_model_output, backdoor_detect])
 
             # Add backdoor detection loss
             model_name = self.orig_model.name
@@ -175,7 +180,9 @@ def __init__(
             else:
                 raise TypeError(f"Cannot read model loss value of type {type(model_loss)}")
 
-            self.embed_model.compile(optimizer=opt, loss=losses, loss_weights=loss_weights, metrics=["accuracy"])
+            self.embed_model.compile(
+                optimizer=opt, loss=losses, loss_weights=loss_weights, metrics=["accuracy", "accuracy"]
+            )
         else:
             raise NotImplementedError("This attack currently only supports Keras.")
 
 
@@ -30,7 +30,6 @@
 from art.attacks.attack import PoisoningAttackWhiteBox
 from art.estimators import BaseEstimator, NeuralNetworkMixin
 from art.estimators.classification.classifier import ClassifierMixin
-from art.estimators.classification.keras import KerasClassifier
 from art.estimators.classification.pytorch import PyTorchClassifier
 
 
@@ -112,14 +111,7 @@ def __init__(
         self.verbose = verbose
         self._check_params()
 
-        if isinstance(self.estimator, KerasClassifier):
-            self.target_placeholder, self.target_feature_rep = self.estimator.get_activations(
-                self.target, self.feature_layer, 1, framework=True
-            )
-            self.poison_placeholder, self.poison_feature_rep = self.estimator.get_activations(
-                self.target, self.feature_layer, 1, framework=True
-            )
-        elif isinstance(self.estimator, PyTorchClassifier):
+        if isinstance(self.estimator, PyTorchClassifier):
             self.target_feature_rep = self.estimator.get_activations(self.target, self.feature_layer, 1, framework=True)
             self.poison_feature_rep = self.estimator.get_activations(self.target, self.feature_layer, 1, framework=True)
         else:
@@ -192,14 +184,7 @@ def forward_step(self, poison: np.ndarray) -> np.ndarray:
         :param poison: the current poison samples.
         :return: poison example closer in feature representation to target space.
         """
-        if isinstance(self.estimator, KerasClassifier):
-            (attack_grad,) = self.estimator.custom_loss_gradient(
-                self.attack_loss,
-                [self.poison_placeholder, self.target_placeholder],
-                [poison, self.target],
-                name="feature_collision_" + str(self.feature_layer),
-            )
-        elif isinstance(self.estimator, PyTorchClassifier):
+        if isinstance(self.estimator, PyTorchClassifier):
             attack_grad = self.estimator.custom_loss_gradient(self.attack_loss, poison, self.target, self.feature_layer)
         else:
             raise ValueError("The type of the estimator is not supported.")
@@ -295,22 +280,12 @@ def tensor_norm(tensor, norm_type: int | float | str = 2):  # pylint: disable=in
     :param norm_type: Order of the norm.
     :return: A tensor with the norm applied.
     """
-    tf_tensor_types = (
-        "tensorflow.python.framework.ops.Tensor",
-        "tensorflow.python.framework.ops.EagerTensor",
-        "tensorflow.python.framework.ops.SymbolicTensor",
-    )
     torch_tensor_types = ("torch.Tensor", "torch.float", "torch.double", "torch.long")
-    supported_types = tf_tensor_types + torch_tensor_types
+    supported_types = torch_tensor_types
     tensor_type = get_class_name(tensor)
     if tensor_type not in supported_types:  # pragma: no cover
         raise TypeError("Tensor type `" + tensor_type + "` is not supported")
 
-    if tensor_type in tf_tensor_types:
-        import tensorflow as tf
-
-        return tf.norm(tensor, ord=norm_type)
-
     if tensor_type in torch_tensor_types:  # pragma: no cover
         import torch
 
 
@@ -380,7 +380,7 @@ def __len__(self):
                 self.backdoor_model.zero_grad()
                 loss, poisoned_samples = self.backdoor_model(x, indices, y, self.grad_ws_norm)
                 loss.backward()
-                self.backdoor_model.noise_embedding.embedding_layer.weight.grad.sign_()
+                self.backdoor_model.noise_embedding.embedding_layer.weight.grad.sign_()  # type: ignore
                 self.optimizer.step()
                 sum_loss += loss.clone().cpu().detach().numpy()
                 count += 1
 
@@ -210,24 +210,31 @@ def compute_loss(self, x: np.ndarray, y: np.ndarray, reduction: str = "none", **
         predictions = self._model(x_tf, training=False)
 
         # Compute loss (no need to access .loss attribute directly)
-        loss_tensor = self._model.compiled_loss(y_tf, predictions, regularization_losses=None)
+        loss_tensor = self._model.compiled_loss(y_tf, predictions)
 
         # Convert loss tensor to numpy
         loss_value = loss_tensor.numpy()
 
         # Apply user-specified reduction
         if reduction == "none":
-            pass
+            loss_value_list = []
+            for i in range(x_tf.shape[0]):
+                predictions_i = self._model(x_tf[i : i + 1], training=False)
+                loss_tensor_i = self._model.compiled_loss(y_tf[i : i + 1], predictions_i)
+                loss_value_list.append(loss_tensor_i.numpy())
+            loss_value = np.array(loss_value_list)
+
         elif reduction == "mean":
-            if loss_value.ndim > 0:
-                loss_value = np.mean(loss_value, axis=0)
-            else:
-                loss_value = np.mean(loss_value)
+            # `loss_value` above already holds the full-batch compiled loss that this branch needs;
+            # reuse it instead of repeating the identical forward pass and loss evaluation.
+            pass
+
         elif reduction == "sum":
-            if loss_value.ndim > 0:
-                loss_value = np.sum(loss_value, axis=0)
-            else:
-                loss_value = np.sum(loss_value)
+            loss_value = 0
+            for i in range(x_tf.shape[0]):
+                predictions_i = self._model(x_tf[i : i + 1], training=False)
+                loss_tensor_i = self._model.compiled_loss(y_tf[i : i + 1], predictions_i)
+                loss_value += loss_tensor_i.numpy()
 
         return loss_value
 
@@ -391,9 +398,9 @@ def predict(self, x: np.ndarray, batch_size: int = 128, training_mode: bool = Fa
 
         # Run predictions with batching
         if training_mode:
             predictions = self._model(x_preprocessed, training=training_mode)
         else:
-            predictions = self._model.predict(x_preprocessed, batch_size=batch_size)
+            predictions = self._model.predict(x_preprocessed, batch_size=batch_size, verbose=0)
 
         # Apply postprocessing
         predictions = self._apply_postprocessing(preds=predictions, fit=False)