Remove UNSW_NB15 load. It has not been used (yet).

c4ts0up · c4ts0up · commit 559996ef8c3c · 2025-06-10T22:50:52.000-05:00
Signed-off-by: Álvaro Bacca Peña <a.baccap@uniandes.edu.co>
diff --git a/tests/test_utils.py b/tests/test_utils.py
@@ -23,8 +23,7 @@
 import numpy as np
 import tensorflow as tf
 
-from art.utils import projection, random_sphere, uniform_sample_from_sphere_or_ball, to_categorical, least_likely_class, \
-    load_unsw_nb15
+from art.utils import projection, random_sphere, uniform_sample_from_sphere_or_ball, to_categorical, least_likely_class
 from art.utils import load_dataset, load_iris, load_mnist, load_nursery, load_cifar10
 from art.utils import second_most_likely_class, random_targets, get_label_conf, get_labels_np_array, preprocess
 from art.utils import compute_success_array, compute_success, check_and_transform_label_format
@@ -470,68 +469,6 @@ def test_nursery(self):
         self.assertEqual(x_train.shape[0], y_train.shape[0])
         self.assertEqual(x_test.shape[0], y_test.shape[0])
 
-    # FIXME: 52 secs to run. Its too long. Mock the web result with a subsample of the data
-    def test_load_unsw_nb15_full(self):
-        """Test loading the full dataset with frac=1.0 (default)."""
-        (x_train, y_train), (x_test, y_test) = load_unsw_nb15()
-
-        # size validation
-        total_samples = 2_540_047       # FIXME: official sources say there are 2_540_044, but I counted in Excel
-        self.assertEqual(x_train.shape[0] + x_test.shape[0], total_samples)
-        self.assertEqual(len(y_train) + len(y_test), total_samples)
-        self.assertEqual(x_train.shape[0], len(y_train))
-        self.assertEqual(x_test.shape[0], len(x_test))
-
-
-    def test_load_unsw_nb15_frac(self):
-        """Test loading the full dataset with frac=0.1"""
-        (x_train, y_train), (x_test, y_test) = load_unsw_nb15(frac=0.1)
-
-        # size validation
-        total_samples = 254_005
-        self.assertEqual(x_train.shape[0] + x_test.shape[0], total_samples)
-        self.assertEqual(len(y_train) + len(y_test), total_samples)
-        self.assertEqual(x_train.shape[0], len(y_train))
-        self.assertEqual(x_test.shape[0], len(x_test))
-
-
-        # y has column 'attack_cat'
-        self.assertIn("label", y_train.columns,
-                      "Column 'label' is missing from y_train")
-        self.assertIn("label", y_test.columns,
-                      "Column 'label' is missing from y_test")
-
-        # x doesn't have the column 'attack_cat'
-        self.assertNotIn("label", x_train.columns,
-                         "Column 'label' should not be in x_train")
-        self.assertNotIn("label", x_test.columns,
-                         "Column 'label' should not be in y_train")
-
-        # feature count is correct (total 49)
-        self.assertEqual(49 - 2, len(x_train.columns),
-                         "x_train doesn't have the 47 corresponding features")
-        self.assertEqual(49 - 2, len(x_test.columns),
-                         "x_test doesn't have the 47 corresponding features")
-        self.assertEqual(49 - 48, len(y_train.columns),
-                         "y_train doesn't have the single corresponding features")
-        self.assertEqual(49 - 48, len(y_test.columns),
-                         "y_test doesn't have the single corresponding features")
-
-        # test column types
-        actual_dtypes = x_train.dtypes.astype(str).value_counts().to_dict()
-
-        expected_dtypes = {
-            'float64': 10,
-            'int64': 30, # -1 porque es el label
-            'object': 7, # -1 porque ID es removido
-        }
-
-        for dtype, count in expected_dtypes.items():
-            self.assertEqual(count,
-                             actual_dtypes.get(dtype, 0),
-                             f"Expected {count} columns of type {dtype}, but found {actual_dtypes.get(dtype, 0)}")
-
-
     def test_segment_by_class(self):
         data = np.array([[3, 2], [9, 2], [4, 0], [9, 0]])
         classes = to_categorical(np.array([2, 1, 0, 1]))