RHOAIENG-26513: tests(containers): add unittests for verifying data science libraries in workbench images (#1421)

jiridanek · web-flow · commit 9bc0c11ac193 · 2025-07-22T17:10:42.000+02:00
Introduces a new test suite (`libraries_testunits.py`) that validates key data science libraries (NumPy, Pandas, scikit-learn, Matplotlib, PyTorch, TorchVision, and TorchAudio) inside workbench container images. `libraries_test.py` integrates the suite into the container tests: it copies the file into a running workbench container and executes it there.

Extends the `src` parameter of `docker_utils.container_cp` to accept `os.PathLike` values in addition to `str`.
diff --git a/tests/containers/docker_utils.py b/tests/containers/docker_utils.py
@@ -7,6 +7,7 @@
 import sys
 import tarfile
 import time
+from os import PathLike
 from typing import TYPE_CHECKING
 
 import podman
@@ -45,7 +46,9 @@ def wait_for_exit(self) -> int:
         return container.attrs["State"]["ExitCode"]
 
 
-def container_cp(container: Container, src: str, dst: str, user: int | None = None, group: int | None = None) -> None:
+def container_cp(
+    container: Container, src: str | PathLike, dst: str, user: int | None = None, group: int | None = None
+) -> None:
     """
     Copies a directory into a container
     From https://stackoverflow.com/questions/46390309/how-to-copy-a-file-from-host-to-container-using-docker-py-docker-sdk
diff --git a/tests/containers/workbenches/jupyterlab/libraries_test.py b/tests/containers/workbenches/jupyterlab/libraries_test.py
@@ -0,0 +1,47 @@
+from __future__ import annotations
+
+import pathlib
+from typing import TYPE_CHECKING
+
+from tests.containers import docker_utils
+from tests.containers.workbenches.workbench_image_test import WorkbenchContainer, grab_and_check_logs
+
+if TYPE_CHECKING:
+    import pytest_subtests
+
+    from tests.containers.conftest import Image
+
+
+class TestWorkbenchImage:
+    """Runs the in-container data science library suite against a workbench image.
+    The suite file, libraries_testunits.py, is copied into the container and executed there."""
+
+    def test_image_entrypoint_starts(
+        self, subtests: pytest_subtests.SubTests, jupyterlab_datascience_image: Image
+    ) -> None:
+        suite_path = pathlib.Path(__file__).parent / "libraries_testunits.py"
+        container = WorkbenchContainer(image=jupyterlab_datascience_image.name, user=1000, group_add=[0])
+        try:
+            try:
+                container.start()
+                # make sure the IDE in the workbench accepts connections before running the suite
+                with subtests.test("Attempting to connect to the workbench..."):
+                    container._connect()
+                docker_utils.container_cp(container.get_wrapped_container(), suite_path, "/opt/app-root/src/")
+                image_env = f"IMAGE={jupyterlab_datascience_image.labels['name']}"
+                suite_command = [
+                    "env",
+                    image_env,
+                    "bash",
+                    "-c",
+                    "python3 /opt/app-root/src/libraries_testunits.py",
+                ]
+                exit_code, raw_output = container.exec(suite_command)
+                report = raw_output.decode()
+                print(report)
+                assert exit_code == 0, report
+            finally:
+                # collect the container logs whether or not the steps above succeeded
+                grab_and_check_logs(subtests, container)
+        finally:
+            docker_utils.NotebookContainer(container).stop(timeout=0)
diff --git a/tests/containers/workbenches/jupyterlab/libraries_testunits.py b/tests/containers/workbenches/jupyterlab/libraries_testunits.py
@@ -0,0 +1,120 @@
+"""Unit tests that libraries_test.py copies into a workbench image and runs there."""
+
+import os
+import unittest
+
+# Suppress noisy logs from libraries, especially during testing
+os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
+os.environ["KMP_WARNINGS"] = "0"
+
+
+# ruff: noqa: PLC0415 `import` should be at the top-level of a file
+class TestDataScienceLibs(unittest.TestCase):
+    """A test suite to verify the basic functionality of key data science libraries.
+
+    Libraries are imported inside the individual tests, so a missing library only
+    affects its own test. The PyTorch tests run only when the IMAGE environment
+    variable contains "-pytorch-".
+    """
+
+    @classmethod
+    def setUpClass(cls):
+        """Read the image name from the IMAGE environment variable once for all tests."""
+        print("--- 🧪 Verifying Data Science Environment ---")
+        cls.image = os.environ["IMAGE"]
+        print(f"Image: {cls.image}")
+
+    def _require_torch_image(self):
+        """Skip the calling test unless the image name marks a PyTorch image."""
+        if "-pytorch-" not in self.image:
+            self.skipTest("Not a Torch image")
+
+    def test_numpy(self):
+        """Tests numpy array creation and basic operations."""
+        import numpy as np  # pyright: ignore[reportMissingImports]
+
+        arr = np.array([[1, 2], [3, 4]])
+        self.assertEqual(arr.shape, (2, 2), "Numpy array shape is incorrect.")
+        self.assertEqual(np.sum(arr), 10, "Numpy sum calculation is incorrect.")
+        print("✅ NumPy test passed.")
+
+    def test_pandas(self):
+        """Tests pandas DataFrame creation."""
+        import pandas as pd  # pyright: ignore[reportMissingImports]
+
+        df = pd.DataFrame({"col1": [1, 2], "col2": ["a", "b"]})
+        self.assertIsInstance(df, pd.DataFrame, "Object is not a Pandas DataFrame.")
+        self.assertEqual(df.shape, (2, 2), "Pandas DataFrame shape is incorrect.")
+        print("✅ Pandas test passed.")
+
+    def test_sklearn(self):
+        """Tests scikit-learn model fitting."""
+        from sklearn.cluster import KMeans  # pyright: ignore[reportMissingImports]
+        from sklearn.datasets import make_blobs  # pyright: ignore[reportMissingImports]
+
+        X, _y = make_blobs(n_samples=100, centers=3, random_state=42)
+
+        model = KMeans(n_clusters=3, random_state=42, n_init="auto")
+        model.fit(X)
+        self.assertEqual(model.cluster_centers_.shape, (3, 2), "Cluster centers shape is incorrect.")
+        self.assertIsNotNone(model.labels_, "Scikit-learn model failed to fit.")
+        print("✅ Scikit-learn test passed.")
+
+    def test_matplotlib(self):
+        """Tests matplotlib plot creation and saving to a file."""
+        import matplotlib.pyplot as plt  # pyright: ignore[reportMissingImports]
+        from sklearn.datasets import make_blobs  # pyright: ignore[reportMissingImports]
+
+        X, y = make_blobs(n_samples=50, centers=3, n_features=2, random_state=42)
+        plot_filename = "matplotlib_unittest.png"
+
+        fig, ax = plt.subplots()
+        self.addCleanup(plt.close, fig)  # release the figure even if a later step fails
+        ax.scatter(X[:, 0], X[:, 1], c=y)
+        ax.set_title("Matplotlib Unittest")
+        fig.savefig(plot_filename)
+        self.addCleanup(os.remove, plot_filename)
+
+        self.assertTrue(os.path.exists(plot_filename), "Matplotlib did not create the plot file.")
+        print("✅ Matplotlib test passed.")
+
+    def test_torch(self):
+        """🧪 Tests basic PyTorch tensor operations."""
+        self._require_torch_image()
+        import torch  # pyright: ignore[reportMissingImports]
+
+        device = "cuda" if torch.cuda.is_available() else "cpu"
+        tensor = torch.rand(2, 3, device=device)
+        self.assertEqual(tensor.shape, (2, 3), "PyTorch tensor shape is incorrect.")
+        self.assertEqual(tensor.device.type, device, "Tensor was not created on the correct device.")
+        print(f"✅ PyTorch test passed (using device: {device}).")
+
+    def test_torchvision(self):
+        """🧪 Tests torchvision model loading and inference."""
+        self._require_torch_image()
+        import torch  # pyright: ignore[reportMissingImports]
+        import torchvision  # pyright: ignore[reportMissingImports]
+
+        model = torchvision.models.resnet18(weights=None)  # Use weights=None for faster testing
+        model.eval()
+        dummy_input = torch.randn(1, 3, 224, 224)
+        with torch.no_grad():
+            output = model(dummy_input)
+        self.assertEqual(output.shape, (1, 1000), "Torchvision model output shape is incorrect.")
+        print("✅ Torchvision test passed.")
+
+    def test_torchaudio(self):
+        """🧪 Tests torchaudio waveform generation."""
+        self._require_torch_image()
+        import torchaudio  # pyright: ignore[reportMissingImports]
+
+        sample_rate = 16000
+        # NOTE(review): confirm that torchaudio.functional.generate_sine exists in the
+        # torchaudio version shipped in the image; this test is skipped on non-PyTorch images.
+        waveform = torchaudio.functional.generate_sine(440, sample_rate=sample_rate, duration=0.5)
+        self.assertEqual(waveform.shape, (1, 8000), "Torchaudio waveform shape is incorrect.")
+        print("✅ Torchaudio test passed.")
+
+
+if __name__ == "__main__":
+    unittest.main(verbosity=2)  # verbosity=2 reports every test by name in the output