Merge pull request #14 from foundation-model-stack/test/run-cpu-instead

hickeyma · web-flow · commit 404d479ccb35 · 2024-12-09T16:09:29.000Z
test: Add checks for unit tests that require Nvidia GPU
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -35,8 +35,6 @@ permissions:
 
 jobs:
   test:
-    # Disable until runner with Nvidia GPU available
-    if: false
     name: "test: ${{ matrix.python }} on ${{ matrix.platform }}"
     runs-on: "${{ matrix.platform }}"
     strategy:
diff --git a/.gitignore b/.gitignore
@@ -37,3 +37,6 @@ venv/
 # generated by setuptools_scm
 /fms_mo/_version.py
 
+# Generated by tests
+qcfg.json
+
diff --git a/tests/models/conftest.py b/tests/models/conftest.py
@@ -505,52 +505,53 @@ def model_config_fp16():
     return deepcopy(ToyModel4().half())
 
 
-class ToyModelQuantized(torch.nn.Module):
-    """
-    Three layer Linear model that has a quantized layer
-
-    Extends:
-        torch.nn.Module
-    """
+# The QLinear class requires an Nvidia GPU and CUDA
+if torch.cuda.is_available():
 
-    def __init__(self):
-        super().__init__()
-        kwargs = {"qcfg": qconfig_init()}  # QLinear requires qconfig to work
-        self.first_layer = torch.nn.Linear(3, 3, bias=True)
-        self.second_layer = QLinear(3, 3, bias=True, **kwargs)
-        self.third_layer = torch.nn.Linear(3, 3, bias=True)
+    class ToyModelQuantized(torch.nn.Module):
+        """
+        Three layer Linear model that has a quantized layer
 
-    def forward(self, input_tensor):
+        Extends:
+            torch.nn.Module
         """
-        Forward func for Toy Model
 
-        Args:
-            input_tensor (torch.FloatTensor): Tensor to operate on
+        def __init__(self):
+            super().__init__()
+            kwargs = {"qcfg": qconfig_init()}  # QLinear requires qconfig to work
+            self.first_layer = torch.nn.Linear(3, 3, bias=True)
+            self.second_layer = QLinear(3, 3, bias=True, **kwargs)
+            self.third_layer = torch.nn.Linear(3, 3, bias=True)
 
-        Returns:
-            torch.FloatTensor:
-        """
-        out = self.first_layer(input_tensor)
-        out = self.second_layer(out)
-        out = self.third_layer(out)
-        return out
+        def forward(self, input_tensor):
+            """
+            Forward func for Toy Model
 
+            Args:
+                input_tensor (torch.FloatTensor): Tensor to operate on
 
-model_quantized_params = [ToyModelQuantized()]
+            Returns:
+                torch.FloatTensor:
+            """
+            out = self.first_layer(input_tensor)
+            out = self.second_layer(out)
+            out = self.third_layer(out)
+            return out
 
+    model_quantized_params = [ToyModelQuantized()]
 
-@pytest.fixture(scope="function", params=model_quantized_params)
-def model_quantized(request):
-    """
-    Toy Model that has quantized layer
+    @pytest.fixture(scope="function", params=model_quantized_params)
+    def model_quantized(request):
+        """
+        Toy Model that has quantized layer
 
-    Args:
-        request (torch.nn.Module): Toy Model
+        Args:
+            request (torch.nn.Module): Toy Model
 
-    Returns:
-        torch.nn.Module: Toy Model
-    """
-    return deepcopy(request.param)
+        Returns:
+            torch.nn.Module: Toy Model
+        """
+        return deepcopy(request.param)
 
 
 # Get a model to test layer uniqueness
diff --git a/tests/models/test_qmodelprep.py b/tests/models/test_qmodelprep.py
@@ -28,26 +28,29 @@
 from fms_mo.prep import has_quantized_module
 from tests.models.test_model_utils import delete_config, qmodule_error
 
-
 ################
 # Qmodel tests #
 ################
-def test_model_quantized(
-    model_quantized: torch.nn.Module,
-    sample_input_fp32: torch.FloatTensor,
-    config_fp32: dict,
-):
-    """
-    qmodel_prep should always throw RuntimeError if a model is already quantized
 
-    Args:
-        model_quantized (torch.nn.Module): Quantized Toy Model
-        sample_input_fp32 (torch.FloatTensor): Sample fp32 input for calibration.
-        config_fp32 (dict): Config w/ fp32 settings
-    """
-    delete_config()
-    with pytest.raises(RuntimeError):
-        qmodel_prep(model_quantized, sample_input_fp32, config_fp32)
+# Requires an Nvidia GPU to run
+if torch.cuda.is_available():
+
+    def test_model_quantized(
+        model_quantized: torch.nn.Module,
+        sample_input_fp32: torch.FloatTensor,
+        config_fp32: dict,
+    ):
+        """
+        qmodel_prep should always throw RuntimeError if a model is already quantized
+
+        Args:
+            model_quantized (torch.nn.Module): Quantized Toy Model
+            sample_input_fp32 (torch.FloatTensor): Sample fp32 input for calibration.
+            config_fp32 (dict): Config w/ fp32 settings
+        """
+        delete_config()
+        with pytest.raises(RuntimeError):
+            qmodel_prep(model_quantized, sample_input_fp32, config_fp32)
 
 
 def test_double_qmodel_prep_assert(