Skip to content

Commit 19f69e4

Browse files
authored
Fix case where remote server has GPU but local machine does not (#806)
* Fix case where remote server has GPU but local machine does not
* Remove unused import
1 parent dcb86b4 commit 19f69e4

File tree

4 files changed

+41
-6
lines changed

4 files changed

+41
-6
lines changed

model_analyzer/config/input/config_command_profile.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1391,7 +1391,9 @@ def _autofill_values(self):
13911391
config values.
13921392
"""
13931393
cpu_only = False
1394-
if len(self.gpus) == 0 or not numba.cuda.is_available():
1394+
if self.triton_launch_mode != "remote" and (
1395+
len(self.gpus) == 0 or not numba.cuda.is_available()
1396+
):
13951397
cpu_only = True
13961398

13971399
# Set global constraints if latency budget is specified

model_analyzer/record/metrics_manager.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -791,7 +791,7 @@ def _print_run_config_info(self, run_config):
791791
cpu_only = run_config.cpu_only()
792792

793793
# Inform user CPU metric(s) are not being collected under CPU mode
794-
collect_cpu_metrics_expect = cpu_only or len(self._gpus) == 0
794+
collect_cpu_metrics_expect = cpu_only
795795
collect_cpu_metrics_actual = len(self._cpu_metrics) > 0
796796
if collect_cpu_metrics_expect and not collect_cpu_metrics_actual:
797797
if not self._cpu_warning_printed:

tests/mocks/mock_numba.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ class MockNumba(MockBase):
2525
Mocks numba class
2626
"""
2727

28-
def __init__(self, mock_paths):
28+
def __init__(self, mock_paths, is_available=True):
2929
device = MagicMock()
3030

3131
# Ignore everything after 0
@@ -43,7 +43,10 @@ def __init__(self, mock_paths):
4343
list_devices = MagicMock()
4444
list_devices.return_value = [device]
4545

46-
cuda_attrs = {"list_devices": list_devices}
46+
cuda_attrs = {
47+
"list_devices": list_devices,
48+
"is_available": MagicMock(return_value=is_available),
49+
}
4750
numba_attrs = {"cuda": MagicMock(**cuda_attrs)}
4851
self._mock_paths = mock_paths
4952
self._patchers_numba = {}

tests/test_config.py

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,9 +67,12 @@ def _create_parameters(
6767
"max_token_count": max_token_count,
6868
}
6969

70-
def _evaluate_config(self, args, yaml_content, subcommand="profile"):
70+
def _evaluate_config(
71+
self, args, yaml_content, subcommand="profile", numba_available=True
72+
):
7173
mock_numba = MockNumba(
72-
mock_paths=["model_analyzer.config.input.config_command_profile"]
74+
mock_paths=["model_analyzer.config.input.config_command_profile"],
75+
is_available=numba_available,
7376
)
7477

7578
mock_config = MockConfig(args, yaml_content)
@@ -109,6 +112,7 @@ def _assert_equality_of_model_configs(self, model_configs, expected_model_config
109112
for model_config, expected_model_config in zip(
110113
model_configs, expected_model_configs
111114
):
115+
self.assertEqual(expected_model_config.cpu_only(), model_config.cpu_only())
112116
self.assertEqual(
113117
expected_model_config.model_name(), model_config.model_name()
114118
)
@@ -1384,6 +1388,32 @@ def test_autofill(self):
13841388
]
13851389
self._assert_equality_of_model_configs(model_configs, expected_model_configs)
13861390

1391+
# Test autofill CPU_ONLY. It will only be true if no local GPUs are available AND we are not in remote mode
1392+
yaml_content = """
1393+
profile_models:
1394+
- vgg_16_graphdef
1395+
"""
1396+
for launch_mode in ["remote", "c_api", "docker", "local"]:
1397+
for local_gpus_available in [True, False]:
1398+
new_args = args.copy()
1399+
new_args.extend(["--triton-launch-mode", launch_mode])
1400+
config = self._evaluate_config(
1401+
new_args, yaml_content, numba_available=local_gpus_available
1402+
)
1403+
model_configs = config.get_all_config()["profile_models"]
1404+
expected_cpu_only = not local_gpus_available and launch_mode != "remote"
1405+
expected_model_configs = [
1406+
ConfigModelProfileSpec(
1407+
"vgg_16_graphdef",
1408+
cpu_only=expected_cpu_only,
1409+
parameters=self._create_parameters(batch_sizes=[1]),
1410+
objectives={"perf_throughput": 10},
1411+
)
1412+
]
1413+
self._assert_equality_of_model_configs(
1414+
model_configs, expected_model_configs
1415+
)
1416+
13871417
def test_config_shorthands(self):
13881418
"""
13891419
test flags like --latency-budget

0 commit comments

Comments
 (0)