|
3 | 3 | import torch
|
4 | 4 | import torchvision.models as models
|
5 | 5 |
|
6 |
| -from multi_gpu_test_case import MultiGpuTestCase |
7 |
| - |
8 |
| -gpu_id = 1 |
9 |
| -class TestCompile(MultiGpuTestCase): |
| 6 | +from model_test_case import ModelTestCase |
10 | 7 |
|
class TestMultiGpuSwitching(ModelTestCase):
    """Exercise trtorch's device-switching logic across multiple GPUs.

    Compilation is issued while the runtime's active device is GPU 0, but the
    compile spec and the model/inputs target GPU 1 (``self.target_gpu``), so
    the engine must be built for and executed on a non-default device.
    Covers both traced and scripted module paths.
    """

    def setUp(self):
        # The whole point of this case is cross-device behavior, so it is
        # meaningless on single-GPU machines.
        if torch.cuda.device_count() < 2:
            self.fail("Test is not relevant for this platform since number of available CUDA devices is less than 2")

        # Start with the runtime pinned to GPU 0 while the model and input
        # live on GPU 1 — the mismatch is deliberate.
        trtorch.set_device(0)
        self.target_gpu = 1
        self.input = torch.randn((1, 3, 224, 224)).to("cuda:1")
        self.model = self.model.to("cuda:1")
        self.traced_model = torch.jit.trace(self.model, [self.input])
        self.scripted_model = torch.jit.script(self.model)

    def _compile_spec(self):
        """Build the trtorch compile spec targeting ``self.target_gpu``."""
        return {
            "input_shapes": [self.input.shape],
            "device": {
                "device_type": trtorch.DeviceType.GPU,
                "gpu_id": self.target_gpu,
                "dla_core": 0,
                "allow_gpu_fallback": False,
                "disable_tf32": False
            }
        }

    def _assert_outputs_close(self, trt_mod, reference_mod):
        """Run both modules on the target GPU and assert output parity.

        Switches the runtime to the target GPU for execution, then restores
        GPU 0 so subsequent tests start from a known device.
        """
        trtorch.set_device(self.target_gpu)
        same = (trt_mod(self.input) - reference_mod(self.input)).abs().max()
        trtorch.set_device(0)
        self.assertTrue(same < 2e-3)

    def test_compile_traced(self):
        # Compile from GPU 0; the spec directs the engine to the target GPU.
        trtorch.set_device(0)
        trt_mod = trtorch.compile(self.traced_model, self._compile_spec())
        self._assert_outputs_close(trt_mod, self.traced_model)

    def test_compile_script(self):
        # Same flow as the traced test, via the scripted module instead.
        trtorch.set_device(0)
        trt_mod = trtorch.compile(self.scripted_model, self._compile_spec())
        self._assert_outputs_close(trt_mod, self.scripted_model)
|
47 | 57 |
|
48 |
| - |
49 |
| - |
def test_suite():
    """Assemble the multi-GPU switching tests, parametrized with ResNet-18."""
    tests = unittest.TestSuite()
    resnet = models.resnet18(pretrained=True)
    tests.addTest(TestMultiGpuSwitching.parametrize(TestMultiGpuSwitching, model=resnet))
    return tests
|
55 | 63 |
|
56 |
| -if not torch.cuda.device_count() > 1: |
57 |
| - raise ValueError("This test case is applicable for multi-gpu configurations only") |
58 |
| - |
59 |
| -# Setting it up here so that all CUDA allocations are done on correct device |
60 |
| -trtorch.set_device(gpu_id) |
# Build the parametrized suite and a text runner for direct script execution.
suite = test_suite()

# NOTE(review): runner.run(suite) presumably follows below this visible
# chunk — confirm the file actually executes the suite.
runner = unittest.TextTestRunner()
|
|
0 commit comments