refactor: Refactor nox file testing

peri044 · peri044 · commit beeac7cd3976 · 2022-08-31T17:18:13.000-07:00
Signed-off-by: Dheeraj Peri <peri.dheeraj@gmail.com>
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -435,6 +435,7 @@ commands:
             mkdir -p /tmp/artifacts/test_results
             cd tests/py
             pytest --junitxml=/tmp/artifacts/test_results/api/api_test_results.xml api/
+            pytest --junitxml=/tmp/artifacts/test_results/models/models_test_results.xml models/
             pytest --junitxml=/tmp/artifacts/test_results/integrations/integrations_test_results.xml integrations/
             cd ~/project
 
diff --git a/noxfile.py b/noxfile.py
@@ -30,13 +30,15 @@
 if USE_HOST_DEPS:
     print("Using dependencies from host python")
 
+# Set epochs to train VGG model for accuracy tests
+EPOCHS=25
+
 SUPPORTED_PYTHON_VERSIONS = ["3.7", "3.8", "3.9", "3.10"]
 
 nox.options.sessions = [
     "l0_api_tests-" + "{}.{}".format(sys.version_info.major, sys.version_info.minor)
 ]
 
-
 def install_deps(session):
     print("Installing deps")
     session.install("-r", os.path.join(TOP_DIR, "py", "requirements.txt"))
@@ -63,31 +65,6 @@ def install_torch_trt(session):
         session.run("python", "setup.py", "develop")
 
 
-def download_datasets(session):
-    print(
-        "Downloading dataset to path",
-        os.path.join(TOP_DIR, "examples/int8/training/vgg16"),
-    )
-    session.chdir(os.path.join(TOP_DIR, "examples/int8/training/vgg16"))
-    session.run_always(
-        "wget", "https://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz", external=True
-    )
-    session.run_always("tar", "-xvzf", "cifar-10-binary.tar.gz", external=True)
-    session.run_always(
-        "mkdir",
-        "-p",
-        os.path.join(TOP_DIR, "tests/accuracy/datasets/data"),
-        external=True,
-    )
-    session.run_always(
-        "cp",
-        "-rpf",
-        os.path.join(TOP_DIR, "examples/int8/training/vgg16/cifar-10-batches-bin"),
-        os.path.join(TOP_DIR, "tests/accuracy/datasets/data/cidar-10-batches-bin"),
-        external=True,
-    )
-
-
 def train_model(session):
     session.chdir(os.path.join(TOP_DIR, "examples/int8/training/vgg16"))
     session.install("-r", "requirements.txt")
@@ -107,14 +84,14 @@ def train_model(session):
             "--ckpt-dir",
             "vgg16_ckpts",
             "--epochs",
-            "25",
+            str(EPOCHS),
             env={"PYTHONPATH": PYT_PATH},
         )
 
         session.run_always(
             "python",
             "export_ckpt.py",
-            "vgg16_ckpts/ckpt_epoch25.pth",
+            "vgg16_ckpts/ckpt_epoch" + str(EPOCHS) + ".pth",
             env={"PYTHONPATH": PYT_PATH},
         )
     else:
@@ -130,10 +107,10 @@ def train_model(session):
             "--ckpt-dir",
             "vgg16_ckpts",
             "--epochs",
-            "25",
+            str(EPOCHS),
         )
 
-        session.run_always("python", "export_ckpt.py", "vgg16_ckpts/ckpt_epoch25.pth")
+        session.run_always("python", "export_ckpt.py", "vgg16_ckpts/ckpt_epoch" + str(EPOCHS) + ".pth")
 
 
 def finetune_model(session):
@@ -156,17 +133,17 @@ def finetune_model(session):
             "--ckpt-dir",
             "vgg16_ckpts",
             "--start-from",
-            "25",
+            str(EPOCHS),
             "--epochs",
-            "26",
+            str(EPOCHS+1),
             env={"PYTHONPATH": PYT_PATH},
         )
 
         # Export model
         session.run_always(
             "python",
             "export_qat.py",
-            "vgg16_ckpts/ckpt_epoch26.pth",
+            "vgg16_ckpts/ckpt_epoch" + str(EPOCHS+1) + ".pth",
             env={"PYTHONPATH": PYT_PATH},
         )
     else:
@@ -182,13 +159,13 @@ def finetune_model(session):
             "--ckpt-dir",
             "vgg16_ckpts",
             "--start-from",
-            "25",
+            str(EPOCHS),
             "--epochs",
-            "26",
+            str(EPOCHS+1),
         )
 
         # Export model
-        session.run_always("python", "export_qat.py", "vgg16_ckpts/ckpt_epoch26.pth")
+        session.run_always("python", "export_qat.py", "vgg16_ckpts/ckpt_epoch" + str(EPOCHS+1) + ".pth")
 
 
 def cleanup(session):
@@ -209,7 +186,7 @@ def run_base_tests(session):
     print("Running basic tests")
     session.chdir(os.path.join(TOP_DIR, "tests/py"))
     tests = [
-        "api",
+        "api/test_e2e_behavior.py",
         "integrations/test_to_backend_api.py",
     ]
     for test in tests:
@@ -218,6 +195,18 @@ def run_base_tests(session):
         else:
             session.run_always("pytest", test)
 
+def run_model_tests(session):
+    print("Running model tests")
+    session.chdir(os.path.join(TOP_DIR, "tests/py"))
+    tests = [
+        "models",
+    ]
+    for test in tests:
+        if USE_HOST_DEPS:
+            session.run_always("pytest", test, env={"PYTHONPATH": PYT_PATH})
+        else:
+            session.run_always("pytest", test)
+
 
 def run_accuracy_tests(session):
     print("Running accuracy tests")
@@ -268,8 +257,8 @@ def run_trt_compatibility_tests(session):
     copy_model(session)
     session.chdir(os.path.join(TOP_DIR, "tests/py"))
     tests = [
-        "test_trt_intercompatibility.py",
-        "test_ptq_trt_calibrator.py",
+        "integrations/test_trt_intercompatibility.py",
+        #"ptq/test_ptq_trt_calibrator.py",
     ]
     for test in tests:
         if USE_HOST_DEPS:
@@ -282,7 +271,7 @@ def run_dla_tests(session):
     print("Running DLA tests")
     session.chdir(os.path.join(TOP_DIR, "tests/py"))
     tests = [
-        "test_api_dla.py",
+        "hw/test_api_dla.py",
     ]
     for test in tests:
         if USE_HOST_DEPS:
@@ -295,7 +284,7 @@ def run_multi_gpu_tests(session):
     print("Running multi GPU tests")
     session.chdir(os.path.join(TOP_DIR, "tests/py"))
     tests = [
-        "test_multi_gpu.py",
+        "hw/test_multi_gpu.py",
     ]
     for test in tests:
         if USE_HOST_DEPS:
@@ -321,22 +310,18 @@ def run_l0_dla_tests(session):
     run_base_tests(session)
     cleanup(session)
 
-
-def run_l1_accuracy_tests(session):
+def run_l1_model_tests(session):
     if not USE_HOST_DEPS:
         install_deps(session)
         install_torch_trt(session)
-    download_datasets(session)
-    train_model(session)
-    run_accuracy_tests(session)
+    download_models(session)
+    run_model_tests(session)
     cleanup(session)
 
-
 def run_l1_int8_accuracy_tests(session):
     if not USE_HOST_DEPS:
         install_deps(session)
         install_torch_trt(session)
-    download_datasets(session)
     train_model(session)
     finetune_model(session)
     run_int8_accuracy_tests(session)
@@ -348,7 +333,6 @@ def run_l2_trt_compatibility_tests(session):
         install_deps(session)
         install_torch_trt(session)
     download_models(session)
-    download_datasets(session)
     train_model(session)
     run_trt_compatibility_tests(session)
     cleanup(session)
@@ -368,18 +352,15 @@ def l0_api_tests(session):
     """When a developer needs to check correctness for a PR or something"""
     run_l0_api_tests(session)
 
-
 @nox.session(python=SUPPORTED_PYTHON_VERSIONS, reuse_venv=True)
 def l0_dla_tests(session):
     """When a developer needs to check basic api functionality using host dependencies"""
     run_l0_dla_tests(session)
 
-
 @nox.session(python=SUPPORTED_PYTHON_VERSIONS, reuse_venv=True)
-def l1_accuracy_tests(session):
-    """Checking accuracy performance on various usecases"""
-    run_l1_accuracy_tests(session)
-
+def l1_model_tests(session):
+    """Downloads the test models and runs the model tests under tests/py/models"""
+    run_l1_model_tests(session)
 
 @nox.session(python=SUPPORTED_PYTHON_VERSIONS, reuse_venv=True)
 def l1_int8_accuracy_tests(session):
@@ -397,13 +378,3 @@ def l2_trt_compatibility_tests(session):
 def l2_multi_gpu_tests(session):
     """Makes sure that Torch-TensorRT can operate on multi-gpu systems"""
     run_l2_multi_gpu_tests(session)
-
-
-@nox.session(python=SUPPORTED_PYTHON_VERSIONS, reuse_venv=True)
-def download_test_models(session):
-    """Grab all the models needed for testing"""
-    try:
-        import torch
-    except ModuleNotFoundError:
-        install_deps(session)
-    download_models(session)
diff --git a/py/torch_tensorrt/ts/_compile_spec.py b/py/torch_tensorrt/ts/_compile_spec.py
@@ -225,8 +225,8 @@ def _parse_input_signature(input_signature: Any):
 
 
 def _parse_compile_spec(compile_spec_: Dict[str, Any]) -> _ts_C.CompileSpec:
-    # TODO: Remove deep copy once collections does not need partial compilation
-    compile_spec = deepcopy(compile_spec_)
+    # TODO: Use deepcopy to support partial compilation of collections
+    compile_spec = compile_spec_
     info = _ts_C.CompileSpec()
 
     if len(compile_spec["inputs"]) > 0:
@@ -301,7 +301,7 @@ def _parse_compile_spec(compile_spec_: Dict[str, Any]) -> _ts_C.CompileSpec:
             compile_spec["enabled_precisions"]
         )
 
-    if "calibrator" in compile_spec:
+    if "calibrator" in compile_spec and compile_spec["calibrator"]:
         info.ptq_calibrator = compile_spec["calibrator"]
 
     if "sparse_weights" in compile_spec:
diff --git a/tests/py/api/test_embed_engines.py b/tests/py/api/test_embed_engines.py
@@ -4,7 +4,6 @@
 import torchvision.models as models
 import copy
 import timm
-import custom_models as cm
 from typing import Dict
 from utils import cosine_similarity, COSINE_THRESHOLD
 
diff --git a/tests/py/hw/test_api_dla.py b/tests/py/hw/test_api_dla.py
@@ -2,6 +2,7 @@
 import torch_tensorrt as torchtrt
 import torch
 import torchvision.models as models
+from utils import cosine_similarity, COSINE_THRESHOLD
 
 
 class ModelTestCaseOnDLA(unittest.TestCase):
@@ -39,8 +40,8 @@ def test_compile_traced(self):
         }
 
         trt_mod = torchtrt.ts.compile(self.traced_model, **compile_spec)
-        same = (trt_mod(self.input) - self.traced_model(self.input)).abs().max()
-        self.assertTrue(same < 2e-2)
+        cos_sim = cosine_similarity(self.model(self.input), trt_mod(self.input))
+        self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"ModelTestCaseOnDLA traced TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}")
 
     def test_compile_script(self):
         compile_spec = {
@@ -55,8 +56,8 @@ def test_compile_script(self):
         }
 
         trt_mod = torchtrt.ts.compile(self.scripted_model, **compile_spec)
-        same = (trt_mod(self.input) - self.scripted_model(self.input)).abs().max()
-        self.assertTrue(same < 2e-2)
+        cos_sim = cosine_similarity(self.model(self.input), trt_mod(self.input))
+        self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"ModelTestCaseOnDLA scripted TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}")
 
 
 def test_suite():
diff --git a/tests/py/hw/test_multi_gpu.py b/tests/py/hw/test_multi_gpu.py
@@ -35,9 +35,9 @@ def test_compile_traced(self):
 
         trt_mod = torchtrt.ts.compile(self.traced_model, **compile_spec)
         torchtrt.set_device(self.target_gpu)
-        same = (trt_mod(self.input) - self.traced_model(self.input)).abs().max()
+        cos_sim = cosine_similarity(self.model(self.input), trt_mod(self.input))
         torchtrt.set_device(0)
-        self.assertTrue(same < 2e-3)
+        self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"TestMultiGpuSwitching traced TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}")
 
     def test_compile_script(self):
         torchtrt.set_device(0)
@@ -54,9 +54,10 @@ def test_compile_script(self):
 
         trt_mod = torchtrt.ts.compile(self.scripted_model, **compile_spec)
         torchtrt.set_device(self.target_gpu)
-        same = (trt_mod(self.input) - self.scripted_model(self.input)).abs().max()
+        cos_sim = cosine_similarity(self.model(self.input), trt_mod(self.input))
         torchtrt.set_device(0)
-        self.assertTrue(same < 2e-3)
+        self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"TestMultiGpuSwitching scripted TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}")
+
 
 
 class TestMultiGpuSerializeDeserializeSwitching(ModelTestCase):
@@ -89,8 +90,8 @@ def test_compile_traced(self):
         trt_mod = torchtrt.ts.compile(self.traced_model, **compile_spec)
         # Changing the device ID deliberately. It should still run on correct device ID by context switching
         torchtrt.set_device(1)
-        same = (trt_mod(self.input) - self.traced_model(self.input)).abs().max()
-        self.assertTrue(same < 2e-3)
+        cos_sim = cosine_similarity(self.model(self.input), trt_mod(self.input))
+        self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"TestMultiGpuSerializeDeserializeSwitching traced TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}")
 
     def test_compile_script(self):
         torchtrt.set_device(0)
@@ -108,8 +109,8 @@ def test_compile_script(self):
         trt_mod = torchtrt.ts.compile(self.scripted_model, **compile_spec)
         # Changing the device ID deliberately. It should still run on correct device ID by context switching
         torchtrt.set_device(1)
-        same = (trt_mod(self.input) - self.scripted_model(self.input)).abs().max()
-        self.assertTrue(same < 2e-3)
+        cos_sim = cosine_similarity(self.model(self.input), trt_mod(self.input))
+        self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"TestMultiGpuSerializeDeserializeSwitching scripted TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}")
 
 
 def test_suite():
diff --git a/tests/py/integrations/test_to_backend_api.py b/tests/py/integrations/test_to_backend_api.py
@@ -2,7 +2,7 @@
 import torch_tensorrt as torchtrt
 import torch
 import torchvision.models as models
-
+from utils import cosine_similarity, COSINE_THRESHOLD
 
 class TestToBackendLowering(unittest.TestCase):
     def setUp(self):
@@ -31,10 +31,9 @@ def setUp(self):
 
     def test_to_backend_lowering(self):
         trt_mod = torch._C._jit_to_backend("tensorrt", self.scripted_model, self.spec)
-        same = (
-            (trt_mod.forward(self.input) - self.scripted_model(self.input)).abs().max()
-        )
-        self.assertTrue(same < 2e-3)
+        cos_sim = cosine_similarity(self.model(self.input), trt_mod(self.input))
+        self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"TestToBackendLowering TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}")
+
 
 
 if __name__ == "__main__":
diff --git a/tests/py/integrations/test_trt_intercompatibility.py b/tests/py/integrations/test_trt_intercompatibility.py
@@ -3,7 +3,7 @@
 import torch
 import torchvision.models as models
 import tensorrt as trt
-
+from utils import cosine_similarity, COSINE_THRESHOLD
 
 class TestPyTorchToTRTEngine(unittest.TestCase):
     def test_pt_to_trt(self):
@@ -42,8 +42,8 @@ def test_pt_to_trt(self):
                         device="cuda:0"
                     ).cuda_stream,
                 )
-                same = (out - self.ts_model(self.input)).abs().max()
-                self.assertTrue(same < 2e-3)
+                cos_sim = cosine_similarity(self.model(self.input), out)
+                self.assertTrue(cos_sim > COSINE_THRESHOLD, msg=f"TestPyTorchToTRTEngine TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}")
 
 
 if __name__ == "__main__":
diff --git a/tests/py/models/custom_models.py b/tests/py/models/custom_models.py
diff --git a/tests/py/models/test_models.py b/tests/py/models/test_models.py
diff --git a/tests/py/models/test_multiple_registered_engines.py b/tests/py/models/test_multiple_registered_engines.py
diff --git a/tests/py/models/utils.py b/tests/py/models/utils.py