Add TorchScript support for nms_rotated

MrParosk · facebook-github-bot · commit 25e283a951d9 · 2021-02-01T11:28:05.000-08:00
Summary: Related to issue #2525. The type of the `iou_threshold` parameter had to be changed from float to double in the C++/CUDA files so that the operator registration used for TorchScript compiles. Pull Request resolved: #2550. Reviewed By: theschnitz. Differential Revision: D26169817. Pulled By: ppwwyyxx. fbshipit-source-id: 0a43467b4eb99f11d95f219b777add6ce01fdc19
diff --git a/detectron2/layers/csrc/nms_rotated/nms_rotated.h b/detectron2/layers/csrc/nms_rotated/nms_rotated.h
@@ -7,13 +7,13 @@ namespace detectron2 {
 at::Tensor nms_rotated_cpu(
     const at::Tensor& dets,
     const at::Tensor& scores,
-    const float iou_threshold);
+    const double iou_threshold);
 
 #if defined(WITH_CUDA) || defined(WITH_HIP)
 at::Tensor nms_rotated_cuda(
     const at::Tensor& dets,
     const at::Tensor& scores,
-    const float iou_threshold);
+    const double iou_threshold);
 #endif
 
 // Interface for Python
@@ -22,7 +22,7 @@ at::Tensor nms_rotated_cuda(
 inline at::Tensor nms_rotated(
     const at::Tensor& dets,
     const at::Tensor& scores,
-    const float iou_threshold) {
+    const double iou_threshold) {
   assert(dets.device().is_cuda() == scores.device().is_cuda());
   if (dets.device().is_cuda()) {
 #if defined(WITH_CUDA) || defined(WITH_HIP)
diff --git a/detectron2/layers/csrc/nms_rotated/nms_rotated_cpu.cpp b/detectron2/layers/csrc/nms_rotated/nms_rotated_cpu.cpp
@@ -8,7 +8,7 @@ template <typename scalar_t>
 at::Tensor nms_rotated_cpu_kernel(
     const at::Tensor& dets,
     const at::Tensor& scores,
-    const float iou_threshold) {
+    const double iou_threshold) {
   // nms_rotated_cpu_kernel is modified from torchvision's nms_cpu_kernel,
   // however, the code in this function is much shorter because
   // we delegate the IoU computation for rotated boxes to
@@ -63,7 +63,7 @@ at::Tensor nms_rotated_cpu(
     // input must be contiguous
     const at::Tensor& dets,
     const at::Tensor& scores,
-    const float iou_threshold) {
+    const double iou_threshold) {
   auto result = at::empty({0}, dets.options());
 
   AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), "nms_rotated", [&] {
diff --git a/detectron2/layers/csrc/nms_rotated/nms_rotated_cuda.cu b/detectron2/layers/csrc/nms_rotated/nms_rotated_cuda.cu
@@ -20,7 +20,7 @@ int const threadsPerBlock = sizeof(unsigned long long) * 8;
 template <typename T>
 __global__ void nms_rotated_cuda_kernel(
     const int n_boxes,
-    const float iou_threshold,
+    const double iou_threshold,
     const T* dev_boxes,
     unsigned long long* dev_mask) {
   // nms_rotated_cuda_kernel is modified from torchvision's nms_cuda_kernel
@@ -81,7 +81,7 @@ at::Tensor nms_rotated_cuda(
     // input must be contiguous
     const at::Tensor& dets,
     const at::Tensor& scores,
-    float iou_threshold) {
+    double iou_threshold) {
   // using scalar_t = float;
   AT_ASSERTM(dets.is_cuda(), "dets must be a CUDA tensor");
   AT_ASSERTM(scores.is_cuda(), "scores must be a CUDA tensor");
diff --git a/detectron2/layers/csrc/vision.cpp b/detectron2/layers/csrc/vision.cpp
@@ -115,4 +115,10 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
   pybind11::class_<COCOeval::ImageEvaluation>(m, "ImageEvaluation")
       .def(pybind11::init<>());
 }
+
+#ifdef TORCH_LIBRARY
+TORCH_LIBRARY(detectron2, m) {
+  m.def("nms_rotated", &nms_rotated);
+}
+#endif
 } // namespace detectron2
diff --git a/detectron2/layers/nms.py b/detectron2/layers/nms.py
@@ -6,6 +6,15 @@
 from torchvision.ops import boxes as box_ops
 from torchvision.ops import nms  # BC-compat
 
+from detectron2.utils.env import TORCH_VERSION
+
+if TORCH_VERSION < (1, 7):
+    from detectron2 import _C
+
+    nms_rotated_func = _C.nms_rotated
+else:
+    nms_rotated_func = torch.ops.detectron2.nms_rotated
+
 
 def batched_nms(
     boxes: torch.Tensor, scores: torch.Tensor, idxs: torch.Tensor, iou_threshold: float
@@ -93,9 +102,7 @@ def nms_rotated(boxes, scores, iou_threshold):
         keep (Tensor): int64 tensor with the indices of the elements that have been kept
         by Rotated NMS, sorted in decreasing order of scores
     """
-    from detectron2 import _C
-
-    return _C.nms_rotated(boxes, scores, iou_threshold)
+    return nms_rotated_func(boxes, scores, iou_threshold)
 
 
 # Note: this function (batched_nms_rotated) might be moved into
diff --git a/tests/layers/test_nms_rotated.py b/tests/layers/test_nms_rotated.py
@@ -2,10 +2,12 @@
 from __future__ import absolute_import, division, print_function, unicode_literals
 import numpy as np
 import unittest
+from copy import deepcopy
 import torch
 from torchvision import ops
 
 from detectron2.layers import batched_nms, batched_nms_rotated, nms_rotated
+from detectron2.utils.env import TORCH_VERSION
 from detectron2.utils.testing import random_boxes
 
 
@@ -149,5 +151,25 @@ def test_nms_rotated_180_degrees_cpu(self):
             self.assertLessEqual(nms_edit_distance(keep, keep_ref), 1, err_msg.format(iou))
 
 
+class TestScriptable(unittest.TestCase):
+    def setUp(self):
+        class TestingModule(torch.nn.Module):
+            def forward(self, boxes, scores, threshold):
+                return nms_rotated(boxes, scores, threshold)
+
+        self.module = TestingModule()
+
+    @unittest.skipIf(TORCH_VERSION < (1, 7), "Insufficient pytorch version")
+    def test_scriptable_cpu(self):
+        m = deepcopy(self.module).cpu()
+        _ = torch.jit.script(m)
+
+    @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
+    @unittest.skipIf(TORCH_VERSION < (1, 7), "Insufficient pytorch version")
+    def test_scriptable_cuda(self):
+        m = deepcopy(self.module).cuda()
+        _ = torch.jit.script(m)
+
+
 if __name__ == "__main__":
     unittest.main()