Skip to content

Commit 94dff26

Browse files
authored
[Feature] Add the support of three_interpolate op for Ascend device (#2962)
1 parent c0774b5 commit 94dff26

File tree

2 files changed

+59
-11
lines changed

2 files changed

+59
-11
lines changed
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
#include "pytorch_npu_helper.hpp"
2+
3+
using namespace NPU_NAME_SPACE;
4+
using namespace std;
5+
6+
void three_interpolate_forward_npu(int b, int c, int m, int n,
7+
const Tensor points, const Tensor idx,
8+
const Tensor weight, Tensor out) {
9+
auto point_c_trans = points.transpose(1, 2);
10+
11+
OpCommand cmd;
12+
cmd.Name("ThreeInterpolate")
13+
.Input(point_c_trans)
14+
.Input(idx)
15+
.Input(weight)
16+
.Output(out)
17+
.Run();
18+
19+
auto output = out.view({b, n, c}).transpose(1, 2);
20+
auto res = NpuUtils::format_contiguous(output);
21+
out.copy_(res);
22+
}
23+
24+
// Forward declaration of the device-dispatch stub; its definition lives
// elsewhere in the project (NOTE(review): presumably the common
// three_interpolate op file — confirm).
void three_interpolate_forward_impl(int b, int c, int m, int n,
                                    const Tensor points, const Tensor idx,
                                    const Tensor weight, Tensor out);

// Bind the NPU kernel above as the backend implementation selected when
// three_interpolate_forward_impl is dispatched on an Ascend device.
REGISTER_NPU_IMPL(three_interpolate_forward_impl,
                  three_interpolate_forward_npu);

tests/test_ops/test_three_interpolate.py

Lines changed: 30 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,28 @@
33
import torch
44

55
from mmcv.ops import three_interpolate
6+
from mmcv.utils import IS_CUDA_AVAILABLE, IS_NPU_AVAILABLE
67

78

8-
@pytest.mark.skipif(
9-
not torch.cuda.is_available(), reason='requires CUDA support')
10-
@pytest.mark.parametrize('dtype', [torch.half, torch.float, torch.double])
11-
def test_three_interpolate(dtype):
9+
@pytest.mark.parametrize('dtype', [
10+
torch.half, torch.float,
11+
pytest.param(
12+
torch.double,
13+
marks=pytest.mark.skipif(
14+
IS_NPU_AVAILABLE,
15+
reason='NPU does not support for 64-bit floating point'))
16+
])
17+
@pytest.mark.parametrize('device', [
18+
pytest.param(
19+
'cuda',
20+
marks=pytest.mark.skipif(
21+
not IS_CUDA_AVAILABLE, reason='requires CUDA support')),
22+
pytest.param(
23+
'npu',
24+
marks=pytest.mark.skipif(
25+
not IS_NPU_AVAILABLE, reason='requires NPU support'))
26+
])
27+
def test_three_interpolate(dtype, device):
1228
features = torch.tensor(
1329
[[[2.4350, 4.7516, 4.4995, 2.4350, 2.4350, 2.4350],
1430
[3.1236, 2.6278, 3.0447, 3.1236, 3.1236, 3.1236],
@@ -20,12 +36,13 @@ def test_three_interpolate(dtype):
2036
[0.0000, 0.2744, 2.0842, 0.0000, 0.0000, 0.0000],
2137
[0.3414, 1.5063, 1.6209, 0.3414, 0.3414, 0.3414],
2238
[0.5814, 0.0103, 0.0000, 0.5814, 0.5814, 0.5814]]],
23-
dtype=dtype).cuda()
39+
dtype=dtype,
40+
device=device)
2441

25-
idx = torch.tensor([[[0, 1, 2], [2, 3, 4], [2, 3, 4], [0, 1, 2], [0, 1, 2],
26-
[0, 1, 3]],
27-
[[0, 2, 3], [1, 3, 4], [2, 1, 4], [0, 2, 4], [0, 2, 4],
28-
[0, 1, 2]]]).int().cuda()
42+
idx = torch.tensor(
43+
[[[0, 1, 2], [2, 3, 4], [2, 3, 4], [0, 1, 2], [0, 1, 2], [0, 1, 3]],
44+
[[0, 2, 3], [1, 3, 4], [2, 1, 4], [0, 2, 4], [0, 2, 4], [0, 1, 2]]],
45+
device=device).int()
2946

3047
weight = torch.tensor([[[3.3333e-01, 3.3333e-01, 3.3333e-01],
3148
[1.0000e+00, 5.8155e-08, 2.2373e-08],
@@ -39,7 +56,8 @@ def test_three_interpolate(dtype):
3956
[3.3333e-01, 3.3333e-01, 3.3333e-01],
4057
[3.3333e-01, 3.3333e-01, 3.3333e-01],
4158
[3.3333e-01, 3.3333e-01, 3.3333e-01]]],
42-
dtype=dtype).cuda()
59+
dtype=dtype,
60+
device=device)
4361

4462
output = three_interpolate(features, idx, weight)
4563
expected_output = torch.tensor([[[
@@ -73,6 +91,7 @@ def test_three_interpolate(dtype):
7391
3.8760e-01, 1.0300e-02, 8.3569e-09,
7492
3.8760e-01, 3.8760e-01, 1.9723e-01
7593
]]],
76-
dtype=dtype).cuda()
94+
dtype=dtype,
95+
device=device)
7796

7897
assert torch.allclose(output, expected_output, 1e-3, 1e-4)

0 commit comments

Comments
 (0)