Merge pull request #48 from hzxie/master

nicolas-chaulet · web-flow · commit 87a756472b1a · 2020-07-09T08:59:15.000+01:00
Format all code with clang-format and black.
diff --git a/cuda/src/ball_query.cpp b/cuda/src/ball_query.cpp
@@ -6,10 +6,11 @@ void query_ball_point_kernel_dense_wrapper(int b, int n, int m, float radius, in
                                            const float* new_xyz, const float* xyz, int64_t* idx,
                                            float* dist_out);
 
-void query_ball_point_kernel_partial_wrapper(int64_t batch_size, int size_x, int size_y, float radius,
-                                             int nsample, const float* x, const float* y,
-                                             const int64_t* batch_x, const int64_t* batch_y,
-                                             int64_t* idx_out, float* dist_out);
+void query_ball_point_kernel_partial_wrapper(int64_t batch_size, int size_x, int size_y,
+                                             float radius, int nsample, const float* x,
+                                             const float* y, const int64_t* batch_x,
+                                             const int64_t* batch_y, int64_t* idx_out,
+                                             float* dist_out);
 
 std::pair<at::Tensor, at::Tensor> ball_query_dense(at::Tensor new_xyz, at::Tensor xyz,
                                                    const float radius, const int nsample)
@@ -71,10 +72,10 @@ std::pair<at::Tensor, at::Tensor> ball_query_partial_dense(at::Tensor x, at::Ten
     batch_y = degree(batch_y, batch_size);
     batch_y = at::cat({at::zeros(1, batch_y.options()), batch_y.cumsum(0)}, 0);
 
-    query_ball_point_kernel_partial_wrapper(batch_size, x.size(0), y.size(0), radius, nsample,
-                                            x.DATA_PTR<float>(), y.DATA_PTR<float>(),
-                                            batch_x.DATA_PTR<int64_t>(), batch_y.DATA_PTR<int64_t>(),
-                                            idx.DATA_PTR<int64_t>(), dist.DATA_PTR<float>());
+    query_ball_point_kernel_partial_wrapper(
+        batch_size, x.size(0), y.size(0), radius, nsample, x.DATA_PTR<float>(), y.DATA_PTR<float>(),
+        batch_x.DATA_PTR<int64_t>(), batch_y.DATA_PTR<int64_t>(), idx.DATA_PTR<int64_t>(),
+        dist.DATA_PTR<float>());
 
     return std::make_pair(idx, dist);
 }
diff --git a/cuda/src/ball_query_gpu.cu b/cuda/src/ball_query_gpu.cu
@@ -9,7 +9,7 @@
 __global__ void query_ball_point_kernel_dense(int b, int n, int m, float radius, int nsample,
                                               const float* __restrict__ new_xyz,
                                               const float* __restrict__ xyz,
-                                              int64_t* __restrict__ idx_out, 
+                                              int64_t* __restrict__ idx_out,
                                               float* __restrict__ dist_out)
 {
     int batch_index = blockIdx.x;
@@ -51,10 +51,13 @@ __global__ void query_ball_point_kernel_dense(int b, int n, int m, float radius,
     }
 }
 
-__global__ void query_ball_point_kernel_partial_dense(
-    int size_x, int size_y, float radius, int nsample, const float* __restrict__ x,
-    const float* __restrict__ y, const int64_t* __restrict__ batch_x, const int64_t* __restrict__ batch_y,
-    int64_t* __restrict__ idx_out, float* __restrict__ dist_out)
+__global__ void query_ball_point_kernel_partial_dense(int size_x, int size_y, float radius,
+                                                      int nsample, const float* __restrict__ x,
+                                                      const float* __restrict__ y,
+                                                      const int64_t* __restrict__ batch_x,
+                                                      const int64_t* __restrict__ batch_y,
+                                                      int64_t* __restrict__ idx_out,
+                                                      float* __restrict__ dist_out)
 {
     // taken from
     // https://github.com/rusty1s/pytorch_cluster/blob/master/cuda/radius_kernel.cu
@@ -67,7 +70,7 @@ __global__ void query_ball_point_kernel_partial_dense(
     const ptrdiff_t end_idx_y = batch_y[batch_idx + 1];
     float radius2 = radius * radius;
 
-    for (ptrdiff_t n_y = start_idx_y +  threadIdx.x; n_y < end_idx_y; n_y += blockDim.x)
+    for (ptrdiff_t n_y = start_idx_y + threadIdx.x; n_y < end_idx_y; n_y += blockDim.x)
     {
         int64_t count = 0;
         for (ptrdiff_t n_x = start_idx_x; n_x < end_idx_x; n_x++)
@@ -92,19 +95,21 @@ __global__ void query_ball_point_kernel_partial_dense(
 }
 
 void query_ball_point_kernel_dense_wrapper(int b, int n, int m, float radius, int nsample,
-                                           const float* new_xyz, const float* xyz, int64_t* idx,float* dist_out)
+                                           const float* new_xyz, const float* xyz, int64_t* idx,
+                                           float* dist_out)
 {
     cudaStream_t stream = at::cuda::getCurrentCUDAStream();
     query_ball_point_kernel_dense<<<b, opt_n_threads(m), 0, stream>>>(b, n, m, radius, nsample,
-                                                                      new_xyz, xyz, idx,dist_out);
+                                                                      new_xyz, xyz, idx, dist_out);
 
     CUDA_CHECK_ERRORS();
 }
 
-void query_ball_point_kernel_partial_wrapper(int64_t batch_size, int size_x, int size_y, float radius,
-                                             int nsample, const float* x, const float* y,
-                                             const int64_t* batch_x, const int64_t* batch_y,
-                                             int64_t* idx_out, float* dist_out)
+void query_ball_point_kernel_partial_wrapper(int64_t batch_size, int size_x, int size_y,
+                                             float radius, int nsample, const float* x,
+                                             const float* y, const int64_t* batch_x,
+                                             const int64_t* batch_y, int64_t* idx_out,
+                                             float* dist_out)
 {
     query_ball_point_kernel_partial_dense<<<batch_size, TOTAL_THREADS_SPARSE>>>(
         size_x, size_y, radius, nsample, x, y, batch_x, batch_y, idx_out, dist_out);
diff --git a/cuda/src/metrics.cpp b/cuda/src/metrics.cpp
@@ -3,8 +3,9 @@
 #include "utils.h"
 
 void instance_iou_kernel_wrapper(int64_t total_gt_instances, int64_t max_gt_instances,
-                                 const int64_t* nInstance, int nProposal, const int64_t* proposals_idx,
-                                 const int64_t* proposals_offset, const int64_t* instance_labels,
+                                 const int64_t* nInstance, int nProposal,
+                                 const int64_t* proposals_idx, const int64_t* proposals_offset,
+                                 const int64_t* instance_labels,
                                  const int64_t* offset_num_gt_instances, const int64_t* batch,
                                  const int64_t* instance_pointnum, float* proposals_iou);
 
@@ -41,9 +42,10 @@ at::Tensor instance_iou_cuda(at::Tensor instance_idx, at::Tensor instance_offset
         at::cat({at::zeros(1, num_gt_instances.options()), num_gt_instances.cumsum(0)}, 0);
     instance_iou_kernel_wrapper(
         total_gt_instances[0], max_gt_instances[0], num_gt_instances.DATA_PTR<int64_t>(),
-        num_proposed_instances, instance_idx.DATA_PTR<int64_t>(), instance_offsets.DATA_PTR<int64_t>(),
-        gt_instances.DATA_PTR<int64_t>(), offset_num_gt_instances.DATA_PTR<int64_t>(),
-        batch.DATA_PTR<int64_t>(), gt_instance_sizes.DATA_PTR<int64_t>(), output.DATA_PTR<float>());
+        num_proposed_instances, instance_idx.DATA_PTR<int64_t>(),
+        instance_offsets.DATA_PTR<int64_t>(), gt_instances.DATA_PTR<int64_t>(),
+        offset_num_gt_instances.DATA_PTR<int64_t>(), batch.DATA_PTR<int64_t>(),
+        gt_instance_sizes.DATA_PTR<int64_t>(), output.DATA_PTR<float>());
 
     return output;
 }
diff --git a/cuda/src/metrics_gpu.cu b/cuda/src/metrics_gpu.cu
@@ -9,9 +9,9 @@
 __global__ void instance_iou_cuda_kernel(
     int64_t total_gt_instances, const int64_t* __restrict__ nInstance, int nProposal,
     const int64_t* __restrict__ proposals_idx, const int64_t* __restrict__ proposals_offset,
-    const int64_t* __restrict__ instance_labels, const int64_t* __restrict__ offset_num_gt_instances,
-    const int64_t* __restrict__ batch, const int64_t* __restrict__ instance_pointnum,
-    float* proposals_iou)
+    const int64_t* __restrict__ instance_labels,
+    const int64_t* __restrict__ offset_num_gt_instances, const int64_t* __restrict__ batch,
+    const int64_t* __restrict__ instance_pointnum, float* proposals_iou)
 {
     for (int proposal_id = blockIdx.x; proposal_id < nProposal; proposal_id += gridDim.x)
     {
@@ -48,8 +48,9 @@ __global__ void instance_iou_cuda_kernel(
 // input: instance_pointnum (total_nInst), int
 // output: proposals_iou (nProposal, total_nInst), float
 void instance_iou_kernel_wrapper(int64_t total_gt_instances, int64_t max_gt_instances,
-                                 const int64_t* nInstance, int nProposal, const int64_t* proposals_idx,
-                                 const int64_t* proposals_offset, const int64_t* instance_labels,
+                                 const int64_t* nInstance, int nProposal,
+                                 const int64_t* proposals_idx, const int64_t* proposals_offset,
+                                 const int64_t* instance_labels,
                                  const int64_t* offset_num_gt_instances, const int64_t* batch,
                                  const int64_t* instance_pointnum, float* proposals_iou)
 {
diff --git a/setup.py b/setup.py
@@ -28,9 +28,7 @@ def get_ext_modules():
         extra_compile_args += ["-DVERSION_GE_1_3"]
 
     ext_src_root = "cuda"
-    ext_sources = glob.glob("{}/src/*.cpp".format(ext_src_root)) + glob.glob(
-        "{}/src/*.cu".format(ext_src_root)
-    )
+    ext_sources = glob.glob("{}/src/*.cpp".format(ext_src_root)) + glob.glob("{}/src/*.cu".format(ext_src_root))
 
     ext_modules = []
     if CUDA_HOME:
@@ -39,10 +37,7 @@ def get_ext_modules():
                 name="torch_points_kernels.points_cuda",
                 sources=ext_sources,
                 include_dirs=["{}/include".format(ext_src_root)],
-                extra_compile_args={
-                    "cxx": extra_compile_args,
-                    "nvcc": extra_compile_args,
-                },
+                extra_compile_args={"cxx": extra_compile_args, "nvcc": extra_compile_args,},
             )
         )
 
@@ -86,8 +81,5 @@ def get_cmdclass():
     cmdclass=get_cmdclass(),
     long_description=long_description,
     long_description_content_type="text/markdown",
-    classifiers=[
-        "Programming Language :: Python :: 3",
-        "License :: OSI Approved :: MIT License",
-    ],
+    classifiers=["Programming Language :: Python :: 3", "License :: OSI Approved :: MIT License",],
 )
diff --git a/test/speed_radius.py b/test/speed_radius.py
@@ -23,8 +23,8 @@ def test_speed(self):
         R = 1
         samples = 50
 
-        idx, dist = ball_query(R, samples, a, b, mode="PARTIAL_DENSE", batch_x=batch_a, batch_y=batch_b, sort=True)
-        idx1, dist = ball_query(R, samples, a, b, mode="PARTIAL_DENSE", batch_x=batch_a, batch_y=batch_b, sort=True)
+        idx, dist = ball_query(R, samples, a, b, mode="PARTIAL_DENSE", batch_x=batch_a, batch_y=batch_b, sort=True,)
+        idx1, dist = ball_query(R, samples, a, b, mode="PARTIAL_DENSE", batch_x=batch_a, batch_y=batch_b, sort=True,)
         print(time.time() - start)
         torch.testing.assert_allclose(idx1, idx)
 
@@ -40,5 +40,6 @@ def test_speed(self):
         #     if p >= 0 and p < len(batch_a):
         #         assert p in idx3_sk[i]
 
+
 if __name__ == "__main__":
-    unittest.main()
+    unittest.main()
diff --git a/test/test_chamfer_dist.py b/test/test_chamfer_dist.py
@@ -26,22 +26,14 @@ def test_chamfer_dist_grad(self):
 
     @run_if_cuda
     def test_chamfer_dist(self):
-        xyz1 = torch.from_numpy(np.array([[
-            [0, 0, 0],
-            [1, 1, 1],
-            [2, 0, 1]
-        ]])).float()
+        xyz1 = torch.from_numpy(np.array([[[0, 0, 0], [1, 1, 1], [2, 0, 1]]])).float()
         xyz2 = torch.from_numpy(np.array([[[1, 0, 0], [1, 2, 1]]])).float()
         dist = chamfer_dist(xyz1.cuda(), xyz2.cuda())
         self.assertAlmostEqual(dist.item(), 2.333333, places=5)
 
     @run_if_cuda
     def test_chamfer_dist_ignore_zeros(self):
-        xyz1 = torch.from_numpy(np.array([[
-            [0, 0, 0],
-            [1, 1, 1],
-            [2, 0, 1]
-        ]])).float()
+        xyz1 = torch.from_numpy(np.array([[[0, 0, 0], [1, 1, 1], [2, 0, 1]]])).float()
         xyz2 = torch.from_numpy(np.array([[[1, 0, 0], [1, 2, 1]]])).float()
         dist = chamfer_dist(xyz1.cuda(), xyz2.cuda(), True)
         self.assertAlmostEqual(dist.item(), 3.0, places=5)
diff --git a/test/test_cluster.py b/test/test_cluster.py
@@ -12,16 +12,7 @@
 class TestGrow(unittest.TestCase):
     def setUp(self):
         self.pos = torch.tensor(
-            [
-                [0, 0, 0],
-                [1, 0, 0],
-                [2, 0, 0],
-                [10, 0, 0],
-                [0, 0, 0],
-                [1, 0, 0],
-                [2, 0, 0],
-                [10, 0, 0],
-            ]
+            [[0, 0, 0], [1, 0, 0], [2, 0, 0], [10, 0, 0], [0, 0, 0], [1, 0, 0], [2, 0, 0], [10, 0, 0],]
         )
         self.batch = torch.tensor([0, 0, 0, 0, 1, 1, 1, 1])
         self.labels = torch.tensor([0, 0, 1, 1, 0, 1, 1, 10])
@@ -34,9 +25,7 @@ def test_simple(self):
         self.assertEqual(clusters, [[0, 1, 2], [4, 5, 6]])
 
     def test_region_grow(self):
-        cluster_idx = region_grow(
-            self.pos, self.labels, self.batch, radius=2, min_cluster_size=1
-        )
+        cluster_idx = region_grow(self.pos, self.labels, self.batch, radius=2, min_cluster_size=1)
         self.assertEqual(len(cluster_idx), 6)
         torch.testing.assert_allclose(cluster_idx[0], torch.tensor([0, 1]))
         torch.testing.assert_allclose(cluster_idx[1], torch.tensor([4]))
diff --git a/test/test_grouping.py b/test/test_grouping.py
@@ -37,7 +37,9 @@ def test_simple(self):
         npt.assert_array_equal(expected, cpu_output)
 
         if torch.cuda.is_available():
-            npt.assert_array_equal(grouping_operation(features.cuda(), idx.cuda()).detach().cpu().numpy(), expected)
+            npt.assert_array_equal(
+                grouping_operation(features.cuda(), idx.cuda()).detach().cpu().numpy(), expected,
+            )
 
 
 if __name__ == "__main__":
diff --git a/test/test_metrics.py b/test/test_metrics.py
@@ -24,7 +24,9 @@ def test_simple(self, cuda=False):
             proposed_instances = [c.cuda() for c in proposed_instances]
             gt_instances = gt_instances.cuda()
         ious = instance_iou(proposed_instances, gt_instances)
-        torch.testing.assert_allclose(ious.cpu(), torch.tensor([[1, 0, 0], [0, 2 / 3.0, 0], [0, 1.0 / 4.0, 1.0 / 2.0]]))
+        torch.testing.assert_allclose(
+            ious.cpu(), torch.tensor([[1, 0, 0], [0, 2 / 3.0, 0], [0, 1.0 / 4.0, 1.0 / 2.0]]),
+        )
 
     def test_batch(self, cuda=False):
         gt_instances = torch.tensor([1, 2, 1, 2, 2, 3, 0])
diff --git a/torch_points_kernels/__init__.py b/torch_points_kernels/__init__.py
@@ -12,5 +12,5 @@
     "knn",
     "region_grow",
     "instance_iou",
-    "chamfer_dist"
+    "chamfer_dist",
 ]
diff --git a/torch_points_kernels/cluster.py b/torch_points_kernels/cluster.py
@@ -41,11 +41,7 @@ def grow_proximity(pos, batch, nsample=16, radius=0.02, min_cluster_size=32):
     """ Grow based on proximity only
     Neighbour search is done on device while the cluster assignement is done on cpu"""
     assert pos.shape[0] == batch.shape[0]
-    neighbours = (
-        ball_query_partial_dense(radius, nsample, pos, pos, batch, batch)[0]
-        .cpu()
-        .numpy()
-    )
+    neighbours = ball_query_partial_dense(radius, nsample, pos, pos, batch, batch)[0].cpu().numpy()
     return _grow_proximity_core(neighbours, min_cluster_size)
 
 
@@ -97,11 +93,7 @@ def region_grow(
 
         # Cluster
         label_clusters = grow_proximity(
-            pos[label_mask, :],
-            remaped_batch,
-            nsample=nsample,
-            radius=radius,
-            min_cluster_size=min_cluster_size,
+            pos[label_mask, :], remaped_batch, nsample=nsample, radius=radius, min_cluster_size=min_cluster_size,
         )
 
         # Remap indices to original coordinates
diff --git a/torch_points_kernels/torchpoints.py b/torch_points_kernels/torchpoints.py
@@ -30,10 +30,7 @@ def furthest_point_sample(xyz, npoint):
         (B, npoint) tensor containing the set
     """
     if npoint > xyz.shape[1]:
-        raise ValueError(
-            "caanot sample %i points from an input set of %i points"
-            % (npoint, xyz.shape[1])
-        )
+        raise ValueError("caanot sample %i points from an input set of %i points" % (npoint, xyz.shape[1]))
     if xyz.is_cuda:
         return tpcuda.furthest_point_sampling(xyz, npoint)
     else:
@@ -102,13 +99,9 @@ def backward(ctx, grad_out):
         idx, weight, m = ctx.three_interpolate_for_backward
 
         if grad_out.is_cuda:
-            grad_features = tpcuda.three_interpolate_grad(
-                grad_out.contiguous(), idx, weight, m
-            )
+            grad_features = tpcuda.three_interpolate_grad(grad_out.contiguous(), idx, weight, m)
         else:
-            grad_features = tpcpu.knn_interpolate_grad(
-                grad_out.contiguous(), idx, weight, m
-            )
+            grad_features = tpcpu.knn_interpolate_grad(grad_out.contiguous(), idx, weight, m)
 
         return grad_features, None, None
 
@@ -150,23 +143,17 @@ def grouping_operation(features, idx):
     all_idx = idx.reshape(idx.shape[0], -1)
     all_idx = all_idx.unsqueeze(1).repeat(1, features.shape[1], 1)
     grouped_features = features.gather(2, all_idx)
-    return grouped_features.reshape(
-        idx.shape[0], features.shape[1], idx.shape[1], idx.shape[2]
-    )
+    return grouped_features.reshape(idx.shape[0], features.shape[1], idx.shape[1], idx.shape[2])
 
 
-def ball_query_dense(
-    radius, nsample, xyz, new_xyz, batch_xyz=None, batch_new_xyz=None, sort=False
-):
+def ball_query_dense(radius, nsample, xyz, new_xyz, batch_xyz=None, batch_new_xyz=None, sort=False):
     # type: (Any, float, int, torch.Tensor, torch.Tensor) -> torch.Tensor
     if new_xyz.is_cuda:
         if sort:
             raise NotImplementedError("CUDA version does not sort the neighbors")
         ind, dist = tpcuda.ball_query_dense(new_xyz, xyz, radius, nsample)
     else:
-        ind, dist = tpcpu.dense_ball_query(
-            new_xyz, xyz, radius, nsample, mode=0, sorted=sort
-        )
+        ind, dist = tpcpu.dense_ball_query(new_xyz, xyz, radius, nsample, mode=0, sorted=sort)
     return ind, dist
 
 
@@ -175,13 +162,9 @@ def ball_query_partial_dense(radius, nsample, x, y, batch_x, batch_y, sort=False
     if x.is_cuda:
         if sort:
             raise NotImplementedError("CUDA version does not sort the neighbors")
-        ind, dist = tpcuda.ball_query_partial_dense(
-            x, y, batch_x, batch_y, radius, nsample
-        )
+        ind, dist = tpcuda.ball_query_partial_dense(x, y, batch_x, batch_y, radius, nsample)
     else:
-        ind, dist = tpcpu.batch_ball_query(
-            x, y, batch_x, batch_y, radius, nsample, mode=0, sorted=sort
-        )
+        ind, dist = tpcpu.batch_ball_query(x, y, batch_x, batch_y, radius, nsample, mode=0, sorted=sort)
     return ind, dist
 
 
@@ -224,9 +207,7 @@ def ball_query(
         assert x.size(0) == batch_x.size(0)
         assert y.size(0) == batch_y.size(0)
         assert x.dim() == 2
-        return ball_query_partial_dense(
-            radius, nsample, x, y, batch_x, batch_y, sort=sort
-        )
+        return ball_query_partial_dense(radius, nsample, x, y, batch_x, batch_y, sort=sort)
 
     elif mode.lower() == "dense":
         if (batch_x is not None) or (batch_y is not None):