Skip to content

Commit c47bdff

Browse files
committed
Merge branch 'master' into windows
2 parents e4c7367 + 244343c commit c47bdff

21 files changed

+493
-105
lines changed

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ repos:
1212
rev: stable
1313
hooks:
1414
- id: black
15-
language_version: python3.6
15+
language_version: python3.7
1616
args: ["--config", ".black.toml"]
1717
- repo: local
1818
hooks:

CHANGELOG.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,20 @@
1+
# 0.6.7
2+
## Additions
3+
- Chamfer distance (as introduced in https://arxiv.org/pdf/1612.00603) for dense batches
4+
5+
# 0.6.6
6+
## Additions
7+
- Windows support
8+
9+
10+
## Change
11+
- Develop with python 3.7
12+
13+
## Bug fix
14+
- Fixed bug in region growing related to batching
15+
- Ball query for partial dense data on GPU was returning only the first point; this is now fixed
16+
17+
118
# 0.6.5
219

320
## Additions

README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ python -m unittest
2626
```
2727

2828
## Troubleshooting
29+
30+
### Compilation issues
2931
Ensure that at least PyTorch 1.4.0 is installed and verify that `cuda/bin` and `cuda/include` are in your `$PATH` and `$CPATH` respectively, e.g.:
3032
```
3133
$ python -c "import torch; print(torch.__version__)"
@@ -78,3 +80,5 @@ See [this useful chart](http://arnon.dk/matching-sm-architectures-arch-and-genco
7880
* [```Pointnet2_Tensorflow```](https://github.com/charlesq34/pointnet2) by [Charles R. Qi](https://github.com/charlesq34)
7981

8082
* [```Pointnet2_PyTorch```](https://github.com/erikwijmans/Pointnet2_PyTorch) by [Erik Wijmans](https://github.com/erikwijmans)
83+
84+
* [```GRNet```](https://github.com/hzxie/GRNet) by [Haozhe Xie](https://github.com/hzxie)

cuda/include/chamfer_dist.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
#include <torch/extension.h>
2+
#include <vector>
3+
4+
std::vector<torch::Tensor> chamfer_dist(torch::Tensor xyz1, torch::Tensor xyz2);
5+
6+
std::vector<torch::Tensor> chamfer_dist_grad(torch::Tensor xyz1, torch::Tensor xyz2,
7+
torch::Tensor idx1, torch::Tensor idx2,
8+
torch::Tensor grad_dist1, torch::Tensor grad_dist2);
9+
10+
std::vector<torch::Tensor> chamfer_dist_kernel_wrapper(torch::Tensor xyz1, torch::Tensor xyz2);
11+
12+
std::vector<torch::Tensor> chamfer_dist_grad_kernel_wrapper(torch::Tensor xyz1, torch::Tensor xyz2,
13+
torch::Tensor idx1, torch::Tensor idx2,
14+
torch::Tensor grad_dist1,
15+
torch::Tensor grad_dist2);

cuda/src/ball_query.cpp

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,11 @@ void query_ball_point_kernel_dense_wrapper(int b, int n, int m, float radius, in
66
const float* new_xyz, const float* xyz, int64_t* idx,
77
float* dist_out);
88

9-
void query_ball_point_kernel_partial_wrapper(int64_t batch_size, int size_x, int size_y, float radius,
10-
int nsample, const float* x, const float* y,
11-
const int64_t* batch_x, const int64_t* batch_y,
12-
int64_t* idx_out, float* dist_out);
9+
void query_ball_point_kernel_partial_wrapper(int64_t batch_size, int size_x, int size_y,
10+
float radius, int nsample, const float* x,
11+
const float* y, const int64_t* batch_x,
12+
const int64_t* batch_y, int64_t* idx_out,
13+
float* dist_out);
1314

1415
std::pair<at::Tensor, at::Tensor> ball_query_dense(at::Tensor new_xyz, at::Tensor xyz,
1516
const float radius, const int nsample)
@@ -71,10 +72,10 @@ std::pair<at::Tensor, at::Tensor> ball_query_partial_dense(at::Tensor x, at::Ten
7172
batch_y = degree(batch_y, batch_size);
7273
batch_y = at::cat({at::zeros(1, batch_y.options()), batch_y.cumsum(0)}, 0);
7374

74-
query_ball_point_kernel_partial_wrapper(batch_size, x.size(0), y.size(0), radius, nsample,
75-
x.DATA_PTR<float>(), y.DATA_PTR<float>(),
76-
batch_x.DATA_PTR<int64_t>(), batch_y.DATA_PTR<int64_t>(),
77-
idx.DATA_PTR<int64_t>(), dist.DATA_PTR<float>());
75+
query_ball_point_kernel_partial_wrapper(
76+
batch_size, x.size(0), y.size(0), radius, nsample, x.DATA_PTR<float>(), y.DATA_PTR<float>(),
77+
batch_x.DATA_PTR<int64_t>(), batch_y.DATA_PTR<int64_t>(), idx.DATA_PTR<int64_t>(),
78+
dist.DATA_PTR<float>());
7879

7980
return std::make_pair(idx, dist);
8081
}

cuda/src/ball_query_gpu.cu

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
__global__ void query_ball_point_kernel_dense(int b, int n, int m, float radius, int nsample,
1010
const float* __restrict__ new_xyz,
1111
const float* __restrict__ xyz,
12-
int64_t* __restrict__ idx_out,
12+
int64_t* __restrict__ idx_out,
1313
float* __restrict__ dist_out)
1414
{
1515
int batch_index = blockIdx.x;
@@ -51,15 +51,17 @@ __global__ void query_ball_point_kernel_dense(int b, int n, int m, float radius,
5151
}
5252
}
5353

54-
__global__ void query_ball_point_kernel_partial_dense(
55-
int size_x, int size_y, float radius, int nsample, const float* __restrict__ x,
56-
const float* __restrict__ y, const int64_t* __restrict__ batch_x, const int64_t* __restrict__ batch_y,
57-
int64_t* __restrict__ idx_out, float* __restrict__ dist_out)
54+
__global__ void query_ball_point_kernel_partial_dense(int size_x, int size_y, float radius,
55+
int nsample, const float* __restrict__ x,
56+
const float* __restrict__ y,
57+
const int64_t* __restrict__ batch_x,
58+
const int64_t* __restrict__ batch_y,
59+
int64_t* __restrict__ idx_out,
60+
float* __restrict__ dist_out)
5861
{
5962
// taken from
6063
// https://github.com/rusty1s/pytorch_cluster/blob/master/cuda/radius_kernel.cu
6164
const ptrdiff_t batch_idx = blockIdx.x;
62-
const ptrdiff_t idx = threadIdx.x;
6365

6466
const ptrdiff_t start_idx_x = batch_x[batch_idx];
6567
const ptrdiff_t end_idx_x = batch_x[batch_idx + 1];
@@ -68,10 +70,10 @@ __global__ void query_ball_point_kernel_partial_dense(
6870
const ptrdiff_t end_idx_y = batch_y[batch_idx + 1];
6971
float radius2 = radius * radius;
7072

71-
for (ptrdiff_t n_x = start_idx_x + idx; n_x < end_idx_x; n_x += TOTAL_THREADS_SPARSE)
73+
for (ptrdiff_t n_y = start_idx_y + threadIdx.x; n_y < end_idx_y; n_y += blockDim.x)
7274
{
7375
int64_t count = 0;
74-
for (ptrdiff_t n_y = start_idx_y; n_y < end_idx_y; n_y++)
76+
for (ptrdiff_t n_x = start_idx_x; n_x < end_idx_x; n_x++)
7577
{
7678
float dist = 0;
7779
for (ptrdiff_t d = 0; d < 3; d++)
@@ -93,19 +95,21 @@ __global__ void query_ball_point_kernel_partial_dense(
9395
}
9496

9597
void query_ball_point_kernel_dense_wrapper(int b, int n, int m, float radius, int nsample,
96-
const float* new_xyz, const float* xyz, int64_t* idx,float* dist_out)
98+
const float* new_xyz, const float* xyz, int64_t* idx,
99+
float* dist_out)
97100
{
98101
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
99102
query_ball_point_kernel_dense<<<b, opt_n_threads(m), 0, stream>>>(b, n, m, radius, nsample,
100-
new_xyz, xyz, idx,dist_out);
103+
new_xyz, xyz, idx, dist_out);
101104

102105
CUDA_CHECK_ERRORS();
103106
}
104107

105-
void query_ball_point_kernel_partial_wrapper(int64_t batch_size, int size_x, int size_y, float radius,
106-
int nsample, const float* x, const float* y,
107-
const int64_t* batch_x, const int64_t* batch_y,
108-
int64_t* idx_out, float* dist_out)
108+
void query_ball_point_kernel_partial_wrapper(int64_t batch_size, int size_x, int size_y,
109+
float radius, int nsample, const float* x,
110+
const float* y, const int64_t* batch_x,
111+
const int64_t* batch_y, int64_t* idx_out,
112+
float* dist_out)
109113
{
110114
query_ball_point_kernel_partial_dense<<<batch_size, TOTAL_THREADS_SPARSE>>>(
111115
size_x, size_y, radius, nsample, x, y, batch_x, batch_y, idx_out, dist_out);

cuda/src/bindings.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#include "ball_query.h"
2+
#include "chamfer_dist.h"
23
#include "interpolate.h"
34
#include "metrics.h"
45
#include "sampling.h"
@@ -15,4 +16,7 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m)
1516
m.def("ball_query_partial_dense", &ball_query_partial_dense);
1617

1718
m.def("instance_iou_cuda", &instance_iou_cuda);
19+
20+
m.def("chamfer_dist", &chamfer_dist);
21+
m.def("chamfer_dist_grad", &chamfer_dist_grad);
1822
}

0 commit comments

Comments
 (0)