Skip to content

Commit 3c9a8e4

Browse files
Merge branch 'master' into singleapi
2 parents 80fd7ab + 2884bcc commit 3c9a8e4

File tree

8 files changed

+164
-56
lines changed

8 files changed

+164
-56
lines changed

cpu/include/ball_query.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,7 @@ std::pair<at::Tensor, at::Tensor> batch_ball_query(at::Tensor query,
99
at::Tensor query_batch,
1010
at::Tensor support_batch,
1111
float radius, int max_num, int mode);
12+
13+
/// Radius search over a dense batch of point clouds.
///
/// Declared next to batch_ball_query; the implementation lives in
/// torch_nearest_neighbors.cpp.
///
/// @param query    query points (the Python binding documents B x N1 x 3)
/// @param support  support points (the Python binding documents B x N2 x 3)
/// @param radius   radius of the search ball
/// @param max_num  maximum number of neighbours kept per query point
/// @param mode     output layout selector (0: neighbour matrix, 1: edge list,
///                 per the binding docstring)
/// @return pair of (neighbour indices, neighbour distances)
std::pair<at::Tensor, at::Tensor> dense_ball_query(
    at::Tensor query,
    at::Tensor support,
    float radius,
    int max_num,
    int mode);

cpu/include/cloud.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ struct PointCloud
5757
pts = temp;
5858
}
5959
void set_batch(std::vector<scalar_t> new_pts, int begin, int size){
60+
6061
std::vector<PointXYZ> temp(size);
6162
for(int i=0; i < size; i++){
6263
PointXYZ point;

cpu/src/bindings.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,4 +35,15 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
3535
"mode=1 means a matrix of edges of size Num_edge x 2"
3636
"return a tensor of size N1 x M where M is either max_num or the maximum number of neighbors found if mode = 0, if mode=1 return a tensor of size Num_edge x 2 and return a tensor containing the squared distance of the neighbors",
3737
"query"_a, "support"_a, "query_batch"_a, "support_batch"_a, "radius"_a, "max_num"_a=-1, "mode"_a=0);
38+
m.def("dense_ball_query", &dense_ball_query,
39+
"compute the radius search of a batch of point cloud using nanoflann"
40+
"-query : a pytorch tensor of size B x N1 x 3,. used to query the nearest neighbors"
41+
"- support : a pytorch tensor of size B x N2 x 3. used to build the tree"
42+
"- radius : float number, size of the ball for the radius search."
43+
"- max_num : int number, indicate the maximum of neaghbors allowed(if -1 then all the possible neighbors will be computed). "
44+
" - mode : int number that indicate which format for the neighborhood"
45+
" mode=0 mean a matrix of neighbors(-1 for shadow neighbors)"
46+
"mode=1 means a matrix of edges of size Num_edge x 2"
47+
"return a tensor of size N1 x M where M is either max_num or the maximum number of neighbors found if mode = 0, if mode=1 return a tensor of size Num_edge x 2 and return a tensor containing the squared distance of the neighbors",
48+
"query"_a, "support"_a, "radius"_a, "max_num"_a=-1, "mode"_a=0);
3849
}

cpu/src/neighbors.cpp

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -82,16 +82,20 @@ int nanoflann_neighbors(vector<scalar_t>& queries,
8282

8383
i0 = 0;
8484

85+
int token = 0;
8586
for (auto& inds : list_matches){
87+
token = inds[0].first;
8688
for (int j = 0; j < max_count; j++){
8789
if (j < inds.size()){
8890
neighbors_indices[i0 * max_count + j] = inds[j].first;
8991
dists[i0 * max_count + j] = (float) inds[j].second;
92+
93+
9094
}
9195

9296
else {
93-
neighbors_indices[i0 * max_count + j] = -1;
94-
dists[i0 * max_count + j] = radius * radius;
97+
neighbors_indices[i0 * max_count + j] = token;
98+
dists[i0 * max_count + j] = -1;
9599
}
96100
}
97101
i0++;
@@ -186,24 +190,30 @@ int batch_nanoflann_neighbors (vector<scalar_t>& queries,
186190
search_params.sorted = true;
187191
for (auto& p0 : query_pcd.pts){
188192
// Check if we changed batch
189-
190-
if (i0 == sum_qb + q_batches[b]){
193+
if (i0 == sum_qb + q_batches[b] && b < s_batches.size()){
191194
sum_qb += q_batches[b];
192195
sum_sb += s_batches[b];
196+
193197
b++;
194198

195199
// Change the points
196200
current_cloud.pts.clear();
197201
current_cloud.set_batch(supports, sum_sb, s_batches[b]);
198202
// Build KDTree of the current element of the batch
199203
delete index;
204+
200205
index = new my_kd_tree_t(3, current_cloud, tree_params);
201206
index->buildIndex();
202207
}
203208
// Initial guess of neighbors size
209+
210+
204211
all_inds_dists[i0].reserve(max_count);
205212
// Find neighbors
213+
//std::cerr << p0.x << p0.y << p0.z<<std::endl;
206214
scalar_t query_pt[3] = { p0.x, p0.y, p0.z};
215+
216+
207217
size_t nMatches = index->radiusSearch(query_pt, r2, all_inds_dists[i0], search_params);
208218
// Update max count
209219

@@ -217,8 +227,10 @@ int batch_nanoflann_neighbors (vector<scalar_t>& queries,
217227
max_count = max_num;
218228
}
219229
// Reserve the memory
230+
220231
if(mode == 0){
221232
neighbors_indices.resize(query_pcd.pts.size() * max_count);
233+
222234
dists.resize(query_pcd.pts.size() * max_count);
223235
i0 = 0;
224236
sum_sb = 0;
@@ -227,6 +239,7 @@ int batch_nanoflann_neighbors (vector<scalar_t>& queries,
227239

228240
for (auto& inds_dists : all_inds_dists){// Check if we changed batch
229241

242+
230243
if (i0 == sum_qb + q_batches[b]){
231244
sum_qb += q_batches[b];
232245
sum_sb += s_batches[b];
@@ -239,8 +252,8 @@ int batch_nanoflann_neighbors (vector<scalar_t>& queries,
239252
dists[i0 * max_count + j] = (float) inds_dists[j].second;
240253
}
241254
else {
242-
neighbors_indices[i0 * max_count + j] = supports.size();
243-
dists[i0 * max_count + j] = radius * radius;
255+
neighbors_indices[i0 * max_count + j] = supports.size()/3;
256+
dists[i0 * max_count + j] = -1;
244257
}
245258

246259
}

cpu/src/torch_nearest_neighbors.cpp

Lines changed: 36 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -61,20 +61,22 @@ std::pair<at::Tensor, at::Tensor> ball_query(at::Tensor query,
6161

6262
/**
 * Build the per-batch point counts from a list of batch ids.
 *
 * Despite its name this does not store a running sum: `res[0]` is always 0
 * and `res[k + 1]` is the number of entries of `batch` equal to
 * `batch[0] + k`. Ids that never occur between the first and the last id get
 * a count of 0.
 *
 * @param batch batch id of every point; must be sorted in non-decreasing
 *              order (equal ids contiguous), otherwise the result is
 *              meaningless and indexing may go out of range.
 * @param res   output; any previous content is discarded.
 *
 * An empty `batch` yields `{0, 0}` (leading zero plus one empty batch). The
 * previous code read `batch[batch.size() - 1]` in that case, which is
 * undefined behaviour.
 */
void cumsum(const std::vector<long>& batch, std::vector<long>& res){
    using size_type = std::vector<long>::size_type;

    if (batch.empty()){
        res.assign(2, 0);
        return;
    }

    const long first_id = batch.front();
    // assign() rather than resize(): resize only zero-fills *new* elements,
    // so a reused `res` would keep stale counts (including res[0]).
    res.assign(static_cast<size_type>(batch.back() - first_id) + 2, 0);

    long current_id = first_id;
    long run_length = 0;
    for (const long id : batch){
        if (id != current_id){
            // The run of `current_id` just ended: store its length one slot
            // to the right so that res[0] stays 0.
            res[static_cast<size_type>(current_id - first_id) + 1] = run_length;
            current_id = id;
            run_length = 0;
        }
        ++run_length;
    }
    // Flush the last run.
    res[static_cast<size_type>(current_id - first_id) + 1] = run_length;
}
7981

8082
std::pair<at::Tensor, at::Tensor> batch_ball_query(at::Tensor query,
@@ -89,9 +91,11 @@ std::pair<at::Tensor, at::Tensor> batch_ball_query(at::Tensor query,
8991
std::vector<long> query_batch_stl = std::vector<long>(data_qb, data_qb+query_batch.size(0));
9092
std::vector<long> cumsum_query_batch_stl;
9193
cumsum(query_batch_stl, cumsum_query_batch_stl);
94+
9295
std::vector<long> support_batch_stl = std::vector<long>(data_sb, data_sb+support_batch.size(0));
9396
std::vector<long> cumsum_support_batch_stl;
9497
cumsum(support_batch_stl, cumsum_support_batch_stl);
98+
9599
std::vector<long> neighbors_indices;
96100

97101
auto options = torch::TensorOptions().dtype(torch::kLong).device(torch::kCPU);
@@ -107,6 +111,7 @@ std::pair<at::Tensor, at::Tensor> batch_ball_query(at::Tensor query,
107111
std::vector<scalar_t> supports_stl = std::vector<scalar_t>(data_s,
108112
data_s + support.size(0)*support.size(1));
109113

114+
110115
max_count = batch_nanoflann_neighbors<scalar_t>(queries_stl,
111116
supports_stl,
112117
cumsum_query_batch_stl,
@@ -117,6 +122,7 @@ std::pair<at::Tensor, at::Tensor> batch_ball_query(at::Tensor query,
117122
max_num,
118123
mode);
119124
});
125+
120126
long* neighbors_indices_ptr = neighbors_indices.data();
121127
auto neighbors_dists_ptr = neighbors_dists.data();
122128

@@ -135,3 +141,23 @@ std::pair<at::Tensor, at::Tensor> batch_ball_query(at::Tensor query,
135141
}
136142
return std::make_pair(out.clone(), out_dists.clone());
137143
}
144+
145+
146+
/**
 * Radius search over a dense batch of point clouds.
 *
 * Runs ball_query independently on every slice along dimension 0 of `query`
 * and `support`, then stacks the per-slice index tensors and the per-slice
 * distance tensors along a new leading dimension.
 *
 * @param query    query points; dimension 0 is the batch dimension
 * @param support  support points, sliced with the same batch index as `query`
 * @param radius   forwarded unchanged to ball_query
 * @param max_num  forwarded unchanged to ball_query
 * @param mode     forwarded unchanged to ball_query
 * @return pair of (stacked neighbour indices, stacked neighbour distances)
 */
std::pair<at::Tensor, at::Tensor> dense_ball_query(at::Tensor query,
                                                   at::Tensor support,
                                                   float radius, int max_num, int mode){
    const int batch_size = query.size(0);

    std::vector<at::Tensor> idx_per_batch;
    std::vector<at::Tensor> dist_per_batch;

    for (int batch = 0; batch < batch_size; ++batch){
        // One independent search per element of the batch.
        auto result = ball_query(query[batch], support[batch], radius, max_num, mode);
        idx_per_batch.push_back(result.first);
        dist_per_batch.push_back(result.second);
    }

    auto stacked_idx = torch::stack(idx_per_batch);
    auto stacked_dist = torch::stack(dist_per_batch);
    return std::make_pair(stacked_idx, stacked_dist);
}

setup.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
from setuptools import setup, find_packages
2-
from torch.utils.cpp_extension import BuildExtension, CUDAExtension, CUDA_HOME, CppExtension
2+
from torch.utils.cpp_extension import (
3+
BuildExtension,
4+
CUDAExtension,
5+
CUDA_HOME,
6+
CppExtension,
7+
)
38
import glob
49

510
ext_src_root = "cuda"
@@ -33,12 +38,14 @@
3338
)
3439
)
3540

41+
# Runtime dependencies handed to setuptools' install_requires.
# The caret operator ("torch^1.1.0") is not a valid PEP 440 / PEP 508 version
# specifier, so the requirement is spelled as a minimum version instead.
requirements = ["torch>=1.1.0"]
42+
3643
setup(
3744
name="torch_points",
38-
version="0.1.4",
45+
version="0.1.5",
3946
author="Nicolas Chaulet",
4047
packages=find_packages(),
41-
install_requires=[],
48+
install_requires=requirements,
4249
ext_modules=ext_modules,
4350
cmdclass={"build_ext": BuildExtension},
4451
)

test/test_ballquerry.py

Lines changed: 60 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -10,43 +10,88 @@ def test_simple_gpu(self):
1010
a = torch.tensor([[[0, 0, 0], [1, 0, 0], [2, 0, 0]]]).to(torch.float).cuda()
1111
b = torch.tensor([[[0, 0, 0]]]).to(torch.float).cuda()
1212

13-
npt.assert_array_equal(ball_query(1, 2, a, b).detach().cpu().numpy(), np.array([[[0, 0]]]))
13+
npt.assert_array_equal(
14+
ball_query(1, 2, a, b).detach().cpu().numpy(), np.array([[[0, 0]]])
15+
)
1416

1517
def test_larger_gpu(self):
1618
a = torch.randn(32, 4096, 3).to(torch.float).cuda()
1719
idx = ball_query(1, 64, a, a).detach().cpu().numpy()
18-
self.assertGreaterEqual(idx.min(),0)
20+
self.assertGreaterEqual(idx.min(), 0)
1921

20-
# def test_simple_cpu(self):
21-
# a = torch.tensor([[[0, 0, 0], [1, 0, 0], [2, 0, 0]]]).to(torch.float)
22-
# b = torch.tensor([[[0, 0, 0]]]).to(torch.float)
23-
# npt.assert_array_equal(ball_query(1, 2, a, b).detach().numpy(), np.array([[[0, 0]]]))
22+
def test_cpu_gpu_equality(self):
    """The CPU and CUDA ball queries return the same neighbour set per point.

    Runs the same query on a random (5, 1000, 3) cloud on both devices and
    compares the results point by point.
    """
    a = torch.randn(5, 1000, 3)
    res_cpu = ball_query(0.1, 17, a, a).detach().numpy()
    res_cuda = ball_query(0.1, 17, a.cuda(), a.cuda()).cpu().detach().numpy()
    for i in range(a.shape[0]):
        for j in range(a.shape[1]):
            # The two backends need not list neighbours in the same order, so
            # compare as sets. assertEqual (instead of a bare assert) still
            # runs under `python -O` and reports which point disagreed.
            self.assertEqual(
                set(res_cpu[i][j]),
                set(res_cuda[i][j]),
                msg="neighbour sets differ for batch %d, point %d" % (i, j),
            )
2430

25-
# def test_cpu_gpu_equality(self):
26-
# a = torch.randn(5, 1000, 3)
27-
# npt.assert_array_equal(ball_query(0.1, 17, a, a).detach().numpy(),
28-
# ball_query(0.1, 17, a.cuda(), a.cuda()).cpu().detach().numpy())
2931

30-
def test_partial_gpu(self):
31-
x = torch.tensor([[10, 0, 0], [0.1, 0, 0], [10, 0, 0], [0.1, 0, 0]]).to(torch.float).cuda()
32+
class TestBallPartial(unittest.TestCase):
33+
def test_simple_gpu(self):
    """PARTIAL_DENSE ball query on CUDA tensors with hand-checked values.

    x holds four points split over batch ids 0 and 1; y holds the single
    point at the origin in batch 0.
    """
    x = (
        torch.tensor([[10, 0, 0], [0.1, 0, 0], [10, 0, 0], [0.1, 0, 0]])
        .to(torch.float)
        .cuda()
    )
    y = torch.tensor([[0, 0, 0]]).to(torch.float).cuda()

    # Batch id of every row of x / y (previously built twice, identically).
    batch_x = torch.from_numpy(np.asarray([0, 0, 1, 1])).long().cuda()
    batch_y = torch.from_numpy(np.asarray([0])).long().cuda()

    idx, dist2 = ball_query(
        1.0, 2, x, y, mode="PARTIAL_DENSE", batch_x=batch_x, batch_y=batch_y
    )

    idx = idx.detach().cpu().numpy()
    dist2 = dist2.detach().cpu().numpy()

    # The second slot is expected to hold index 4 (the number of rows in x)
    # with distance -1; presumably the padding used when fewer than two
    # neighbours are found -- confirm against the CUDA kernel.
    idx_answer = np.asarray([[1, 4]])
    dist2_answer = np.asarray([[0.0100, -1.0000]]).astype(np.float32)

    npt.assert_array_almost_equal(idx, idx_answer)
    npt.assert_array_almost_equal(dist2, dist2_answer)
4958

59+
def test_simple_cpu(self):
    """PARTIAL_DENSE ball query on CPU tensors with hand-checked values.

    Four x points over batch ids 0 and 1 are queried against a single y
    point at the origin in batch 0, with radius 1 and two slots per point.
    """
    x = torch.tensor(
        [[10, 0, 0], [0.1, 0, 0], [10, 0, 0], [0.1, 0, 0]], dtype=torch.float
    )
    y = torch.tensor([[0, 0, 0]], dtype=torch.float)

    batch_x = torch.tensor([0, 0, 1, 1], dtype=torch.long)
    batch_y = torch.tensor([0], dtype=torch.long)

    idx, dist2 = ball_query(
        1.0, 2, x, y, mode="PARTIAL_DENSE", batch_x=batch_x, batch_y=batch_y
    )
    idx = idx.detach().cpu().numpy()
    dist2 = dist2.detach().cpu().numpy()

    # One row per x point; only the second row expects a real neighbour
    # (index 0, squared distance 0.01). All other slots expect index 1 and
    # distance -1.
    expected_idx = np.array([[1, 1], [0, 1], [1, 1], [1, 1]])
    expected_dist2 = np.array(
        [[-1, -1], [0.01, -1], [-1, -1], [-1, -1]], dtype=np.float32
    )

    npt.assert_array_almost_equal(idx, expected_idx)
    npt.assert_array_almost_equal(dist2, expected_dist2)
82+
83+
def test_random_cpu(self):
    """Smoke test: PARTIAL_DENSE ball query on random CPU clouds, both ways.

    Both clouds sit in a single batch (id 0). The query is run a->b and
    b->a and only the output layout is checked, not the neighbours.
    """
    a = torch.randn(1000, 3).to(torch.float)
    b = torch.randn(1500, 3).to(torch.float)

    # The original used torch.randint(1, ...), whose exclusive upper bound of
    # 1 can only ever produce zeros; write the single batch out explicitly.
    batch_a = torch.zeros(1000, dtype=torch.long)
    batch_b = torch.zeros(1500, dtype=torch.long)

    idx, dist = ball_query(
        1.0, 12, a, b, mode="PARTIAL_DENSE", batch_x=batch_a, batch_y=batch_b
    )
    idx2, dist2 = ball_query(
        1.0, 12, b, a, mode="PARTIAL_DENSE", batch_x=batch_b, batch_y=batch_a
    )

    # Layout taken from test_simple_cpu's expected values: one row per point
    # of the first cloud, and distances shaped like the indices.
    self.assertEqual(idx.shape, dist.shape)
    self.assertEqual(idx.shape[0], a.shape[0])
    self.assertEqual(idx2.shape, dist2.shape)
    self.assertEqual(idx2.shape[0], b.shape[0])
94+
5095

5196
if __name__ == "__main__":
5297
unittest.main()

0 commit comments

Comments
 (0)