Merge pull request #11 from humanpose1/query_ball_cpu

nicolas-chaulet · web-flow · commit 2884bcc1da1d · 2020-01-10T23:34:25.000Z
Query ball cpu
diff --git a/cpu/include/ball_query.h b/cpu/include/ball_query.h
@@ -9,3 +9,7 @@ std::pair<at::Tensor, at::Tensor> batch_ball_query(at::Tensor query,
 						   at::Tensor query_batch,
 						   at::Tensor support_batch,
 						   float radius, int max_num, int mode);
+
+std::pair<at::Tensor, at::Tensor> dense_ball_query(at::Tensor query,	// query points; B x N1 x 3 according to the Python binding docstring
+						   at::Tensor support,	// points the KD-tree is built from; B x N2 x 3 according to the binding docstring
+						   float radius, int max_num, int mode);	// radius of the search ball; max_num caps the neighbors (binding default -1 = all); mode selects the output format (binding default 0)
diff --git a/cpu/include/cloud.h b/cpu/include/cloud.h
@@ -57,6 +57,7 @@ struct PointCloud
 		pts = temp;
 	}
 	void set_batch(std::vector<scalar_t> new_pts, int begin, int size){
+
 		std::vector<PointXYZ> temp(size);
 		for(int i=0; i < size; i++){
 			PointXYZ point;
diff --git a/cpu/src/bindings.cpp b/cpu/src/bindings.cpp
@@ -35,4 +35,15 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
 		"mode=1 means a matrix of edges of size Num_edge x 2"
 	      "return a tensor of size N1 x M where M is either max_num or the maximum number of neighbors found if mode = 0, if mode=1 return a tensor of size Num_edge x 2 and return a tensor containing the squared distance of the neighbors",
 	      "query"_a, "support"_a, "query_batch"_a, "support_batch"_a, "radius"_a, "max_num"_a=-1, "mode"_a=0);
+    m.def("dense_ball_query", &dense_ball_query,
+	  // Adjacent string literals are concatenated verbatim, so every line ends with an explicit "\n".
+	  "compute the radius search of a batch of point clouds using nanoflann\n"
+	      "- query : a pytorch tensor of size B x N1 x 3, used to query the nearest neighbors\n"
+	      "- support : a pytorch tensor of size B x N2 x 3, used to build the tree\n"
+	      "- radius : float number, size of the ball for the radius search\n"
+	      "- max_num : int number, indicates the maximum number of neighbors allowed (if -1 then all the possible neighbors will be computed)\n"
+	      "- mode : int number that indicates which format is used for the neighborhood\n"
+	      "  mode=0 means a matrix of neighbors (-1 for shadow neighbors)\n"
+	      "  mode=1 means a matrix of edges of size Num_edge x 2\n"
+	      "return a tensor of size B x N1 x M where M is either max_num or the maximum number of neighbors found if mode = 0, if mode=1 return the Num_edge x 2 tensors of the batch elements stacked along the batch dimension, and return a tensor containing the squared distance of the neighbors",
+	      "query"_a, "support"_a, "radius"_a, "max_num"_a=-1, "mode"_a=0);
 }
diff --git a/cpu/src/neighbors.cpp b/cpu/src/neighbors.cpp
@@ -82,16 +82,20 @@ int nanoflann_neighbors(vector<scalar_t>& queries,
 
 		i0 = 0;
 
+		int token = 0;
 		for (auto& inds : list_matches){
+			token = inds[0].first;
 			for (int j = 0; j < max_count; j++){
 				if (j < inds.size()){
 					neighbors_indices[i0 * max_count + j] = inds[j].first;
 					dists[i0 * max_count + j] = (float) inds[j].second;
+
+
 				}
 
 				else {
-					neighbors_indices[i0 * max_count + j] = -1;
-					dists[i0 * max_count + j] = radius * radius;
+					neighbors_indices[i0 * max_count + j] = token;
+					dists[i0 * max_count + j] = -1;
 				}
 			}
 			i0++;
@@ -186,24 +190,30 @@ int batch_nanoflann_neighbors (vector<scalar_t>& queries,
 	search_params.sorted = true;
 	for (auto& p0 : query_pcd.pts){
 // Check if we changed batch
-
-		if (i0 == sum_qb + q_batches[b]){
+		if (i0 == sum_qb + q_batches[b] && b < s_batches.size()){
 			sum_qb += q_batches[b];
 			sum_sb += s_batches[b];
+
 			b++;
 
 // Change the points
 			current_cloud.pts.clear();
 			current_cloud.set_batch(supports, sum_sb, s_batches[b]);
 // Build KDTree of the current element of the batch
 			delete index;
+
 			index = new my_kd_tree_t(3, current_cloud, tree_params);
 			index->buildIndex();
 		}
 // Initial guess of neighbors size
+
+
 		all_inds_dists[i0].reserve(max_count);
 // Find neighbors
+		//std::cerr << p0.x << p0.y << p0.z<<std::endl;
 		scalar_t query_pt[3] = { p0.x, p0.y, p0.z};
+
+
 		size_t nMatches = index->radiusSearch(query_pt, r2, all_inds_dists[i0], search_params);
 // Update max count
 
@@ -217,8 +227,10 @@ int batch_nanoflann_neighbors (vector<scalar_t>& queries,
 		max_count = max_num;
 	}
 // Reserve the memory
+
 	if(mode == 0){
 		neighbors_indices.resize(query_pcd.pts.size() * max_count);
+
 		dists.resize(query_pcd.pts.size() * max_count);
 		i0 = 0;
 		sum_sb = 0;
@@ -227,6 +239,7 @@ int batch_nanoflann_neighbors (vector<scalar_t>& queries,
 
 		for (auto& inds_dists : all_inds_dists){// Check if we changed batch
 
+
 			if (i0 == sum_qb + q_batches[b]){
 				sum_qb += q_batches[b];
 				sum_sb += s_batches[b];
@@ -239,8 +252,8 @@ int batch_nanoflann_neighbors (vector<scalar_t>& queries,
 					dists[i0 * max_count + j] = (float) inds_dists[j].second;
 				}
 				else {
-					neighbors_indices[i0 * max_count + j] = supports.size();
-					dists[i0 * max_count + j] = radius * radius;
+					neighbors_indices[i0 * max_count + j] = supports.size()/3;
+					dists[i0 * max_count + j] = -1;
 				}
 
 			}
diff --git a/cpu/src/torch_nearest_neighbors.cpp b/cpu/src/torch_nearest_neighbors.cpp
@@ -61,20 +61,22 @@ std::pair<at::Tensor, at::Tensor> ball_query(at::Tensor query,
 
 void cumsum(const vector<long>& batch, vector<long>& res){
 
-	res.resize(batch[batch.size()-1]-batch[0]+1, 0);
+	res.resize(batch[batch.size()-1]-batch[0]+2, 0); // (last id - first id + 2) zero-filled slots; slot 0 is never written below. NOTE(review): batch is indexed here without an emptiness check
 	long ind = batch[0];
 	long incr = 1;
-	for(int i=1; i < batch.size(); i++){
-
-		if(batch[i] == ind)
-			incr++;
-		else{
-			res[ind-batch[0]] = incr;
-			incr =1;
-			ind = batch[i];
+	if(res.size() > 1){ // holds whenever the last id is not smaller than the first one, given the resize above
+		for(int i=1; i < batch.size(); i++){ // walk the ids, measuring runs of equal consecutive values
+			if(batch[i] == ind)
+				incr++; // still inside the current run
+			else{
+				res[ind-batch[0]+1] = incr; // run of 'ind' ended: store its length, shifted by one slot
+				incr =1; // the element that broke the run starts the next one
+				ind = batch[i];
+			}
 		}
+
 	}
-	res[ind-batch[0]] = incr;
+	res[ind-batch[0]+1] = incr; // flush the last run; note the values stored are per-id run lengths, not a running total
 }
 
 std::pair<at::Tensor, at::Tensor> batch_ball_query(at::Tensor query,
@@ -89,9 +91,11 @@ std::pair<at::Tensor, at::Tensor> batch_ball_query(at::Tensor query,
 	std::vector<long> query_batch_stl = std::vector<long>(data_qb, data_qb+query_batch.size(0));
 	std::vector<long> cumsum_query_batch_stl;
 	cumsum(query_batch_stl, cumsum_query_batch_stl);
+
 	std::vector<long> support_batch_stl = std::vector<long>(data_sb, data_sb+support_batch.size(0));
 	std::vector<long> cumsum_support_batch_stl;
 	cumsum(support_batch_stl, cumsum_support_batch_stl);
+
 	std::vector<long> neighbors_indices;
 
 	auto options = torch::TensorOptions().dtype(torch::kLong).device(torch::kCPU);
@@ -107,6 +111,7 @@ std::pair<at::Tensor, at::Tensor> batch_ball_query(at::Tensor query,
 	std::vector<scalar_t> supports_stl = std::vector<scalar_t>(data_s,
 								   data_s + support.size(0)*support.size(1));
 
+
 	max_count = batch_nanoflann_neighbors<scalar_t>(queries_stl,
 							supports_stl,
 							cumsum_query_batch_stl,
@@ -117,6 +122,7 @@ std::pair<at::Tensor, at::Tensor> batch_ball_query(at::Tensor query,
 							max_num,
 							mode);
 	});
+
 	long* neighbors_indices_ptr = neighbors_indices.data();
 	auto neighbors_dists_ptr = neighbors_dists.data();
 
@@ -135,3 +141,23 @@ std::pair<at::Tensor, at::Tensor> batch_ball_query(at::Tensor query,
 	}
 	return std::make_pair(out.clone(), out_dists.clone());
 }
+
+
+/**
+ * Dense (batched) ball query on CPU.
+ *
+ * Runs ball_query independently on every batch element (query[i], support[i])
+ * and stacks the per-element results along a new leading batch dimension.
+ *
+ * @param query   query points; first dimension is the batch
+ *                (B x N1 x 3 according to the Python binding docstring).
+ * @param support support points with the same batch size as query
+ *                (B x N2 x 3 according to the Python binding docstring).
+ * @param radius  radius of the search ball, forwarded to ball_query.
+ * @param max_num maximum number of neighbors, forwarded to ball_query.
+ * @param mode    output format, forwarded to ball_query.
+ * @return pair (indices, distances), each stacked over the batch.
+ *
+ * Raises a c10::Error (via TORCH_CHECK) when the inputs are not 3-D, when the
+ * batch sizes differ, or when the batch is empty.
+ *
+ * NOTE(review): torch::stack requires every per-element result to have the same
+ * shape; presumably this only holds for mode 0 with a fixed max_num -- confirm.
+ */
+std::pair<at::Tensor, at::Tensor> dense_ball_query(at::Tensor query,
+						   at::Tensor support,
+						   float radius, int max_num, int mode){
+
+	TORCH_CHECK(query.dim() == 3 && support.dim() == 3,
+		    "dense_ball_query expects 3-D tensors (B x N x 3), got query.dim()=",
+		    query.dim(), " and support.dim()=", support.dim());
+	// size(0) is a 64-bit value; keep it as such instead of narrowing to int.
+	const int64_t b = query.size(0);
+	// Without this check extra support batches would be silently ignored.
+	TORCH_CHECK(support.size(0) == b,
+		    "dense_ball_query: query and support must have the same batch size, got ",
+		    b, " and ", support.size(0));
+	// torch::stack rejects an empty list; fail early with a clearer message.
+	TORCH_CHECK(b > 0, "dense_ball_query: the batch dimension must not be empty");
+
+	std::vector<at::Tensor> batch_idx;
+	std::vector<at::Tensor> batch_dist;
+	batch_idx.reserve(b);
+	batch_dist.reserve(b);
+	for (int64_t i = 0; i < b; i++){
+		auto out_pair = ball_query(query[i], support[i], radius, max_num, mode);
+		batch_idx.push_back(out_pair.first);
+		batch_dist.push_back(out_pair.second);
+	}
+	auto out_idx = torch::stack(batch_idx);
+	auto out_dist = torch::stack(batch_dist);
+	return std::make_pair(out_idx, out_dist);
+
+}
diff --git a/test/test_ballquerry.py b/test/test_ballquerry.py
@@ -19,8 +19,12 @@ def test_simple_cpu(self):
 
     def test_cpu_gpu_equality(self):
         a = torch.randn(5, 1000, 3)
-        npt.assert_array_equal(ball_query_dense(0.1, 17, a, a).detach().numpy(),
-                               ball_query_dense(0.1, 17, a.cuda(), a.cuda()).cpu().detach().numpy())
+        res_cpu = ball_query_dense(0.1, 17, a, a).detach().numpy()
+        res_cuda = ball_query_dense(0.1, 17, a.cuda(), a.cuda()).cpu().detach().numpy()
+        for i in range(a.shape[0]):
+            for j in range(a.shape[1]):
+                # Compare as sets: the CPU and CUDA results are not necessarily in the same order
+                # (note that set() also collapses any repeated entries within a row).
+                assert set(res_cpu[i][j]) == set(res_cuda[i][j])
 
 
 if __name__ == "__main__":
diff --git a/test/test_ballquerry_partial.py b/test/test_ballquerry_partial.py
@@ -1,6 +1,7 @@
 import unittest
 import torch
 from torch_points import ball_query
+from torch_points.points_cpu import ball_query as cpu_ball_query
 from torch_cluster import radius_cuda
 import numpy.testing as npt
 import numpy as np
@@ -11,7 +12,7 @@ def test_simple_gpu(self):
         y = torch.tensor([[0, 0, 0]]).to(torch.float).cuda()
         batch_x = torch.from_numpy(np.asarray([0, 0, 1, 1])).long().cuda()
         batch_y = torch.from_numpy(np.asarray([0])).long().cuda()
-        
+
         batch_x = torch.from_numpy(np.asarray([0, 0, 1, 1])).long().cuda()
         batch_y = torch.from_numpy(np.asarray([0])).long().cuda()
 
@@ -26,5 +27,43 @@ def test_simple_gpu(self):
         npt.assert_array_almost_equal(idx, idx_answer)
         npt.assert_array_almost_equal(dist2, dist2_answer)
 
+
+    def test_simple_cpu(self):
+        """CPU partial-dense ball query checked against a hand-computed four-point example."""
+        x = torch.tensor([[10, 0, 0], [0.1, 0, 0], [10, 0, 0], [0.1, 0, 0]], dtype=torch.float)
+        y = torch.tensor([[0, 0, 0]], dtype=torch.float)
+        batch_x = torch.tensor([0, 0, 1, 1], dtype=torch.long)
+        batch_y = torch.tensor([0], dtype=torch.long)
+
+        idx, dist2 = ball_query(1., 2, x, y, batch_x, batch_y, mode="PARTIAL_DENSE")
+
+        # Only the second point of x (batch 0, at distance 0.1) has a neighbor in y.
+        # Every other slot is expected to be padding: index 1 with squared distance -1.
+        expected_idx = np.array([[1, 1], [0, 1], [1, 1], [1, 1]])
+        expected_dist2 = np.array([[-1, -1], [0.01, -1], [-1, -1], [-1, -1]], dtype=np.float32)
+
+        npt.assert_array_almost_equal(idx.detach().cpu().numpy(), expected_idx)
+        npt.assert_array_almost_equal(dist2.detach().cpu().numpy(), expected_dist2)
+
+    def test_random_cpu(self):
+        """Sanity-check the CPU partial-dense ball query on random clouds, in both query directions."""
+        a = torch.randn(1000, 3).to(torch.float)
+        b = torch.randn(1500, 3).to(torch.float)
+        # randint(1, ...) can only produce zeros, so every point belongs to batch 0.
+        batch_a = torch.randint(1, (1000,)).sort(0)[0].long()
+        batch_b = torch.randint(1, (1500,)).sort(0)[0].long()
+        # Keep both results (the second call used to overwrite the first) and check each one.
+        runs = [
+            (ball_query(1.0, 12, a, b, batch_a, batch_b, mode="PARTIAL_DENSE"), a, b),
+            (ball_query(1.0, 12, b, a, batch_b, batch_a, mode="PARTIAL_DENSE"), b, a),
+        ]
+        for (idx, dist2), query, support in runs:
+            idx = idx.detach().cpu().numpy()
+            dist2 = dist2.detach().cpu().numpy()
+            n_support = support.shape[0]
+            # One row of neighbors per query point.
+            assert idx.shape[0] == query.shape[0]
+            assert dist2.shape == idx.shape
+            # Real neighbors index into the support cloud; padded slots use index n_support.
+            assert idx.min() >= 0 and idx.max() <= n_support
+            # Padded slots carry a squared distance of -1.
+            assert np.all(dist2[idx == n_support] == -1)
+        # The unbatched CPU query must still run and return one row per query point.
+        idx2, _ = cpu_ball_query(a, b, 1.0, 12)
+        assert idx2.shape[0] == a.shape[0]
+
+
+
+
+
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/torch_points/torchpoints.py b/torch_points/torchpoints.py
@@ -251,17 +251,10 @@ def forward(ctx, radius, nsample, xyz, new_xyz, batch_xyz=None, batch_new_xyz=No
         if new_xyz.is_cuda:
             return tpcuda.ball_query_dense(new_xyz, xyz, radius, nsample)
         else:
-            b = xyz.size(0)
-            npoints = new_xyz.size(1)
-            n = xyz.size(1)
-            batch_new_xyz = torch.arange(0, b, dtype=torch.long).repeat(npoints, 1).T.reshape(-1)
-            batch_xyz = torch.arange(0, b, dtype=torch.long).repeat(n, 1).T.reshape(-1)
-            ind, dist = tpcpu.batch_ball_query(new_xyz.view(-1, 3),
-                                               xyz.view(-1, 3),
-                                               batch_new_xyz,
-                                               batch_xyz,
-                                               radius, nsample)
-            return ind.view(b, npoints, nsample)
+            ind, dist = tpcpu.dense_ball_query(new_xyz,
+                                               xyz,
+                                               radius, nsample, mode=0)
+            return ind
 
     @staticmethod
     def backward(ctx, a=None):
@@ -299,7 +292,11 @@ def forward(ctx, radius, nsample, x, y, batch_x, batch_y):
                                                    batch_y,
                                                    radius, nsample)
         else:
-            raise NotImplementedError
+            ind, dist = tpcpu.batch_ball_query(x, y,
+                                               batch_x,
+                                               batch_y,
+                                               radius, nsample, mode=0)
+            return ind, dist
 
     @staticmethod
     def backward(ctx, a=None):
@@ -319,9 +316,9 @@ def ball_query_partial_dense(radius, nsample, x, y, batch_x, batch_y):
     y : torch.Tensor
         (N, npoint, 3) centers of the ball query
     batch_x : torch.Tensor
-        (M, ) Contains indexes to indicate within batch it belongs to. 
+        (M, ) Contains indexes to indicate within batch it belongs to.
     batch_y : torch.Tensor
-        (N, ) Contains indexes to indicate within batch it belongs to  
+        (N, ) Contains indexes to indicate within batch it belongs to
 
     Returns
     -------

Original file line number	Diff line number	Diff line change
`@@ -57,6 +57,7 @@ struct PointCloud`
`57`	`57`	`pts = temp;`
`58`	`58`	`}`
`59`	`59`	`void set_batch(std::vector<scalar_t> new_pts, int begin, int size){`
	`60`	`+`
`60`	`61`	`std::vector<PointXYZ> temp(size);`
`61`	`62`	`for(int i=0; i < size; i++){`
`62`	`63`	`PointXYZ point;`