Merge branch 'master' into singleapi

nicolas-chaulet · nicolas-chaulet · commit 3c9a8e48b9fc · 2020-01-13T09:56:47.000Z
diff --git a/cpu/include/ball_query.h b/cpu/include/ball_query.h
@@ -9,3 +9,7 @@ std::pair<at::Tensor, at::Tensor> batch_ball_query(at::Tensor query,
 						   at::Tensor query_batch,
 						   at::Tensor support_batch,
 						   float radius, int max_num, int mode);
+
+std::pair<at::Tensor, at::Tensor> dense_ball_query(at::Tensor query,
+						   at::Tensor support,
+						   float radius, int max_num, int mode);
diff --git a/cpu/include/cloud.h b/cpu/include/cloud.h
@@ -57,6 +57,7 @@ struct PointCloud
 		pts = temp;
 	}
 	void set_batch(std::vector<scalar_t> new_pts, int begin, int size){
+
 		std::vector<PointXYZ> temp(size);
 		for(int i=0; i < size; i++){
 			PointXYZ point;
diff --git a/cpu/src/bindings.cpp b/cpu/src/bindings.cpp
@@ -35,4 +35,15 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
 		"mode=1 means a matrix of edges of size Num_edge x 2"
 	      "return a tensor of size N1 x M where M is either max_num or the maximum number of neighbors found if mode = 0, if mode=1 return a tensor of size Num_edge x 2 and return a tensor containing the squared distance of the neighbors",
 	      "query"_a, "support"_a, "query_batch"_a, "support_batch"_a, "radius"_a, "max_num"_a=-1, "mode"_a=0);
+    // Python binding for dense_ball_query. Adjacent string literals are concatenated
+    // by the compiler into a single docstring, so every piece ends with an explicit
+    // "\n" to keep the parameter list readable from Python's help().
+    m.def("dense_ball_query", &dense_ball_query,
+	  "compute the radius search of a batch of point clouds using nanoflann\n"
+	      "- query : a pytorch tensor of size B x N1 x 3, used to query the nearest neighbors\n"
+	      "- support : a pytorch tensor of size B x N2 x 3, used to build the tree\n"
+	      "- radius : float number, size of the ball for the radius search\n"
+	      "- max_num : int number, indicates the maximum number of neighbors allowed (if -1 then all the possible neighbors will be computed)\n"
+	      "- mode : int number that indicates which format is used for the neighborhood\n"
+	      "    mode=0 means a matrix of neighbors (padded with shadow neighbors)\n"
+	      "    mode=1 means a matrix of edges of size Num_edge x 2\n"
+	      "return the per-cloud results stacked along a new first dimension of size B: "
+	      "a tensor of size B x N1 x M where M is either max_num or the maximum number of neighbors found if mode=0 "
+	      "(each per-cloud result has size Num_edge x 2 if mode=1), "
+	      "and a tensor containing the squared distance of the neighbors\n",
+	      "query"_a, "support"_a, "radius"_a, "max_num"_a=-1, "mode"_a=0);
 }
diff --git a/cpu/src/neighbors.cpp b/cpu/src/neighbors.cpp
@@ -82,16 +82,20 @@ int nanoflann_neighbors(vector<scalar_t>& queries,
 
 		i0 = 0;
 
+		int token = 0;
 		for (auto& inds : list_matches){
+			token = inds[0].first;
 			for (int j = 0; j < max_count; j++){
 				if (j < inds.size()){
 					neighbors_indices[i0 * max_count + j] = inds[j].first;
 					dists[i0 * max_count + j] = (float) inds[j].second;
+
+
 				}
 
 				else {
-					neighbors_indices[i0 * max_count + j] = -1;
-					dists[i0 * max_count + j] = radius * radius;
+					neighbors_indices[i0 * max_count + j] = token;
+					dists[i0 * max_count + j] = -1;
 				}
 			}
 			i0++;
@@ -186,24 +190,30 @@ int batch_nanoflann_neighbors (vector<scalar_t>& queries,
 	search_params.sorted = true;
 	for (auto& p0 : query_pcd.pts){
 // Check if we changed batch
-
-		if (i0 == sum_qb + q_batches[b]){
+		if (i0 == sum_qb + q_batches[b] && b < s_batches.size()){
 			sum_qb += q_batches[b];
 			sum_sb += s_batches[b];
+
 			b++;
 
 // Change the points
 			current_cloud.pts.clear();
 			current_cloud.set_batch(supports, sum_sb, s_batches[b]);
 // Build KDTree of the current element of the batch
 			delete index;
+
 			index = new my_kd_tree_t(3, current_cloud, tree_params);
 			index->buildIndex();
 		}
 // Initial guess of neighbors size
+
+
 		all_inds_dists[i0].reserve(max_count);
 // Find neighbors
+		//std::cerr << p0.x << p0.y << p0.z<<std::endl;
 		scalar_t query_pt[3] = { p0.x, p0.y, p0.z};
+
+
 		size_t nMatches = index->radiusSearch(query_pt, r2, all_inds_dists[i0], search_params);
 // Update max count
 
@@ -217,8 +227,10 @@ int batch_nanoflann_neighbors (vector<scalar_t>& queries,
 		max_count = max_num;
 	}
 // Reserve the memory
+
 	if(mode == 0){
 		neighbors_indices.resize(query_pcd.pts.size() * max_count);
+
 		dists.resize(query_pcd.pts.size() * max_count);
 		i0 = 0;
 		sum_sb = 0;
@@ -227,6 +239,7 @@ int batch_nanoflann_neighbors (vector<scalar_t>& queries,
 
 		for (auto& inds_dists : all_inds_dists){// Check if we changed batch
 
+
 			if (i0 == sum_qb + q_batches[b]){
 				sum_qb += q_batches[b];
 				sum_sb += s_batches[b];
@@ -239,8 +252,8 @@ int batch_nanoflann_neighbors (vector<scalar_t>& queries,
 					dists[i0 * max_count + j] = (float) inds_dists[j].second;
 				}
 				else {
-					neighbors_indices[i0 * max_count + j] = supports.size();
-					dists[i0 * max_count + j] = radius * radius;
+					neighbors_indices[i0 * max_count + j] = supports.size()/3;
+					dists[i0 * max_count + j] = -1;
 				}
 
 			}
diff --git a/cpu/src/torch_nearest_neighbors.cpp b/cpu/src/torch_nearest_neighbors.cpp
@@ -61,20 +61,22 @@ std::pair<at::Tensor, at::Tensor> ball_query(at::Tensor query,
 
 void cumsum(const vector<long>& batch, vector<long>& res){
 
-	res.resize(batch[batch.size()-1]-batch[0]+1, 0);
+	res.resize(batch[batch.size()-1]-batch[0]+2, 0);
 	long ind = batch[0];
 	long incr = 1;
-	for(int i=1; i < batch.size(); i++){
-
-		if(batch[i] == ind)
-			incr++;
-		else{
-			res[ind-batch[0]] = incr;
-			incr =1;
-			ind = batch[i];
+	if(res.size() > 1){
+		for(int i=1; i < batch.size(); i++){
+			if(batch[i] == ind)
+				incr++;
+			else{
+				res[ind-batch[0]+1] = incr;
+				incr =1;
+				ind = batch[i];
+			}
 		}
+
 	}
-	res[ind-batch[0]] = incr;
+	res[ind-batch[0]+1] = incr;
 }
 
 std::pair<at::Tensor, at::Tensor> batch_ball_query(at::Tensor query,
@@ -89,9 +91,11 @@ std::pair<at::Tensor, at::Tensor> batch_ball_query(at::Tensor query,
 	std::vector<long> query_batch_stl = std::vector<long>(data_qb, data_qb+query_batch.size(0));
 	std::vector<long> cumsum_query_batch_stl;
 	cumsum(query_batch_stl, cumsum_query_batch_stl);
+
 	std::vector<long> support_batch_stl = std::vector<long>(data_sb, data_sb+support_batch.size(0));
 	std::vector<long> cumsum_support_batch_stl;
 	cumsum(support_batch_stl, cumsum_support_batch_stl);
+
 	std::vector<long> neighbors_indices;
 
 	auto options = torch::TensorOptions().dtype(torch::kLong).device(torch::kCPU);
@@ -107,6 +111,7 @@ std::pair<at::Tensor, at::Tensor> batch_ball_query(at::Tensor query,
 	std::vector<scalar_t> supports_stl = std::vector<scalar_t>(data_s,
 								   data_s + support.size(0)*support.size(1));
 
+
 	max_count = batch_nanoflann_neighbors<scalar_t>(queries_stl,
 							supports_stl,
 							cumsum_query_batch_stl,
@@ -117,6 +122,7 @@ std::pair<at::Tensor, at::Tensor> batch_ball_query(at::Tensor query,
 							max_num,
 							mode);
 	});
+
 	long* neighbors_indices_ptr = neighbors_indices.data();
 	auto neighbors_dists_ptr = neighbors_dists.data();
 
@@ -135,3 +141,23 @@ std::pair<at::Tensor, at::Tensor> batch_ball_query(at::Tensor query,
 	}
 	return std::make_pair(out.clone(), out_dists.clone());
 }
+
+
+std::pair<at::Tensor, at::Tensor> dense_ball_query(at::Tensor query,
+						   at::Tensor support,
+						   float radius, int max_num, int mode){
+
+	// Runs ball_query on every slice along the first dimension of `query` and
+	// `support`, then stacks the per-slice results along a new first dimension.
+	// Returns (stacked first outputs, stacked second outputs) of ball_query.
+	// `support` is indexed with the same i as `query`, so it needs at least
+	// query.size(0) slices; torch::stack additionally requires every slice
+	// result to have the same shape.
+	const int64_t batch_size = query.size(0);  // size() is 64-bit: do not narrow to int
+	vector<at::Tensor> batch_idx;
+	vector<at::Tensor> batch_dist;
+	batch_idx.reserve(batch_size);
+	batch_dist.reserve(batch_size);
+	for (int64_t i=0; i < batch_size; i++){
+		auto out_pair = ball_query(query[i], support[i], radius, max_num, mode);
+		// Move the handles out of the pair: avoids two refcount round-trips per slice.
+		batch_idx.push_back(std::move(out_pair.first));
+		batch_dist.push_back(std::move(out_pair.second));
+	}
+	return std::make_pair(torch::stack(batch_idx), torch::stack(batch_dist));
+
+}
diff --git a/setup.py b/setup.py
@@ -1,5 +1,10 @@
 from setuptools import setup, find_packages
-from torch.utils.cpp_extension import BuildExtension, CUDAExtension, CUDA_HOME, CppExtension
+from torch.utils.cpp_extension import (
+    BuildExtension,
+    CUDAExtension,
+    CUDA_HOME,
+    CppExtension,
+)
 import glob
 
 ext_src_root = "cuda"
@@ -33,12 +38,14 @@
     )
 )
 
+requirements = ["torch^1.1.0"]
+
 setup(
     name="torch_points",
-    version="0.1.4",
+    version="0.1.5",
     author="Nicolas Chaulet",
     packages=find_packages(),
-    install_requires=[],
+    install_requires=requirements,
     ext_modules=ext_modules,
     cmdclass={"build_ext": BuildExtension},
 )
diff --git a/test/test_ballquerry.py b/test/test_ballquerry.py
@@ -10,43 +10,88 @@ def test_simple_gpu(self):
         a = torch.tensor([[[0, 0, 0], [1, 0, 0], [2, 0, 0]]]).to(torch.float).cuda()
         b = torch.tensor([[[0, 0, 0]]]).to(torch.float).cuda()
 
-        npt.assert_array_equal(ball_query(1, 2, a, b).detach().cpu().numpy(), np.array([[[0, 0]]]))
+        npt.assert_array_equal(
+            ball_query(1, 2, a, b).detach().cpu().numpy(), np.array([[[0, 0]]])
+        )
 
     def test_larger_gpu(self):
         a = torch.randn(32, 4096, 3).to(torch.float).cuda()
         idx = ball_query(1, 64, a, a).detach().cpu().numpy()
-        self.assertGreaterEqual(idx.min(),0)
+        self.assertGreaterEqual(idx.min(), 0)
 
-    # def test_simple_cpu(self):
-    #     a = torch.tensor([[[0, 0, 0], [1, 0, 0], [2, 0, 0]]]).to(torch.float)
-    #     b = torch.tensor([[[0, 0, 0]]]).to(torch.float)
-    #     npt.assert_array_equal(ball_query(1, 2, a, b).detach().numpy(), np.array([[[0, 0]]]))
+    def test_cpu_gpu_equality(self):
+        a = torch.randn(5, 1000, 3)
+        res_cpu = ball_query(0.1, 17, a, a).detach().numpy()
+        res_cuda = ball_query(0.1, 17, a.cuda(), a.cuda()).cpu().detach().numpy()
+        for i in range(a.shape[0]):
+            for j in range(a.shape[1]):
+                # Because it is not necessary the same order
+                assert set(res_cpu[i][j]) == set(res_cuda[i][j])
 
-    # def test_cpu_gpu_equality(self):
-    #     a = torch.randn(5, 1000, 3)
-    #     npt.assert_array_equal(ball_query(0.1, 17, a, a).detach().numpy(),
-    #                            ball_query(0.1, 17, a.cuda(), a.cuda()).cpu().detach().numpy())
 
-    def test_partial_gpu(self):
-        x = torch.tensor([[10, 0, 0], [0.1, 0, 0], [10, 0, 0], [0.1, 0, 0]]).to(torch.float).cuda()
+class TestBallPartial(unittest.TestCase):
+    def test_simple_gpu(self):
+        x = (
+            torch.tensor([[10, 0, 0], [0.1, 0, 0], [10, 0, 0], [0.1, 0, 0]])
+            .to(torch.float)
+            .cuda()
+        )
         y = torch.tensor([[0, 0, 0]]).to(torch.float).cuda()
         batch_x = torch.from_numpy(np.asarray([0, 0, 1, 1])).long().cuda()
         batch_y = torch.from_numpy(np.asarray([0])).long().cuda()
-        
+
         batch_x = torch.from_numpy(np.asarray([0, 0, 1, 1])).long().cuda()
         batch_y = torch.from_numpy(np.asarray([0])).long().cuda()
 
-        idx, dist2 = ball_query(1., 2, x, y, mode="PARTIAL_DENSE", batch_x=batch_x, batch_y=batch_y)
+        idx, dist2 = ball_query(
+            1.0, 2, x, y, mode="PARTIAL_DENSE", batch_x=batch_x, batch_y=batch_y
+        )
 
         idx = idx.detach().cpu().numpy()
         dist2 = dist2.detach().cpu().numpy()
 
         idx_answer = np.asarray([[1, 4]])
-        dist2_answer = np.asarray([[ 0.0100, -1.0000]]).astype(np.float32)
+        dist2_answer = np.asarray([[0.0100, -1.0000]]).astype(np.float32)
 
         npt.assert_array_almost_equal(idx, idx_answer)
         npt.assert_array_almost_equal(dist2, dist2_answer)
 
+    def test_simple_cpu(self):
+        x = torch.tensor([[10, 0, 0], [0.1, 0, 0], [10, 0, 0], [0.1, 0, 0]]).to(
+            torch.float
+        )
+        y = torch.tensor([[0, 0, 0]]).to(torch.float)
+
+        batch_x = torch.from_numpy(np.asarray([0, 0, 1, 1])).long()
+        batch_y = torch.from_numpy(np.asarray([0])).long()
+
+        idx, dist2 = ball_query(
+            1.0, 2, x, y, mode="PARTIAL_DENSE", batch_x=batch_x, batch_y=batch_y
+        )
+
+        idx = idx.detach().cpu().numpy()
+        dist2 = dist2.detach().cpu().numpy()
+
+        idx_answer = np.asarray([[1, 1], [0, 1], [1, 1], [1, 1]])
+        dist2_answer = np.asarray([[-1, -1], [0.01, -1], [-1, -1], [-1, -1]]).astype(
+            np.float32
+        )
+
+        npt.assert_array_almost_equal(idx, idx_answer)
+        npt.assert_array_almost_equal(dist2, dist2_answer)
+
+    def test_random_cpu(self):
+        a = torch.randn(1000, 3).to(torch.float)
+        b = torch.randn(1500, 3).to(torch.float)
+        batch_a = torch.randint(1, (1000,)).sort(0)[0].long()
+        batch_b = torch.randint(1, (1500,)).sort(0)[0].long()
+        idx, dist = ball_query(
+            1.0, 12, a, b, mode="PARTIAL_DENSE", batch_x=batch_a, batch_y=batch_b
+        )
+        idx2, dist2 = ball_query(
+            1.0, 12, b, a, mode="PARTIAL_DENSE", batch_x=batch_b, batch_y=batch_a
+        )
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/torch_points/torchpoints.py b/torch_points/torchpoints.py

Original file line number	Diff line number	Diff line change
`@@ -57,6 +57,7 @@ struct PointCloud`
`57`	`57`	`pts = temp;`
`58`	`58`	`}`
`59`	`59`	`void set_batch(std::vector<scalar_t> new_pts, int begin, int size){`
	`60`	`+`
`60`	`61`	`std::vector<PointXYZ> temp(size);`
`61`	`62`	`for(int i=0; i < size; i++){`
`62`	`63`	`PointXYZ point;`