Merge pull request #14 from humanpose1/debug_cpu

nicolas-chaulet · web-flow · commit b1cde1603ee6 · 2020-01-21T09:29:08.000Z
debug of cpu ball query
diff --git a/cpu/src/neighbors.cpp b/cpu/src/neighbors.cpp
@@ -86,7 +86,7 @@ int nanoflann_neighbors(vector<scalar_t>& queries,
 		for (auto& inds : list_matches){
 			token = inds[0].first;
 			for (int j = 0; j < max_count; j++){
-				if (j < inds.size()){
+				if ((unsigned int)j < inds.size()){
 					neighbors_indices[i0 * max_count + j] = inds[j].first;
 					dists[i0 * max_count + j] = (float) inds[j].second;
 
@@ -116,7 +116,7 @@ int nanoflann_neighbors(vector<scalar_t>& queries,
 		int u = 0; // curent index of the neighbors_indices
 		for (auto& inds : list_matches){
 			for (int j = 0; j < max_count; j++){
-				if(j < inds.size()){
+				if((unsigned int)j < inds.size()){
 					neighbors_indices[u] = inds[j].first;
 					neighbors_indices[u + 1] = i0;
 					dists[u/2] = (float) inds[j].second;
@@ -158,9 +158,8 @@ int batch_nanoflann_neighbors (vector<scalar_t>& queries,
 
 
 	// batch index
-	long b = 0;
-	long sum_qb = 0;
-	long sum_sb = 0;
+	int b = 0;
+
 
 	// Nanoflann related variables
 	// ***************************
@@ -180,7 +179,7 @@ int batch_nanoflann_neighbors (vector<scalar_t>& queries,
 // Pointer to trees
 	my_kd_tree_t* index;
     // Build KDTree for the first batch element
-	current_cloud.set_batch(supports, sum_sb, s_batches[b]);
+	current_cloud.set_batch(supports, s_batches[b], s_batches[b+1]);
 	index = new my_kd_tree_t(3, current_cloud, tree_params);
 	index->buildIndex();
 // Search neigbors indices
@@ -190,21 +189,22 @@ int batch_nanoflann_neighbors (vector<scalar_t>& queries,
 	search_params.sorted = true;
 	for (auto& p0 : query_pcd.pts){
 // Check if we changed batch
-		if (i0 == sum_qb + q_batches[b] && b < s_batches.size()){
-			sum_qb += q_batches[b];
-			sum_sb += s_batches[b];
 
-			b++;
+		if (i0 == q_batches[b+1] && b < (int)s_batches.size()-1 && b < (int)q_batches.size()-1){
 
 // Change the points
+			b++;
 			current_cloud.pts.clear();
-			current_cloud.set_batch(supports, sum_sb, s_batches[b]);
+			if(s_batches[b] < s_batches[b+1])
+				current_cloud.set_batch(supports, s_batches[b], s_batches[b+1]);
 // Build KDTree of the current element of the batch
 			delete index;
 
 			index = new my_kd_tree_t(3, current_cloud, tree_params);
 			index->buildIndex();
+
 		}
+
 // Initial guess of neighbors size
 
 
@@ -233,22 +233,19 @@ int batch_nanoflann_neighbors (vector<scalar_t>& queries,
 
 		dists.resize(query_pcd.pts.size() * max_count);
 		i0 = 0;
-		sum_sb = 0;
-		sum_qb = 0;
+
 		b = 0;
 
 		for (auto& inds_dists : all_inds_dists){// Check if we changed batch
 
 
-			if (i0 == sum_qb + q_batches[b]){
-				sum_qb += q_batches[b];
-				sum_sb += s_batches[b];
+			if (i0 == q_batches[b+1] && b < (int)s_batches.size()-1 && b < (int)q_batches.size()-1){
 				b++;
 			}
 
 			for (int j = 0; j < max_count; j++){
-				if (j < inds_dists.size()){
-					neighbors_indices[i0 * max_count + j] = inds_dists[j].first + sum_sb;
+				if ((unsigned int)j < inds_dists.size()){
+					neighbors_indices[i0 * max_count + j] = inds_dists[j].first + s_batches[b];
 					dists[i0 * max_count + j] = (float) inds_dists[j].second;
 				}
 				else {
@@ -273,19 +270,15 @@ int batch_nanoflann_neighbors (vector<scalar_t>& queries,
 		neighbors_indices.resize(size * 2);
 		dists.resize(size);
 		i0 = 0;
-		sum_sb = 0;
-		sum_qb = 0;
 		b = 0;
 		int u = 0;
 		for (auto& inds_dists : all_inds_dists){
-			if (i0 == sum_qb + q_batches[b]){
-				sum_qb += q_batches[b];
-				sum_sb += s_batches[b];
+			if (i0 == q_batches[b+1] && b < (int)s_batches.size()-1 && b < (int)q_batches.size()-1){
 				b++;
 			}
 			for (int j = 0; j < max_count; j++){
-				if (j < inds_dists.size()){
-					neighbors_indices[u] = inds_dists[j].first + sum_sb;
+				if ((unsigned int)j < inds_dists.size()){
+					neighbors_indices[u] = inds_dists[j].first + s_batches[b];
 					neighbors_indices[u + 1] = i0;
 					dists[u/2] = (float) inds_dists[j].second;
 					u += 2;
diff --git a/cpu/src/torch_nearest_neighbors.cpp b/cpu/src/torch_nearest_neighbors.cpp
@@ -59,87 +59,71 @@ std::pair<at::Tensor, at::Tensor> ball_query(at::Tensor query,
 	return std::make_pair(out.clone(), out_dists.clone());
 }
 
-void cumsum(const vector<long>& batch, vector<long>& res){
-
-	res.resize(batch[batch.size()-1]-batch[0]+2, 0);
-	long ind = batch[0];
-	long incr = 1;
-	if(res.size() > 1){
-		for(int i=1; i < batch.size(); i++){
-			if(batch[i] == ind)
-				incr++;
-			else{
-				res[ind-batch[0]+1] = incr;
-				incr =1;
-				ind = batch[i];
-			}
-		}
-
-	}
-	res[ind-batch[0]+1] = incr;
+at::Tensor degree(at::Tensor row, int64_t num_nodes) { // counts how many times each index value occurs in `row`
+	auto zero = at::zeros(num_nodes, row.options()); // accumulator with `num_nodes` slots, same dtype/device options as `row`
+	auto one = at::ones(row.size(0), row.options()); // one unit contribution per entry of `row`
+	return zero.scatter_add_(0, row, one); // zero[row[i]] += 1 for every i; entries of `row` must lie in [0, num_nodes)
 }
 
 std::pair<at::Tensor, at::Tensor> batch_ball_query(at::Tensor query,
 						   at::Tensor support,
 						   at::Tensor query_batch,
 						   at::Tensor support_batch,
 						   float radius, int max_num, int mode) {
-	at::Tensor out;
-	at::Tensor out_dists;
-	auto data_qb = query_batch.DATA_PTR<long>();
-	auto data_sb = support_batch.DATA_PTR<long>();
-	std::vector<long> query_batch_stl = std::vector<long>(data_qb, data_qb+query_batch.size(0));
-	std::vector<long> cumsum_query_batch_stl;
-	cumsum(query_batch_stl, cumsum_query_batch_stl);
-
-	std::vector<long> support_batch_stl = std::vector<long>(data_sb, data_sb+support_batch.size(0));
-	std::vector<long> cumsum_support_batch_stl;
-	cumsum(support_batch_stl, cumsum_support_batch_stl);
+	at::Tensor idx; // neighbour indices returned to the caller
 
+	at::Tensor dist; // distances matching `idx`
 	std::vector<long> neighbors_indices;
+	std::vector<float> neighbors_dists; // filled by batch_nanoflann_neighbors together with neighbors_indices
 
 	auto options = torch::TensorOptions().dtype(torch::kLong).device(torch::kCPU);
 	auto options_dist = torch::TensorOptions().dtype(torch::kFloat32).device(torch::kCPU);
+	// Overview: batch ids -> cumulative offsets, run batch_nanoflann_neighbors, wrap results (mode 0: {n_query, max_count}; mode 1: {n_pairs, 2}).
 	int max_count = 0;
-	std::vector<float> neighbors_dists;
+	auto batch_access = query_batch.accessor<int64_t, 1>();
+	auto batch_size = batch_access[query_batch.size(0) - 1] + 1; // TensorAccessor has no negative indexing ([-1] reads out of bounds); assumes query_batch is sorted and non-empty - TODO confirm
+	query_batch = degree(query_batch, batch_size); // number of query points per batch id
+	query_batch = at::cat({at::zeros(1, query_batch.options()), query_batch.cumsum(0)}, 0); // offsets [0, n_0, n_0 + n_1, ...]
+	support_batch = degree(support_batch, batch_size); // number of support points per batch id (batch_size is derived from query_batch only)
+	support_batch = at::cat({at::zeros(1, support_batch.options()), support_batch.cumsum(0)}, 0); // same offset layout for the support cloud
+	std::vector<long> query_batch_stl(query_batch.DATA_PTR<long>(), query_batch.DATA_PTR<long>() + query_batch.numel());
+	std::vector<long> support_batch_stl(support_batch.DATA_PTR<long>(), support_batch.DATA_PTR<long>() + support_batch.numel());
+	// Dispatch on the dtype of `query`; the flattened coordinates are copied into std::vectors for the search.
 	AT_DISPATCH_ALL_TYPES(query.scalar_type(), "batch_radius_search", [&] {
 
-	auto data_q = query.DATA_PTR<scalar_t>();
-	auto data_s = support.DATA_PTR<scalar_t>();
-	std::vector<scalar_t> queries_stl = std::vector<scalar_t>(data_q,
-								  data_q + query.size(0)*query.size(1));
-	std::vector<scalar_t> supports_stl = std::vector<scalar_t>(data_s,
-								   data_s + support.size(0)*support.size(1));
+        std::vector<scalar_t> queries_stl(query.DATA_PTR<scalar_t>(), query.DATA_PTR<scalar_t>() + query.numel());
+	std::vector<scalar_t> supports_stl(support.DATA_PTR<scalar_t>(), support.DATA_PTR<scalar_t>() + support.numel());
 
 
-	max_count = batch_nanoflann_neighbors<scalar_t>(queries_stl,
+        max_count = batch_nanoflann_neighbors<scalar_t>(queries_stl,
 							supports_stl,
-							cumsum_query_batch_stl,
-							cumsum_support_batch_stl,
+							query_batch_stl,
+							support_batch_stl,
 							neighbors_indices,
 							neighbors_dists,
 							radius,
 							max_num,
 							mode);
-	});
-
-	long* neighbors_indices_ptr = neighbors_indices.data();
-	auto neighbors_dists_ptr = neighbors_dists.data();
 
 
+	});
+	auto neighbors_dists_ptr = neighbors_dists.data();
+	long* neighbors_indices_ptr = neighbors_indices.data();
 	if(mode == 0){
-		out = torch::from_blob(neighbors_indices_ptr, {query.size(0), max_count}, options=options);
-		out_dists = torch::from_blob(neighbors_dists_ptr,
-	 				     {query.size(0), max_count},
-	 				     options=options_dist);
+		idx = torch::from_blob(neighbors_indices_ptr, {query.size(0), max_count}, options=options);
+		dist = torch::from_blob(neighbors_dists_ptr,
+					{query.size(0), max_count},
+					options=options_dist);
+
 	}
-	else if(mode == 1){
-		out = torch::from_blob(neighbors_indices_ptr, {(int)neighbors_indices.size()/2, 2}, options=options);
-		out_dists = torch::from_blob(neighbors_dists_ptr,
-					     {(int)neighbors_indices.size()/2, 1},
-					     options=options_dist);
+	else if(mode ==1){
+		idx = torch::from_blob(neighbors_indices_ptr, {(int)neighbors_indices.size()/2, 2}, options=options);
+		dist = torch::from_blob(neighbors_dists_ptr,
+					{(int)neighbors_indices.size()/2, 1},
+					options=options_dist);
 	}
-	return std::make_pair(out.clone(), out_dists.clone());
+	return std::make_pair(idx.clone(), dist.clone()); // clone(): from_blob only wraps the local vectors, so the results must own a copy
+
 }
 
 
diff --git a/test/test_ballquerry.py b/test/test_ballquerry.py
@@ -3,6 +3,7 @@
 from torch_points import ball_query
 import numpy.testing as npt
 import numpy as np
+from sklearn.neighbors import KDTree
 
 from . import run_if_cuda
 
@@ -54,23 +55,23 @@ def test_simple_gpu(self):
         npt.assert_array_almost_equal(idx, idx_answer)
         npt.assert_array_almost_equal(dist2, dist2_answer)
 
-    # def test_simple_cpu(self):
-    #     x = torch.tensor([[10, 0, 0], [0.1, 0, 0], [10, 0, 0], [0.1, 0, 0]]).to(torch.float)
-    #     y = torch.tensor([[0, 0, 0]]).to(torch.float)
+    def test_simple_cpu(self):  # CPU ball_query in PARTIAL_DENSE mode with explicit batch vectors
+        x = torch.tensor([[10, 0, 0], [0.1, 0, 0], [10, 0, 0], [0.1, 0, 0]]).to(torch.float)
+        y = torch.tensor([[0, 0, 0]]).to(torch.float)
 
-    #     batch_x = torch.from_numpy(np.asarray([0, 0, 1, 1])).long()
-    #     batch_y = torch.from_numpy(np.asarray([0])).long()
+        batch_x = torch.from_numpy(np.asarray([0, 0, 1, 1])).long()  # first two rows of x are in batch 0, last two in batch 1
+        batch_y = torch.from_numpy(np.asarray([0])).long()  # the single y point is in batch 0
 
-    #     idx, dist2 = ball_query(1.0, 2, x, y, mode="PARTIAL_DENSE", batch_x=batch_x, batch_y=batch_y)
+        idx, dist2 = ball_query(1.0, 2, x, y, mode="PARTIAL_DENSE", batch_x=batch_x, batch_y=batch_y)
 
-    #     idx = idx.detach().cpu().numpy()
-    #     dist2 = dist2.detach().cpu().numpy()
+        idx = idx.detach().cpu().numpy()
+        dist2 = dist2.detach().cpu().numpy()
 
-    #     idx_answer = np.asarray([[1, 1], [0, 1], [1, 1], [1, 1]])
-    #     dist2_answer = np.asarray([[-1, -1], [0.01, -1], [-1, -1], [-1, -1]]).astype(np.float32)
+        idx_answer = np.asarray([[1, 1], [0, 1], [1, 1], [1, 1]])  # only x[1] is expected to match y[0]; the value 1 presumably marks "no neighbour" (== len(y)) - TODO confirm
+        dist2_answer = np.asarray([[-1, -1], [0.01, -1], [-1, -1], [-1, -1]]).astype(np.float32)  # 0.01 == 0.1 ** 2 for the single match, -1 in every other slot
 
-    #     npt.assert_array_almost_equal(idx, idx_answer)
-    #     npt.assert_array_almost_equal(dist2, dist2_answer)
+        npt.assert_array_almost_equal(idx, idx_answer)
+        npt.assert_array_almost_equal(dist2, dist2_answer)
 
     def test_random_cpu(self):
         a = torch.randn(1000, 3).to(torch.float)
@@ -80,6 +81,19 @@ def test_random_cpu(self):
         idx, dist = ball_query(1.0, 12, a, b, mode="PARTIAL_DENSE", batch_x=batch_a, batch_y=batch_b)
         idx2, dist2 = ball_query(1.0, 12, b, a, mode="PARTIAL_DENSE", batch_x=batch_b, batch_y=batch_a)
 
+        zeros = torch.zeros_like(batch_b)
+        idx3, dist3 = ball_query(0.5, 17, b, b, mode="PARTIAL_DENSE", batch_x=zeros, batch_y=zeros)
+
+
+        # Comparison to see if we have the same result
+        tree = KDTree(b.detach().numpy())
+        idx3_sk = tree.query_radius(b.detach().numpy(), r=0.5)
+        i = np.random.randint(len(batch_b))
+        for p in idx3[i].detach().numpy():
+            if p < len(batch_b):
+                assert p in idx3_sk[i]
+
+
 
 if __name__ == "__main__":
     unittest.main()