Skip to content

Commit 56925ca

Browse files
committed
gicp_gpu (wip)
1 parent e2826fc commit 56925ca

File tree

11 files changed

+672
-42
lines changed

11 files changed

+672
-42
lines changed

CMakeLists.txt

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -237,14 +237,15 @@ if(BUILD_WITH_CUDA)
237237
src/gtsam_points/cuda/stream_roundrobin.cu
238238
src/gtsam_points/cuda/stream_temp_buffer_roundrobin.cu
239239
# ann
240-
src/gtsam_points/ann/kdtree_cuda.cpp
241-
src/gtsam_points/ann/kdtree_cuda.cu
240+
src/gtsam_points/ann/kdtree_gpu.cpp
241+
src/gtsam_points/ann/kdtree_gpu.cu
242242
# types
243243
src/gtsam_points/types/point_cloud.cu
244244
src/gtsam_points/types/point_cloud_gpu.cu
245245
src/gtsam_points/types/gaussian_voxelmap_gpu.cu
246246
src/gtsam_points/types/gaussian_voxelmap_gpu_funcs.cu
247247
# factors
248+
src/gtsam_points/factors/integrated_gicp_derivatives.cu
248249
src/gtsam_points/factors/integrated_vgicp_derivatives.cu
249250
src/gtsam_points/factors/integrated_vgicp_derivatives_inliers.cu
250251
src/gtsam_points/factors/integrated_vgicp_derivatives_compute.cu
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,9 @@ struct KdTreeNodeGPU {
2929

3030
class KdTreeGPU {
3131
public:
32+
using Ptr = std::shared_ptr<KdTreeGPU>;
33+
using ConstPtr = std::shared_ptr<const KdTreeGPU>;
34+
3235
KdTreeGPU(const PointCloud::ConstPtr& points, CUstream_st* stream = nullptr);
3336
~KdTreeGPU();
3437

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
// SPDX-License-Identifier: MIT
2+
// Copyright (c) 2025 Kenji Koide (k.koide@aist.go.jp)
3+
4+
#pragma once
5+
6+
#include <Eigen/Core>
7+
#include <thrust/device_vector.h>
8+
9+
#include <gtsam_points/cuda/kernels/pose.cuh>
10+
#include <gtsam_points/cuda/kernels/linearized_system.cuh>
11+
12+
namespace gtsam_points {
13+
14+
struct gicp_derivatives_kernel {
15+
gicp_derivatives_kernel(
16+
const Eigen::Isometry3f* linearization_point_ptr,
17+
const Eigen::Vector3f* target_means,
18+
const Eigen::Matrix3f* target_covs,
19+
const Eigen::Vector3f* source_means,
20+
const Eigen::Matrix3f* source_covs)
21+
: linearization_point_ptr(linearization_point_ptr),
22+
target_means_ptr(target_means),
23+
target_covs_ptr(target_covs),
24+
source_means_ptr(source_means),
25+
source_covs_ptr(source_covs) {}
26+
27+
__device__ LinearizedSystem6 operator()(const thrust::pair<int, int>& source_target_correspondence) const {
28+
const int source_idx = source_target_correspondence.first;
29+
const int target_idx = source_target_correspondence.second;
30+
if (source_idx < 0 || target_idx < 0) {
31+
return LinearizedSystem6::zero();
32+
}
33+
34+
const Eigen::Isometry3f& x = *linearization_point_ptr;
35+
const Eigen::Matrix3f R = x.linear();
36+
const Eigen::Vector3f t = x.translation();
37+
38+
const Eigen::Vector3f mean_A = source_means_ptr[source_idx];
39+
const Eigen::Matrix3f cov_A = source_covs_ptr[source_idx];
40+
const Eigen::Vector3f transed_mean_A = R * mean_A + t;
41+
42+
const Eigen::Vector3f mean_B = target_means_ptr[target_idx];
43+
const Eigen::Matrix3f cov_B = target_covs_ptr[target_idx];
44+
45+
const Eigen::Matrix3f RCR = (R * cov_A * R.transpose());
46+
const Eigen::Matrix3f RCR_inv = (cov_B + RCR).inverse();
47+
Eigen::Vector3f error = mean_B - transed_mean_A;
48+
49+
Eigen::Matrix<float, 3, 6> J_target;
50+
J_target.block<3, 3>(0, 0) = -skew_symmetric(transed_mean_A);
51+
J_target.block<3, 3>(0, 3) = Eigen::Matrix3f::Identity();
52+
53+
Eigen::Matrix<float, 3, 6> J_source;
54+
J_source.block<3, 3>(0, 0) = R * skew_symmetric(mean_A);
55+
J_source.block<3, 3>(0, 3) = -R;
56+
57+
Eigen::Matrix<float, 6, 3> J_target_RCR_inv = J_target.transpose() * RCR_inv;
58+
Eigen::Matrix<float, 6, 3> J_source_RCR_inv = J_source.transpose() * RCR_inv;
59+
60+
LinearizedSystem6 linearized;
61+
linearized.num_inliers = 1;
62+
linearized.error = error.transpose() * RCR_inv * error;
63+
linearized.H_target = J_target_RCR_inv * J_target;
64+
linearized.H_source = J_source_RCR_inv * J_source;
65+
linearized.H_target_source = J_target_RCR_inv * J_source;
66+
linearized.b_target = J_target_RCR_inv * error;
67+
linearized.b_source = J_source_RCR_inv * error;
68+
69+
return linearized;
70+
}
71+
72+
const Eigen::Isometry3f* linearization_point_ptr;
73+
74+
const Eigen::Vector3f* target_means_ptr;
75+
const Eigen::Matrix3f* target_covs_ptr;
76+
77+
const Eigen::Vector3f* source_means_ptr;
78+
const Eigen::Matrix3f* source_covs_ptr;
79+
};
80+
81+
struct gicp_error_kernel {
82+
gicp_error_kernel(
83+
const Eigen::Isometry3f* linearization_point_ptr,
84+
const Eigen::Isometry3f* evaluation_point_ptr,
85+
const Eigen::Vector3f* target_means,
86+
const Eigen::Matrix3f* target_covs,
87+
const Eigen::Vector3f* source_means,
88+
const Eigen::Matrix3f* source_covs)
89+
: linearization_point_ptr(linearization_point_ptr),
90+
evaluation_point_ptr(evaluation_point_ptr),
91+
target_means_ptr(target_means),
92+
target_covs_ptr(target_covs),
93+
source_means_ptr(source_means),
94+
source_covs_ptr(source_covs) {}
95+
96+
__device__ float operator()(const thrust::pair<int, int>& source_target_correspondence) const {
97+
const int source_idx = source_target_correspondence.first;
98+
const int target_idx = source_target_correspondence.second;
99+
if (source_idx < 0 || target_idx < 0) {
100+
return 0.0f;
101+
}
102+
103+
const Eigen::Isometry3f& xl = *linearization_point_ptr;
104+
const Eigen::Matrix3f Rl = xl.linear();
105+
106+
const Eigen::Isometry3f& xe = *evaluation_point_ptr;
107+
const Eigen::Matrix3f Re = xe.linear();
108+
const Eigen::Vector3f te = xe.translation();
109+
110+
const Eigen::Vector3f mean_A = source_means_ptr[source_idx];
111+
const Eigen::Matrix3f cov_A = source_covs_ptr[source_idx];
112+
const Eigen::Vector3f transed_mean_A = Re * mean_A + te;
113+
114+
const Eigen::Vector3f mean_B = target_means_ptr[target_idx];
115+
const Eigen::Matrix3f cov_B = target_covs_ptr[target_idx];
116+
117+
const Eigen::Matrix3f RCR = (Rl * cov_A * Rl.transpose());
118+
const Eigen::Matrix3f RCR_inv = (cov_B + RCR).inverse();
119+
Eigen::Vector3f error = mean_B - transed_mean_A;
120+
121+
return error.transpose() * RCR_inv * error;
122+
}
123+
124+
const Eigen::Isometry3f* linearization_point_ptr;
125+
const Eigen::Isometry3f* evaluation_point_ptr;
126+
127+
const Eigen::Vector3f* target_means_ptr;
128+
const Eigen::Matrix3f* target_covs_ptr;
129+
const Eigen::Vector3f* source_means_ptr;
130+
const Eigen::Matrix3f* source_covs_ptr;
131+
};
132+
133+
} // namespace gtsam_points

src/gtsam_points/ann/kdtree_cuda.cu renamed to include/gtsam_points/cuda/kernels/kdtree.cuh

Lines changed: 5 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,14 @@
11
// SPDX-License-Identifier: MIT
22
// Copyright (c) 2025 Kenji Koide (k.koide@aist.go.jp)
3-
#include <gtsam_points/ann/kdtree_cuda.hpp>
4-
5-
#include <algorithm>
6-
#include <thrust/copy.h>
7-
8-
#include <thrust/pair.h>
9-
#include <thrust/for_each.h>
10-
#include <thrust/iterator/counting_iterator.h>
11-
#include <gtsam_points/cuda/check_error.cuh>
3+
#include <gtsam_points/ann/kdtree_gpu.hpp>
124

135
namespace gtsam_points {
146

15-
namespace {
16-
17-
struct nearest_neighbor_search_kernel {
7+
struct kdtree_nearest_neighbor_search_kernel {
188
public:
199
static constexpr int MAX_STACK_SIZE = 20;
2010

21-
__device__ void operator()(std::uint32_t i) const {
22-
const Eigen::Vector3f query = queries[i];
23-
11+
__device__ thrust::pair<NodeIndexType, float> operator()(const Eigen::Vector3f query) const {
2412
thrust::pair<NodeIndexType, float> result = {INVALID_NODE, std::numeric_limits<float>::max()};
2513

2614
int stack_size = 1;
@@ -72,34 +60,13 @@ public:
7260
search_stack[stack_size++].second = 0.0f;
7361
}
7462

75-
nn_indices[i] = result.first;
76-
nn_sq_dists[i] = result.second;
63+
return result;
7764
}
7865

7966
public:
8067
const Eigen::Vector3f* __restrict__ points;
8168
const std::uint32_t* __restrict__ indices;
8269
const KdTreeNodeGPU* __restrict__ nodes;
83-
84-
const Eigen::Vector3f* __restrict__ queries;
85-
86-
std::uint32_t* nn_indices;
87-
float* nn_sq_dists;
8870
};
8971

90-
} // namespace
91-
92-
void KdTreeGPU::nearest_neighbor_search(
93-
const Eigen::Vector3f* queries,
94-
const size_t num_queries,
95-
std::uint32_t* nn_indices,
96-
float* nn_sq_dists,
97-
CUstream_st* stream) {
98-
thrust::for_each(
99-
thrust::cuda::par.on(stream),
100-
thrust::counting_iterator<std::uint32_t>(0),
101-
thrust::counting_iterator<std::uint32_t>(num_queries),
102-
nearest_neighbor_search_kernel{points->points_gpu, indices, nodes, queries, nn_indices, nn_sq_dists});
103-
}
104-
105-
} // namespace gtsam_points
72+
} // namespace gtsam_points
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
// SPDX-License-Identifier: MIT
2+
// Copyright (c) 2025 Kenji Koide (k.koide@aist.go.jp)
3+
4+
#pragma once
5+
6+
#include <memory>
7+
#include <Eigen/Core>
8+
#include <Eigen/Geometry>
9+
10+
#include <gtsam_points/ann/kdtree_gpu.hpp>
11+
#include <gtsam_points/types/point_cloud_gpu.hpp>
12+
13+
struct CUstream_st;
14+
15+
namespace gtsam_points {
16+
17+
class LinearizedSystem6;
18+
class TempBufferManager;
19+
20+
class IntegratedGICPDerivatives {
21+
public:
22+
EIGEN_MAKE_ALIGNED_OPERATOR_NEW
23+
24+
IntegratedGICPDerivatives(
25+
const PointCloud::ConstPtr& target,
26+
const PointCloud::ConstPtr& source,
27+
const KdTreeGPU::ConstPtr& target_tree,
28+
CUstream_st* ext_stream,
29+
std::shared_ptr<TempBufferManager> temp_buffer);
30+
~IntegratedGICPDerivatives();
31+
32+
void set_inlier_update_thresh(double trans, double angle) {
33+
inlier_update_thresh_trans = trans;
34+
inlier_update_thresh_angle = angle;
35+
}
36+
37+
void set_enable_offloading(bool enable) { enable_offloading = enable; }
38+
39+
void set_enable_surface_validation(bool enable) { enable_surface_validation = enable; }
40+
41+
int get_num_inliers() const { return num_inliers; }
42+
43+
void touch_points();
44+
45+
// synchronized interface
46+
LinearizedSystem6 linearize(const Eigen::Isometry3f& x);
47+
double compute_error(const Eigen::Isometry3f& xl, const Eigen::Isometry3f& xe);
48+
49+
void reset_inliers(const Eigen::Isometry3f& x, const Eigen::Isometry3f* d_x, bool force_update = false);
50+
void update_inliers(int num_inliers);
51+
52+
// async interface
53+
void sync_stream();
54+
void issue_linearize(const Eigen::Isometry3f* d_x, LinearizedSystem6* d_output);
55+
void issue_compute_error(const Eigen::Isometry3f* d_xl, const Eigen::Isometry3f* d_xe, float* d_output);
56+
57+
private:
58+
bool enable_offloading;
59+
60+
bool enable_surface_validation;
61+
double inlier_update_thresh_trans;
62+
double inlier_update_thresh_angle;
63+
64+
bool external_stream;
65+
CUstream_st* stream;
66+
std::shared_ptr<TempBufferManager> temp_buffer;
67+
68+
PointCloud::ConstPtr target;
69+
PointCloud::ConstPtr source;
70+
KdTreeGPU::ConstPtr target_tree;
71+
72+
Eigen::Isometry3f inlier_evaluation_point;
73+
const Eigen::Isometry3f* inlier_evaluation_point_gpu;
74+
75+
int num_inliers;
76+
int* num_inliers_gpu;
77+
std::pair<int, int>* source_target_correspondences;
78+
};
79+
} // namespace gtsam_points

0 commit comments

Comments
 (0)