Skip to content

Commit 8273409

Browse files
Tao-Shengmvieth
andauthored
✨ feat: Support parallel computation for NDT (#6376)
* ✨ feat: Support parallel computation for NDT Support parallel computation for NDT by OpenMP with different neighbor search method * 🐞 fix: OpenMP Requires Signed Indices Use signed std::ptrdiff_t instead of std::size_t * 🐞 fix: Apply copilot suggestion Apply copilot suggestion to fix some error * Fix and improve NDT parallelization - Added the switch to select different search methods to computeHessian() as well (would not make sense to use different search methods in computeDerivatives() and computeHessian()) - Added overloads to functions computePointDerivatives(), updateDerivatives(), and updateHessian() to be able to use local variables instead of the global variables point_jacobian_ and point_hessian_ - Change the parallelized region in computeDerivatives() by using the new function overloads, and by introducing a custom reduction (goal: compatibility with OpenMP 2.0). Synchronization overhead between threads is minimal, with only two critical regions at the end to accumulate, score, gradient, and hessian - Tested with two larger point clouds, and there is indeed a nice speed-up when increasing the number of threads (however not a perfect 1-to-1 speedup because there are still other functions in NDT that are not parallelized) * Fixes per copilot * Fix formatting * Also add DIRECT27 --------- Co-authored-by: Markus Vieth <mvieth@techfak.uni-bielefeld.de>
1 parent 279d91f commit 8273409

File tree

3 files changed

+287
-63
lines changed

3 files changed

+287
-63
lines changed

registration/include/pcl/registration/impl/ndt.hpp

Lines changed: 189 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,28 @@
4343

4444
namespace pcl {
4545

46+
template <typename PointSource, typename PointTarget, typename Scalar>
47+
void
48+
NormalDistributionsTransform<PointSource, PointTarget, Scalar>::setNumberOfThreads(
49+
unsigned int nr_threads)
50+
{
51+
#ifdef _OPENMP
52+
if (nr_threads == 0)
53+
threads_ = omp_get_num_procs();
54+
else
55+
threads_ = nr_threads;
56+
PCL_DEBUG("[pcl::NormalDistributionsTransform::setNumberOfThreads] Setting "
57+
"number of threads to %u.\n",
58+
threads_);
59+
#else
60+
threads_ = 1;
61+
if (nr_threads != 1)
62+
PCL_WARN("[pcl::NormalDistributionsTransform::setNumberOfThreads] "
63+
"Parallelization is requested, but OpenMP is not available! Continuing "
64+
"without parallelization.\n");
65+
#endif // _OPENMP
66+
}
67+
4668
template <typename PointSource, typename PointTarget, typename Scalar>
4769
NormalDistributionsTransform<PointSource, PointTarget, Scalar>::
4870
NormalDistributionsTransform()
@@ -196,38 +218,84 @@ NormalDistributionsTransform<PointSource, PointTarget, Scalar>::computeDerivativ
196218
// Precompute Angular Derivatives (eq. 6.19 and 6.21)[Magnusson 2009]
197219
computeAngleDerivatives(transform);
198220

199-
// Update gradient and hessian for each point, line 17 in Algorithm 2 [Magnusson 2009]
200-
for (std::size_t idx = 0; idx < input_->size(); idx++) {
201-
// Transformed Point
202-
const auto& x_trans_pt = trans_cloud[idx];
203-
204-
// Find neighbors (Radius search has been experimentally faster than direct neighbor
205-
// checking.
206-
std::vector<TargetGridLeafConstPtr> neighborhood;
207-
std::vector<float> distances;
208-
target_cells_.radiusSearch(x_trans_pt, resolution_, neighborhood, distances);
221+
std::vector<TargetGridLeafConstPtr> neighborhood;
222+
std::vector<float> distances;
223+
224+
Eigen::Matrix<double, 3, 6> point_jacobian = Eigen::Matrix<double, 3, 6>::Zero();
225+
point_jacobian.block<3, 3>(0, 0).setIdentity();
226+
Eigen::Matrix<double, 18, 6> point_hessian = Eigen::Matrix<double, 18, 6>::Zero();
227+
228+
#pragma omp parallel num_threads(threads_) default(none) shared(score_gradient, hessian, trans_cloud, compute_hessian) firstprivate(neighborhood, distances, point_jacobian, point_hessian) reduction(+: score)
229+
{
230+
// workaround for a user-defined reduction on score_gradient and hessian, that still
231+
// works with OpenMP 2.0
232+
Eigen::Matrix<double, 6, 1> score_gradient_private =
233+
Eigen::Matrix<double, 6, 1>::Zero();
234+
Eigen::Matrix<double, 6, 6> hessian_private = Eigen::Matrix<double, 6, 6>::Zero();
235+
#pragma omp for schedule(dynamic, 32)
236+
// Update gradient and hessian for each point, line 17 in Algorithm 2 [Magnusson
237+
// 2009]
238+
for (std::ptrdiff_t idx = 0; idx < static_cast<std::ptrdiff_t>(input_->size());
239+
idx++) {
240+
// Transformed Point
241+
const auto& x_trans_pt = trans_cloud[idx];
242+
243+
// Find neighbors with different search method
244+
neighborhood.clear();
245+
switch (search_method_) {
246+
case NeighborSearchMethod::RADIUS:
247+
// Radius search has been experimentally faster than direct neighbor checking.
248+
target_cells_.radiusSearch(x_trans_pt, resolution_, neighborhood, distances);
249+
break;
250+
case NeighborSearchMethod::DIRECT27:
251+
target_cells_.getAllNeighborsAtPoint(x_trans_pt, neighborhood);
252+
break;
253+
case NeighborSearchMethod::DIRECT26:
254+
target_cells_.getNeighborhoodAtPoint(x_trans_pt, neighborhood);
255+
break;
256+
case NeighborSearchMethod::DIRECT7:
257+
target_cells_.getFaceNeighborsAtPoint(x_trans_pt, neighborhood);
258+
break;
259+
case NeighborSearchMethod::DIRECT1:
260+
target_cells_.getVoxelAtPoint(x_trans_pt, neighborhood);
261+
break;
262+
}
209263

210-
for (const auto& cell : neighborhood) {
211264
// Original Point
212-
const auto& x_pt = (*input_)[idx];
213-
const Eigen::Vector3d x = x_pt.getVector3fMap().template cast<double>();
214-
215-
// Denorm point, x_k' in Equations 6.12 and 6.13 [Magnusson 2009]
216-
const Eigen::Vector3d x_trans =
217-
x_trans_pt.getVector3fMap().template cast<double>() - cell->getMean();
218-
// Inverse Covariance of Occupied Voxel
219-
// Uses precomputed covariance for speed.
220-
const Eigen::Matrix3d c_inv = cell->getInverseCov();
221-
222-
// Compute derivative of transform function w.r.t. transform vector, J_E and H_E
223-
// in Equations 6.18 and 6.20 [Magnusson 2009]
224-
computePointDerivatives(x);
225-
// Update score, gradient and hessian, lines 19-21 in Algorithm 2, according to
226-
// Equations 6.10, 6.12 and 6.13, respectively [Magnusson 2009]
227-
score +=
228-
updateDerivatives(score_gradient, hessian, x_trans, c_inv, compute_hessian);
265+
const Eigen::Vector3d x = (*input_)[idx].getVector3fMap().template cast<double>();
266+
const Eigen::Vector3d x_trans_position =
267+
x_trans_pt.getVector3fMap().template cast<double>();
268+
269+
for (const auto& cell : neighborhood) {
270+
// Denorm point, x_k' in Equations 6.12 and 6.13 [Magnusson 2009]
271+
const Eigen::Vector3d x_trans = x_trans_position - cell->getMean();
272+
// Inverse Covariance of Occupied Voxel
273+
// Uses precomputed covariance for speed.
274+
const Eigen::Matrix3d c_inv = cell->getInverseCov();
275+
276+
// Compute derivative of transform function w.r.t. transform vector, J_E and H_E
277+
// in Equations 6.18 and 6.20 [Magnusson 2009]
278+
computePointDerivatives(
279+
x, point_jacobian, compute_hessian ? &point_hessian : nullptr);
280+
// Update score, gradient and hessian, lines 19-21 in Algorithm 2, according to
281+
// Equations 6.10, 6.12 and 6.13, respectively [Magnusson 2009]
282+
score += updateDerivatives(score_gradient_private,
283+
hessian_private,
284+
x_trans,
285+
c_inv,
286+
point_jacobian,
287+
compute_hessian ? &point_hessian : nullptr);
288+
}
229289
}
230-
}
290+
#pragma omp critical(accum_score_gradient)
291+
{
292+
score_gradient += score_gradient_private;
293+
}
294+
#pragma omp critical(accum_hessian)
295+
{
296+
hessian += hessian_private;
297+
}
298+
} // end of omp parallel
231299
return score;
232300
}
233301

@@ -319,23 +387,25 @@ NormalDistributionsTransform<PointSource, PointTarget, Scalar>::computeAngleDeri
319387
template <typename PointSource, typename PointTarget, typename Scalar>
320388
void
321389
NormalDistributionsTransform<PointSource, PointTarget, Scalar>::computePointDerivatives(
322-
const Eigen::Vector3d& x, bool compute_hessian)
390+
const Eigen::Vector3d& x,
391+
Eigen::Matrix<double, 3, 6>& point_jacobian,
392+
Eigen::Matrix<double, 18, 6>* point_hessian) const
323393
{
324394
// Calculate first derivative of Transformation Equation 6.17 w.r.t. transform vector.
325395
// Derivative w.r.t. ith element of transform vector corresponds to column i,
326396
// Equation 6.18 and 6.19 [Magnusson 2009]
327397
Eigen::Matrix<double, 8, 1> point_angular_jacobian =
328398
angular_jacobian_ * Eigen::Vector4d(x[0], x[1], x[2], 0.0);
329-
point_jacobian_(1, 3) = point_angular_jacobian[0];
330-
point_jacobian_(2, 3) = point_angular_jacobian[1];
331-
point_jacobian_(0, 4) = point_angular_jacobian[2];
332-
point_jacobian_(1, 4) = point_angular_jacobian[3];
333-
point_jacobian_(2, 4) = point_angular_jacobian[4];
334-
point_jacobian_(0, 5) = point_angular_jacobian[5];
335-
point_jacobian_(1, 5) = point_angular_jacobian[6];
336-
point_jacobian_(2, 5) = point_angular_jacobian[7];
337-
338-
if (compute_hessian) {
399+
point_jacobian(1, 3) = point_angular_jacobian[0];
400+
point_jacobian(2, 3) = point_angular_jacobian[1];
401+
point_jacobian(0, 4) = point_angular_jacobian[2];
402+
point_jacobian(1, 4) = point_angular_jacobian[3];
403+
point_jacobian(2, 4) = point_angular_jacobian[4];
404+
point_jacobian(0, 5) = point_angular_jacobian[5];
405+
point_jacobian(1, 5) = point_angular_jacobian[6];
406+
point_jacobian(2, 5) = point_angular_jacobian[7];
407+
408+
if (point_hessian) {
339409
Eigen::Matrix<double, 15, 1> point_angular_hessian =
340410
angular_hessian_ * Eigen::Vector4d(x[0], x[1], x[2], 0.0);
341411

@@ -350,26 +420,36 @@ NormalDistributionsTransform<PointSource, PointTarget, Scalar>::computePointDeri
350420
// Calculate second derivative of Transformation Equation 6.17 w.r.t. transform
351421
// vector. Derivative w.r.t. ith and jth elements of transform vector corresponds to
352422
// the 3x1 block matrix starting at (3i,j), Equation 6.20 and 6.21 [Magnusson 2009]
353-
point_hessian_.block<3, 1>(9, 3) = a;
354-
point_hessian_.block<3, 1>(12, 3) = b;
355-
point_hessian_.block<3, 1>(15, 3) = c;
356-
point_hessian_.block<3, 1>(9, 4) = b;
357-
point_hessian_.block<3, 1>(12, 4) = d;
358-
point_hessian_.block<3, 1>(15, 4) = e;
359-
point_hessian_.block<3, 1>(9, 5) = c;
360-
point_hessian_.block<3, 1>(12, 5) = e;
361-
point_hessian_.block<3, 1>(15, 5) = f;
423+
point_hessian->block<3, 1>(9, 3) = a;
424+
point_hessian->block<3, 1>(12, 3) = b;
425+
point_hessian->block<3, 1>(15, 3) = c;
426+
point_hessian->block<3, 1>(9, 4) = b;
427+
point_hessian->block<3, 1>(12, 4) = d;
428+
point_hessian->block<3, 1>(15, 4) = e;
429+
point_hessian->block<3, 1>(9, 5) = c;
430+
point_hessian->block<3, 1>(12, 5) = e;
431+
point_hessian->block<3, 1>(15, 5) = f;
362432
}
363433
}
364434

435+
template <typename PointSource, typename PointTarget, typename Scalar>
436+
void
437+
NormalDistributionsTransform<PointSource, PointTarget, Scalar>::computePointDerivatives(
438+
const Eigen::Vector3d& x, bool compute_hessian)
439+
{
440+
computePointDerivatives(
441+
x, point_jacobian_, compute_hessian ? &point_hessian_ : nullptr);
442+
}
443+
365444
template <typename PointSource, typename PointTarget, typename Scalar>
366445
double
367446
NormalDistributionsTransform<PointSource, PointTarget, Scalar>::updateDerivatives(
368447
Eigen::Matrix<double, 6, 1>& score_gradient,
369448
Eigen::Matrix<double, 6, 6>& hessian,
370449
const Eigen::Vector3d& x_trans,
371450
const Eigen::Matrix3d& c_inv,
372-
bool compute_hessian) const
451+
const Eigen::Matrix<double, 3, 6>& point_jacobian,
452+
const Eigen::Matrix<double, 18, 6>* point_hessian) const
373453
{
374454
// e^(-d_2/2 * (x_k - mu_k)^T Sigma_k^-1 (x_k - mu_k)) Equation 6.9 [Magnusson 2009]
375455
double e_x_cov_x = std::exp(-gauss_d2_ * x_trans.dot(c_inv * x_trans) / 2);
@@ -390,26 +470,43 @@ NormalDistributionsTransform<PointSource, PointTarget, Scalar>::updateDerivative
390470
for (int i = 0; i < 6; i++) {
391471
// Sigma_k^-1 d(T(x,p))/dpi, Reusable portion of Equation 6.12 and 6.13 [Magnusson
392472
// 2009]
393-
const Eigen::Vector3d cov_dxd_pi = c_inv * point_jacobian_.col(i);
473+
const Eigen::Vector3d cov_dxd_pi = c_inv * point_jacobian.col(i);
394474

395475
// Update gradient, Equation 6.12 [Magnusson 2009]
396476
score_gradient(i) += x_trans.dot(cov_dxd_pi) * e_x_cov_x;
397477

398-
if (compute_hessian) {
478+
if (point_hessian) {
399479
for (Eigen::Index j = 0; j < hessian.cols(); j++) {
400480
// Update hessian, Equation 6.13 [Magnusson 2009]
401481
hessian(i, j) +=
402482
e_x_cov_x * (-gauss_d2_ * x_trans.dot(cov_dxd_pi) *
403-
x_trans.dot(c_inv * point_jacobian_.col(j)) +
404-
x_trans.dot(c_inv * point_hessian_.block<3, 1>(3 * i, j)) +
405-
point_jacobian_.col(j).dot(cov_dxd_pi));
483+
x_trans.dot(c_inv * point_jacobian.col(j)) +
484+
x_trans.dot(c_inv * point_hessian->block<3, 1>(3 * i, j)) +
485+
point_jacobian.col(j).dot(cov_dxd_pi));
406486
}
407487
}
408488
}
409489

410490
return score_inc;
411491
}
412492

493+
template <typename PointSource, typename PointTarget, typename Scalar>
494+
double
495+
NormalDistributionsTransform<PointSource, PointTarget, Scalar>::updateDerivatives(
496+
Eigen::Matrix<double, 6, 1>& score_gradient,
497+
Eigen::Matrix<double, 6, 6>& hessian,
498+
const Eigen::Vector3d& x_trans,
499+
const Eigen::Matrix3d& c_inv,
500+
bool compute_hessian) const
501+
{
502+
return updateDerivatives(score_gradient,
503+
hessian,
504+
x_trans,
505+
c_inv,
506+
point_jacobian_,
507+
compute_hessian ? &point_hessian_ : nullptr);
508+
}
509+
413510
template <typename PointSource, typename PointTarget, typename Scalar>
414511
void
415512
NormalDistributionsTransform<PointSource, PointTarget, Scalar>::computeHessian(
@@ -424,11 +521,28 @@ NormalDistributionsTransform<PointSource, PointTarget, Scalar>::computeHessian(
424521
// Transformed Point
425522
const auto& x_trans_pt = trans_cloud[idx];
426523

427-
// Find neighbors (Radius search has been experimentally faster than direct neighbor
428-
// checking.
429524
std::vector<TargetGridLeafConstPtr> neighborhood;
430525
std::vector<float> distances;
431-
target_cells_.radiusSearch(x_trans_pt, resolution_, neighborhood, distances);
526+
527+
// Find neighbors with different search methods
528+
switch (search_method_) {
529+
case NeighborSearchMethod::RADIUS:
530+
// Radius search has been experimentally faster than direct neighbor checking.
531+
target_cells_.radiusSearch(x_trans_pt, resolution_, neighborhood, distances);
532+
break;
533+
case NeighborSearchMethod::DIRECT27:
534+
target_cells_.getAllNeighborsAtPoint(x_trans_pt, neighborhood);
535+
break;
536+
case NeighborSearchMethod::DIRECT26:
537+
target_cells_.getNeighborhoodAtPoint(x_trans_pt, neighborhood);
538+
break;
539+
case NeighborSearchMethod::DIRECT7:
540+
target_cells_.getFaceNeighborsAtPoint(x_trans_pt, neighborhood);
541+
break;
542+
case NeighborSearchMethod::DIRECT1:
543+
target_cells_.getVoxelAtPoint(x_trans_pt, neighborhood);
544+
break;
545+
}
432546

433547
for (const auto& cell : neighborhood) {
434548
// Original Point
@@ -457,7 +571,9 @@ void
457571
NormalDistributionsTransform<PointSource, PointTarget, Scalar>::updateHessian(
458572
Eigen::Matrix<double, 6, 6>& hessian,
459573
const Eigen::Vector3d& x_trans,
460-
const Eigen::Matrix3d& c_inv) const
574+
const Eigen::Matrix3d& c_inv,
575+
const Eigen::Matrix<double, 3, 6>& point_jacobian,
576+
const Eigen::Matrix<double, 18, 6>& point_hessian) const
461577
{
462578
// e^(-d_2/2 * (x_k - mu_k)^T Sigma_k^-1 (x_k - mu_k)) Equation 6.9 [Magnusson 2009]
463579
double e_x_cov_x =
@@ -474,19 +590,29 @@ NormalDistributionsTransform<PointSource, PointTarget, Scalar>::updateHessian(
474590
for (int i = 0; i < 6; i++) {
475591
// Sigma_k^-1 d(T(x,p))/dpi, Reusable portion of Equation 6.12 and 6.13 [Magnusson
476592
// 2009]
477-
const Eigen::Vector3d cov_dxd_pi = c_inv * point_jacobian_.col(i);
593+
const Eigen::Vector3d cov_dxd_pi = c_inv * point_jacobian.col(i);
478594

479595
for (Eigen::Index j = 0; j < hessian.cols(); j++) {
480596
// Update hessian, Equation 6.13 [Magnusson 2009]
481597
hessian(i, j) +=
482598
e_x_cov_x * (-gauss_d2_ * x_trans.dot(cov_dxd_pi) *
483-
x_trans.dot(c_inv * point_jacobian_.col(j)) +
484-
x_trans.dot(c_inv * point_hessian_.block<3, 1>(3 * i, j)) +
485-
point_jacobian_.col(j).dot(cov_dxd_pi));
599+
x_trans.dot(c_inv * point_jacobian.col(j)) +
600+
x_trans.dot(c_inv * point_hessian.block<3, 1>(3 * i, j)) +
601+
point_jacobian.col(j).dot(cov_dxd_pi));
486602
}
487603
}
488604
}
489605

606+
template <typename PointSource, typename PointTarget, typename Scalar>
607+
void
608+
NormalDistributionsTransform<PointSource, PointTarget, Scalar>::updateHessian(
609+
Eigen::Matrix<double, 6, 6>& hessian,
610+
const Eigen::Vector3d& x_trans,
611+
const Eigen::Matrix3d& c_inv) const
612+
{
613+
updateHessian(hessian, x_trans, c_inv, point_jacobian_, point_hessian_);
614+
}
615+
490616
template <typename PointSource, typename PointTarget, typename Scalar>
491617
bool
492618
NormalDistributionsTransform<PointSource, PointTarget, Scalar>::updateIntervalMT(

0 commit comments

Comments
 (0)