Skip to content

Commit 01ea29b

Browse files
authored
Implement sub-sampling reduction kernels (#284)
1 parent a62de0d commit 01ea29b

File tree

3 files changed

+41
-1
lines changed

3 files changed

+41
-1
lines changed

src/gpu/nodeKernels.cu

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,34 @@ __global__ void kFilterGroundPoints(size_t pointCount, const Vec3f sensor_up_vec
223223
outNonGround[tid] = normalUpAngle > ground_angle_threshold;
224224
}
225225

226+
/**
 * Reduces the samples of every beam to its nearest ("first") and farthest ("last") hit.
 *
 * Each thread processes one beam (beamIdx = tid). The samples of a beam are read from
 * beamSamples at flat indices [beamIdx * samplesPerBeam, beamIdx * samplesPerBeam + samplesPerBeam).
 * Samples whose isHit entry equals 0 are ignored when searching for the minimum / maximum distance.
 *
 * @param beamCount      Number of beams; forwarded to LIMIT (defined elsewhere, expected to provide `tid`
 *                       and to stop threads beyond beamCount - confirm against the macro definition).
 * @param samplesPerBeam Number of consecutive samples stored per beam in beamSamples.
 * @param beamSamples    Per-sample inputs; this kernel reads its isHit, distance and xyz arrays.
 * @param first          Per-beam outputs; xyz and distance receive the hit sample with the smallest distance.
 * @param last           Per-beam outputs; xyz and distance receive the hit sample with the largest distance.
 *
 * If a beam contains no hit sample at all, sample 0 of that beam is copied to both outputs
 * (the same fallback the previous implementation had).
 */
__global__ void kProcessBeamSamplesFirstLast(size_t beamCount, int samplesPerBeam, MultiReturnPointers beamSamples,
                                             MultiReturnPointers first, MultiReturnPointers last)
{
    LIMIT(beamCount);

    const auto beamIdx = tid;
    // Widen before multiplying so the flat sample offset cannot overflow a narrower index type.
    const size_t beamBegin = static_cast<size_t>(beamIdx) * static_cast<size_t>(samplesPerBeam);

    // -1 marks "no hit seen yet". Seeding with index 0 (as before) used sample 0 as the comparison
    // baseline even when it was a non-hit, which could shadow the real nearest or farthest hit.
    int firstIdx = -1;
    int lastIdx = -1;
    for (int sampleIdx = 0; sampleIdx < samplesPerBeam; ++sampleIdx) {
        if (beamSamples.isHit[beamBegin + sampleIdx] == 0) {
            continue;
        }
        const auto sampleDistance = beamSamples.distance[beamBegin + sampleIdx];
        if (firstIdx < 0 || sampleDistance < beamSamples.distance[beamBegin + firstIdx]) {
            firstIdx = sampleIdx;
        }
        if (lastIdx < 0 || sampleDistance > beamSamples.distance[beamBegin + lastIdx]) {
            lastIdx = sampleIdx;
        }
    }

    // No hit in this beam: keep the historical behavior of reporting sample 0 for both returns.
    // (firstIdx and lastIdx are always set together, so checking one is sufficient.)
    if (firstIdx < 0) {
        firstIdx = 0;
        lastIdx = 0;
    }

    first.xyz[beamIdx] = beamSamples.xyz[beamBegin + firstIdx];
    first.distance[beamIdx] = beamSamples.distance[beamBegin + firstIdx];
    last.xyz[beamIdx] = beamSamples.xyz[beamBegin + lastIdx];
    last.distance[beamIdx] = beamSamples.distance[beamBegin + lastIdx];
}
252+
253+
226254
void gpuFindCompaction(cudaStream_t stream, size_t pointCount, const int32_t* shouldCompact,
227255
CompactionIndexType* hitCountInclusive, size_t* outHitCount)
228256
{
@@ -294,3 +322,9 @@ void gpuRadarComputeEnergy(cudaStream_t stream, size_t count, float rayAzimuthSt
294322
run(kRadarComputeEnergy, stream, count, rayAzimuthStepRad, rayElevationStepRad, freq, lookAtOriginTransform, rayPose,
295323
hitDist, hitNorm, hitPos, outBUBRFactor);
296324
}
325+
326+
/**
 * Host-side entry point for the first/last return reduction.
 *
 * Hands kProcessBeamSamplesFirstLast and all arguments to `run` together with `stream`;
 * nothing else is done here (no synchronization or error handling is visible in this wrapper).
 *
 * @param stream         Stream passed to `run` for the kernel.
 * @param beamCount      Number of beams to process.
 * @param samplesPerBeam Number of samples stored per beam in beamSamples.
 * @param beamSamples    Per-sample inputs consumed by the kernel.
 * @param first          Per-beam outputs for the nearest hit.
 * @param last           Per-beam outputs for the farthest hit.
 */
void gpuProcessBeamSamplesFirstLast(cudaStream_t stream,
                                    size_t beamCount,
                                    int samplesPerBeam,
                                    MultiReturnPointers beamSamples,
                                    MultiReturnPointers first,
                                    MultiReturnPointers last)
{
    run(kProcessBeamSamplesFirstLast, stream, beamCount, samplesPerBeam, beamSamples, first, last);
}

src/gpu/nodeKernels.hpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include <math/Mat3x4f.hpp>
2323
#include <RGLFields.hpp>
2424
#include <thrust/complex.h>
25+
#include <gpu/MultiReturn.hpp>
2526

2627
/*
2728
* The following functions are asynchronous!
@@ -50,4 +51,6 @@ void gpuFilterGroundPoints(cudaStream_t stream, size_t pointCount, const Vec3f s
5051
void gpuRadarComputeEnergy(cudaStream_t stream, size_t count, float rayAzimuthStepRad, float rayElevationStepRad, float freq,
5152
Mat3x4f lookAtOriginTransform, const Field<RAY_POSE_MAT3x4_F32>::type* rayPose,
5253
const Field<DISTANCE_F32>::type* hitDist, const Field<NORMAL_VEC3_F32>::type* hitNorm,
53-
const Field<XYZ_VEC3_F32>::type* hitPos, Vector<3, thrust::complex<float>>* outBUBRFactor);
54+
const Field<XYZ_VEC3_F32>::type* hitPos, Vector<3, thrust::complex<float>>* outBUBRFactor);
55+
// Reduces each beam's samples (samplesPerBeam consecutive entries of beamSamples per beam) to a
// "first" and a "last" return chosen by distance, writing one xyz/distance entry per beam into
// `first` and `last`. Like the other functions in this header it is asynchronous: the work is
// submitted on `stream`.
void gpuProcessBeamSamplesFirstLast(cudaStream_t stream, size_t beamCount, int samplesPerBeam, MultiReturnPointers beamSamples,
56+
MultiReturnPointers first, MultiReturnPointers last);

src/graph/RaytraceNode.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,9 @@ void RaytraceNode::enqueueExecImpl()
132132
std::size_t pipelineArgsSize = requestCtxDev->getSizeOf() * requestCtxDev->getCount();
133133
CHECK_OPTIX(optixLaunch(Optix::getOrCreate().pipeline, getStreamHandle(), pipelineArgsPtr, pipelineArgsSize, &sceneSBT,
134134
launchDims.x, launchDims.y, launchDims.y));
135+
136+
gpuProcessBeamSamplesFirstLast(getStreamHandle(), raysNode->getRayCount(), MULTI_RETURN_BEAM_SAMPLES,
137+
mrSamples.getPointers(), mrFirst.getPointers(), mrLast.getPointers());
135138
}
136139

137140
void RaytraceNode::setFields(const std::set<rgl_field_t>& fields)

0 commit comments

Comments
 (0)