Skip to content

Commit df5cf48

Browse files
authored
Merge pull request #1076 from beomki-yeo/remove-exclusive-scan
Remove exclusive_scan kernel in the greedy ambiguity resolver
2 parents a804d60 + 651302d commit df5cf48

File tree

6 files changed

+46
-157
lines changed

6 files changed

+46
-157
lines changed

device/common/include/traccc/ambiguity_resolution/device/exclusive_scan.hpp

Lines changed: 0 additions & 49 deletions
This file was deleted.

device/cuda/CMakeLists.txt

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,8 +91,6 @@ traccc_add_library( traccc_cuda cuda TYPE SHARED
9191
"src/ambiguity_resolution/kernels/count_removable_tracks.cuh"
9292
"src/ambiguity_resolution/kernels/count_shared_measurements.cu"
9393
"src/ambiguity_resolution/kernels/count_shared_measurements.cuh"
94-
"src/ambiguity_resolution/kernels/exclusive_scan.cu"
95-
"src/ambiguity_resolution/kernels/exclusive_scan.cuh"
9694
"src/ambiguity_resolution/kernels/find_max_shared.cu"
9795
"src/ambiguity_resolution/kernels/find_max_shared.cuh"
9896
"src/ambiguity_resolution/kernels/fill_inverted_ids.cu"

device/cuda/src/ambiguity_resolution/greedy_ambiguity_resolution_algorithm.cu

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
#include "./kernels/block_inclusive_scan.cuh"
1313
#include "./kernels/count_removable_tracks.cuh"
1414
#include "./kernels/count_shared_measurements.cuh"
15-
#include "./kernels/exclusive_scan.cuh"
1615
#include "./kernels/fill_inverted_ids.cuh"
1716
#include "./kernels/fill_track_candidates.cuh"
1817
#include "./kernels/fill_tracks_per_measurement.cuh"
@@ -462,14 +461,6 @@ greedy_ambiguity_resolution_algorithm::operator()(
462461
.meas_to_remove_view = meas_to_remove_buffer,
463462
.threads_view = threads_buffer});
464463

465-
kernels::exclusive_scan<<<1, 1024, 0, stream>>>(
466-
device::exclusive_scan_payload{
467-
.terminate = terminate_device.get(),
468-
.n_removable_tracks = n_removable_tracks_device.get(),
469-
.n_meas_to_remove = n_meas_to_remove_device.get(),
470-
.meas_to_remove_view = meas_to_remove_buffer,
471-
.threads_view = threads_buffer});
472-
473464
kernels::remove_tracks<<<1, 1024, 0, stream>>>(
474465
device::remove_tracks_payload{
475466
.sorted_ids_view = sorted_ids_buffer,

device/cuda/src/ambiguity_resolution/kernels/count_removable_tracks.cu

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ __launch_bounds__(512) __global__ void count_removable_tracks(
7070
__shared__ int shared_n_meas[512];
7171
__shared__ measurement_id_type sh_meas_ids[512];
7272
__shared__ unsigned int sh_threads[512];
73+
__shared__ int prefix[512];
7374
__shared__ unsigned int n_meas_total;
7475
__shared__ unsigned int bound;
7576
__shared__ unsigned int n_tracks_to_iterate;
@@ -237,6 +238,51 @@ __launch_bounds__(512) __global__ void count_removable_tracks(
237238
if (threadIndex == 0) {
238239
*(payload.n_meas_to_remove) = n_meas_total;
239240
}
241+
242+
__syncthreads();
243+
244+
auto n_meas_to_remove_temp = *(payload.n_meas_to_remove);
245+
246+
if (threadIndex == 0) {
247+
*(payload.n_meas_to_remove) = 0;
248+
}
249+
250+
__syncthreads();
251+
252+
int is_valid =
253+
(threads[threadIndex] < *(payload.n_removable_tracks)) ? 1 : 0;
254+
255+
// TODO: Use better reduction algorithm
256+
if (is_valid) {
257+
atomicAdd(payload.n_meas_to_remove, 1);
258+
}
259+
260+
__syncthreads();
261+
262+
// Exclusive scan (Hillis-Steele)
263+
prefix[threadIndex] = is_valid; // copy input
264+
__syncthreads();
265+
266+
for (int offset = 1; offset < n_meas_to_remove_temp; offset <<= 1) {
267+
int val = 0;
268+
if (threadIndex >= offset) {
269+
val = prefix[threadIndex - offset];
270+
}
271+
__syncthreads();
272+
prefix[threadIndex] += val;
273+
__syncthreads();
274+
}
275+
276+
if (is_valid) {
277+
prefix[threadIndex] -= 1;
278+
sh_meas_ids[prefix[threadIndex]] = meas_to_remove[threadIndex];
279+
sh_threads[prefix[threadIndex]] = threads[threadIndex];
280+
}
281+
282+
__syncthreads();
283+
284+
meas_to_remove[threadIndex] = sh_meas_ids[threadIndex];
285+
threads[threadIndex] = sh_threads[threadIndex];
240286
}
241287

242288
} // namespace traccc::cuda::kernels

device/cuda/src/ambiguity_resolution/kernels/exclusive_scan.cu

Lines changed: 0 additions & 81 deletions
This file was deleted.

device/cuda/src/ambiguity_resolution/kernels/exclusive_scan.cuh

Lines changed: 0 additions & 16 deletions
This file was deleted.

0 commit comments

Comments
 (0)