Skip to content

Commit f7dd029

Browse files
authored
Merge pull request #1102 from beomki-yeo/reduce-atomic-add
Reduce the number of atomic Add operations
2 parents: 6276e79 + 9baafad; commit f7dd029

File tree

1 file changed

+14
-8
lines changed

1 file changed

+14
-8
lines changed

device/cuda/src/ambiguity_resolution/kernels/count_removable_tracks.cu

Lines changed: 14 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -113,8 +113,12 @@ __launch_bounds__(512) __global__ void count_removable_tracks(
113113

114114
__syncthreads();
115115

116+
unsigned int trk_id = 0;
117+
unsigned int n_m = 0;
116118
if (gid >= 0) {
117-
shared_n_meas[threadIndex] = n_meas[sorted_ids[gid]];
119+
trk_id = sorted_ids[gid];
120+
n_m = n_meas[trk_id];
121+
shared_n_meas[threadIndex] = n_m;
118122
}
119123

120124
__syncthreads();
@@ -148,11 +152,12 @@ __launch_bounds__(512) __global__ void count_removable_tracks(
148152

149153
// @TODO: Improve the logic
150154
if (threadIndex < n_tracks_to_iterate && gid >= 0) {
151-
const auto& mids = meas_ids[sorted_ids[gid]];
152-
for (const auto& id : mids) {
153-
const unsigned int pos = atomicAdd(&n_meas_total, 1);
154-
sh_meas_ids[pos] = id;
155-
sh_threads[pos] = threadIndex;
155+
const unsigned int pos = atomicAdd(&n_meas_total, n_m);
156+
157+
const auto& mids = meas_ids[trk_id];
158+
for (int i = 0; i < n_m; i++) {
159+
sh_meas_ids[pos + i] = mids[i];
160+
sh_threads[pos + i] = threadIndex;
156161
}
157162
}
158163

@@ -197,6 +202,8 @@ __launch_bounds__(512) __global__ void count_removable_tracks(
197202
bool is_start =
198203
(threadIndex == 0) || (sh_meas_ids[threadIndex - 1] != mid);
199204
const auto unique_meas_idx = meas_id_to_unique_id.at(mid);
205+
const auto its_accepted_tracks =
206+
n_accepted_tracks_per_measurement.at(unique_meas_idx);
200207

201208
if (is_start) {
202209

@@ -207,8 +214,7 @@ __launch_bounds__(512) __global__ void count_removable_tracks(
207214
if (sh_threads[i] != sh_threads[i - 1]) {
208215
n_sharing_tracks++;
209216

210-
if (n_sharing_tracks ==
211-
n_accepted_tracks_per_measurement.at(unique_meas_idx)) {
217+
if (n_sharing_tracks == its_accepted_tracks) {
212218
atomicMin(&min_thread, sh_threads[i - 1]);
213219
break;
214220
}

0 commit comments

Comments (0)