Skip to content
This repository was archived by the owner on Sep 22, 2025. It is now read-only.

Commit 2318a46

Browse files
author
Mikolaj Komar
committed
Improve performance of eq distribution gather
1 parent e42cfa2 commit 2318a46

File tree

1 file changed

+15
-4
lines changed

1 file changed

+15
-4
lines changed

include/dr/mp/containers/matrix_formats/csr_eq_distribution.hpp

Lines changed: 15 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -190,11 +190,22 @@ class csr_eq_distribution {
190190
auto first_row = row_offsets_[i];
191191
auto last_row = row_offsets_[i] + row_sizes_[i];
192192
auto row_size = row_sizes_[i];
193-
for (auto j = first_row; j < last_row; j++) {
194-
res[j + k * shape_[0]] +=
195-
gathered_res_host[vals_width * current_offset + k * row_size +
196-
j - first_row];
193+
if (first_row < last_row) {
194+
res[first_row + k * shape_[0]] +=
195+
gathered_res_host[vals_width * current_offset + k * row_size];
197196
}
197+
if (first_row < last_row - 1) {
198+
auto piece_start = gathered_res_host + vals_width * current_offset +
199+
k * row_size + 1;
200+
std::copy(piece_start, piece_start + last_row - first_row - 1,
201+
res.begin() + first_row + k * shape_[0] + 1);
202+
}
203+
// for (auto j = first_row; j < last_row; j++) {
204+
// res[j + k * shape_[0]] +=
205+
// gathered_res_host[vals_width * current_offset + k * row_size
206+
// +
207+
// j - first_row];
208+
// }
198209
current_offset += row_sizes_[i];
199210
}
200211
}

0 commit comments

Comments
 (0)