Skip to content

Commit f46028d

Browse files
committed
Fix multi-GPU error for the linear predict kernel in SYCL.
1 parent 56d09ea commit f46028d

File tree

3 files changed

+9
-9
lines changed

3 files changed

+9
-9
lines changed

include/plssvm/backends/SYCL/kernel/predict/basic/predict_kernel.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ class device_kernel_w_linear {
7878
const auto global_class_idx = class_idx + static_cast<std::size_t>(internal_class);
7979
const auto global_feature_idx = feature_idx + static_cast<std::size_t>(internal_feature);
8080

81-
temp[internal_feature][internal_class] += alpha_d_[global_class_idx * (num_sv_ + PADDING_SIZE_uz) + sv] * sv_d_[global_feature_idx * (device_specific_num_sv_ + PADDING_SIZE_uz) + sv + sv_offset_];
81+
temp[internal_feature][internal_class] += alpha_d_[global_class_idx * (num_sv_ + PADDING_SIZE_uz) + sv + sv_offset_] * sv_d_[global_feature_idx * (device_specific_num_sv_ + PADDING_SIZE_uz) + sv];
8282
}
8383
}
8484
}

include/plssvm/backends/SYCL/kernel/predict/hierarchical/predict_kernel.hpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,8 @@ class device_kernel_w_linear {
6060
*/
6161
void operator()(::sycl::group<2> group) const {
6262
// allocate shared memory
63-
real_type data_cache_feature[FEATURE_BLOCK_SIZE][INTERNAL_BLOCK_SIZE * THREAD_BLOCK_SIZE];
64-
real_type data_cache_alpha[FEATURE_BLOCK_SIZE][INTERNAL_BLOCK_SIZE * THREAD_BLOCK_SIZE];
63+
real_type data_cache_feature[THREAD_BLOCK_SIZE][INTERNAL_BLOCK_SIZE * THREAD_BLOCK_SIZE];
64+
real_type data_cache_alpha[THREAD_BLOCK_SIZE][INTERNAL_BLOCK_SIZE * THREAD_BLOCK_SIZE];
6565

6666
// calculate the indices used in the current work-item
6767
::sycl::private_memory<std::size_t, 2> feature_idx{ group };
@@ -114,8 +114,8 @@ class device_kernel_w_linear {
114114
const auto global_class_idx = class_idx_linear(idx) + static_cast<std::size_t>(internal) * THREAD_BLOCK_SIZE_uz;
115115
const auto global_feature_idx = feature_idx_linear(idx) + static_cast<std::size_t>(internal) * THREAD_BLOCK_SIZE_uz;
116116

117-
data_cache_feature[local_id_0][internal * THREAD_BLOCK_SIZE + local_id_1] = sv_d_[global_feature_idx * (device_specific_num_sv_ + PADDING_SIZE_uz) + sv + sv_offset_ + threadIdx_x]; // SoA
118-
data_cache_alpha[local_id_0][internal * THREAD_BLOCK_SIZE + local_id_1] = alpha_d_[global_class_idx * (num_sv_ + PADDING_SIZE_uz) + sv + threadIdx_x]; // AoS
117+
data_cache_feature[local_id_0][internal * THREAD_BLOCK_SIZE + local_id_1] = sv_d_[global_feature_idx * (device_specific_num_sv_ + PADDING_SIZE_uz) + sv + threadIdx_x]; // SoA
118+
data_cache_alpha[local_id_0][internal * THREAD_BLOCK_SIZE + local_id_1] = alpha_d_[global_class_idx * (num_sv_ + PADDING_SIZE_uz) + sv + sv_offset_ + threadIdx_x]; // AoS
119119
}
120120
});
121121

include/plssvm/backends/SYCL/kernel/predict/scoped/predict_kernel.hpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -62,8 +62,8 @@ class device_kernel_w_linear {
6262
template <typename T>
6363
void operator()(T group) const {
6464
::sycl::memory_environment(group,
65-
::sycl::require_local_mem<real_type[FEATURE_BLOCK_SIZE][INTERNAL_BLOCK_SIZE * THREAD_BLOCK_SIZE]>(),
66-
::sycl::require_local_mem<real_type[FEATURE_BLOCK_SIZE][INTERNAL_BLOCK_SIZE * THREAD_BLOCK_SIZE]>(),
65+
::sycl::require_local_mem<real_type[THREAD_BLOCK_SIZE][INTERNAL_BLOCK_SIZE * THREAD_BLOCK_SIZE]>(),
66+
::sycl::require_local_mem<real_type[THREAD_BLOCK_SIZE][INTERNAL_BLOCK_SIZE * THREAD_BLOCK_SIZE]>(),
6767
::sycl::require_private_mem<std::size_t>(),
6868
::sycl::require_private_mem<std::size_t>(),
6969
::sycl::require_private_mem<std::size_t>(),
@@ -104,8 +104,8 @@ class device_kernel_w_linear {
104104
const auto global_class_idx = class_idx_linear(idx) + static_cast<std::size_t>(internal) * THREAD_BLOCK_SIZE_uz;
105105
const auto global_feature_idx = feature_idx_linear(idx) + static_cast<std::size_t>(internal) * THREAD_BLOCK_SIZE_uz;
106106

107-
data_cache_feature[local_id_0][internal * THREAD_BLOCK_SIZE + local_id_1] = sv_d_[global_feature_idx * (device_specific_num_sv_ + PADDING_SIZE_uz) + sv + sv_offset_ + threadIdx_x]; // SoA
108-
data_cache_alpha[local_id_0][internal * THREAD_BLOCK_SIZE + local_id_1] = alpha_d_[global_class_idx * (num_sv_ + PADDING_SIZE_uz) + sv + threadIdx_x]; // AoS
107+
data_cache_feature[local_id_0][internal * THREAD_BLOCK_SIZE + local_id_1] = sv_d_[global_feature_idx * (device_specific_num_sv_ + PADDING_SIZE_uz) + sv + threadIdx_x]; // SoA
108+
data_cache_alpha[local_id_0][internal * THREAD_BLOCK_SIZE + local_id_1] = alpha_d_[global_class_idx * (num_sv_ + PADDING_SIZE_uz) + sv + sv_offset_ + threadIdx_x]; // AoS
109109
}
110110
});
111111

0 commit comments

Comments
 (0)