 #include "plssvm/detail/assert.hpp"             // PLSSVM_ASSERT
 #include "plssvm/detail/data_distribution.hpp"  // plssvm::detail::{data_distribution, triangular_data_distribution, rectangular_data_distribution}
 #include "plssvm/detail/move_only_any.hpp"      // plssvm::detail::{move_only_any, move_only_any_cast}
+#include "plssvm/detail/ssize.hpp"              // plssvm::detail::ssize_t
 #include "plssvm/kernel_function_types.hpp"     // plssvm::kernel_function_type
 #include "plssvm/matrix.hpp"                    // plssvm::aos_matrix, plssvm::soa_matrix
 #include "plssvm/parameter.hpp"                 // plssvm::parameter
@@ -255,7 +256,7 @@ std::vector<::plssvm::detail::move_only_any> gpu_csvm<device_ptr_t, queue_t, pin
255256
256257 // split memory allocation and memory copy! (necessary to remove locks on some systems and setups)
257258#pragma omp parallel for if (num_devices > 1)
-    for (std::size_t device_id = 0; device_id < num_devices; ++device_id) {
+    for (ssize_t device_id = 0; device_id < num_devices; ++device_id) {
259260 // check whether the current device is responsible for at least one data point!
260261 if (data_distribution_->place_specific_num_rows (device_id) == 0 ) {
261262 continue ;
@@ -271,7 +272,7 @@ std::vector<::plssvm::detail::move_only_any> gpu_csvm<device_ptr_t, queue_t, pin
271272 const pinned_memory_type pm{ A };
272273
273274#pragma omp parallel for if (num_devices > 1)
-    for (std::size_t device_id = 0; device_id < num_devices; ++device_id) {
+    for (ssize_t device_id = 0; device_id < num_devices; ++device_id) {
275276 // check whether the current device is responsible for at least one data point!
276277 if (data_distribution_->place_specific_num_rows (device_id) == 0 ) {
277278 continue ;
@@ -346,7 +347,7 @@ void gpu_csvm<device_ptr_t, queue_t, pinned_memory_t>::blas_level_3(const solver
346347
347348 // split memory allocation and memory copy!
348349#pragma omp parallel for if (num_devices > 1)
-    for (std::size_t device_id = 0; device_id < num_devices; ++device_id) {
+    for (ssize_t device_id = 0; device_id < num_devices; ++device_id) {
350351 // check whether the current device is responsible for at least one data point!
351352 if (data_distribution_->place_specific_num_rows (device_id) == 0 ) {
352353 continue ;
@@ -359,7 +360,7 @@ void gpu_csvm<device_ptr_t, queue_t, pinned_memory_t>::blas_level_3(const solver
359360 }
360361
361362#pragma omp parallel for ordered if (num_devices > 1)
-    for (std::size_t device_id = 0; device_id < num_devices; ++device_id) {
+    for (ssize_t device_id = 0; device_id < num_devices; ++device_id) {
363364 // check whether the current device is responsible for at least one data point!
364365 if (data_distribution_->place_specific_num_rows (device_id) == 0 ) {
365366 continue ;
@@ -504,14 +505,14 @@ aos_matrix<real_type> gpu_csvm<device_ptr_t, queue_t, pinned_memory_t>::predict_
504505
505506 // split memory allocation and memory copy!
506507#pragma omp parallel for if (num_devices > 1)
-    for (std::size_t device_id = 0; device_id < num_devices; ++device_id) {
+    for (ssize_t device_id = 0; device_id < num_devices; ++device_id) {
508509 const queue_type &device = devices_[device_id];
509510
510511 // allocate memory on the device
511512 alpha_d[device_id] = device_ptr_type{ alpha.shape (), alpha.padding (), device };
512513 }
513514#pragma omp parallel for if (num_devices > 1)
-    for (std::size_t device_id = 0; device_id < num_devices; ++device_id) {
+    for (ssize_t device_id = 0; device_id < num_devices; ++device_id) {
515516 // copy data to the device
516517 alpha_d[device_id].copy_to_device (alpha);
517518 }
@@ -532,7 +533,7 @@ aos_matrix<real_type> gpu_csvm<device_ptr_t, queue_t, pinned_memory_t>::predict_
532533 std::vector<device_ptr_type> sv_d (num_devices);
533534 // split memory allocation and memory copy!
534535#pragma omp parallel for if (num_devices > 1)
-    for (std::size_t device_id = 0; device_id < num_devices; ++device_id) {
+    for (ssize_t device_id = 0; device_id < num_devices; ++device_id) {
536537 // check whether the current device is responsible for at least one data point!
537538 if (data_distribution_->place_specific_num_rows (device_id) == 0 ) {
538539 continue ;
@@ -544,7 +545,7 @@ aos_matrix<real_type> gpu_csvm<device_ptr_t, queue_t, pinned_memory_t>::predict_
544545 }
545546
546547#pragma omp parallel for ordered if (num_devices > 1)
-    for (std::size_t device_id = 0; device_id < num_devices; ++device_id) {
+    for (ssize_t device_id = 0; device_id < num_devices; ++device_id) {
548549 // check whether the current device is responsible for at least one data point!
549550 if (data_distribution_->place_specific_num_rows (device_id) == 0 ) {
550551 continue ;
@@ -599,29 +600,29 @@ aos_matrix<real_type> gpu_csvm<device_ptr_t, queue_t, pinned_memory_t>::predict_
599600 // upload the w vector to all devices
600601 // split memory allocation and memory copy!
601602#pragma omp parallel for if (num_devices > 1)
-    for (std::size_t device_id = 0; device_id < num_devices; ++device_id) {
+    for (ssize_t device_id = 0; device_id < num_devices; ++device_id) {
603604 const queue_type &device = devices_[device_id];
604605
605606 // allocate memory on the device
606607 sv_or_w_d[device_id] = device_ptr_type{ shape{ num_classes, num_features }, shape{ PADDING_SIZE, PADDING_SIZE }, device };
607608 }
608609#pragma omp parallel for if (num_devices > 1)
-    for (std::size_t device_id = 0; device_id < num_devices; ++device_id) {
+    for (ssize_t device_id = 0; device_id < num_devices; ++device_id) {
610611 // copy data to the device
611612 sv_or_w_d[device_id].copy_to_device (w);
612613 }
613614 } else {
614615 // use the support vectors for all other kernel functions
615616 // split memory allocation and memory copy!
616617#pragma omp parallel for if (num_devices > 1)
-    for (std::size_t device_id = 0; device_id < num_devices; ++device_id) {
+    for (ssize_t device_id = 0; device_id < num_devices; ++device_id) {
618619 const queue_type &device = devices_[device_id];
619620
620621 // allocate memory on the device
621622 sv_or_w_d[device_id] = device_ptr_type{ support_vectors.shape (), support_vectors.padding (), device };
622623 }
623624#pragma omp parallel for if (num_devices > 1)
-    for (std::size_t device_id = 0; device_id < num_devices; ++device_id) {
+    for (ssize_t device_id = 0; device_id < num_devices; ++device_id) {
625626 // copy data to the device
626627 sv_or_w_d[device_id].copy_to_device (support_vectors);
627628 }
@@ -637,7 +638,7 @@ aos_matrix<real_type> gpu_csvm<device_ptr_t, queue_t, pinned_memory_t>::predict_
637638
638639 // split memory allocation and memory copy!
639640#pragma omp parallel for if (num_devices > 1)
-    for (std::size_t device_id = 0; device_id < num_devices; ++device_id) {
+    for (ssize_t device_id = 0; device_id < num_devices; ++device_id) {
641642 // check whether the current device is responsible for at least one data point!
642643 if (data_distribution_->place_specific_num_rows (device_id) == 0 ) {
643644 continue ;
@@ -651,7 +652,7 @@ aos_matrix<real_type> gpu_csvm<device_ptr_t, queue_t, pinned_memory_t>::predict_
651652 }
652653
653654#pragma omp parallel for if (num_devices > 1)
-    for (std::size_t device_id = 0; device_id < num_devices; ++device_id) {
+    for (ssize_t device_id = 0; device_id < num_devices; ++device_id) {
655656 // check whether the current device is responsible for at least one data point!
656657 if (data_distribution_->place_specific_num_rows (device_id) == 0 ) {
657658 continue ;
0 commit comments