@@ -242,6 +242,7 @@ std::vector<::plssvm::detail::move_only_any> gpu_csvm<device_ptr_t, queue_t, pin
     PLSSVM_ASSERT(q_red.size() == A.num_rows() - 1, "The q_red size ({}) mismatches the number of data points after dimensional reduction ({})!", q_red.size(), A.num_rows() - 1);
 
     const std::size_t num_devices = this->num_available_devices();
+    const auto signed_num_devices = static_cast<ssize_t>(num_devices);
     const std::size_t num_rows_reduced = A.shape().x - 1;
 
     // update the data distribution: only the upper triangular kernel matrix is used
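Why the cast: `#pragma omp parallel for` requires a signed integral induction variable under OpenMP 2.0, which is still the level MSVC implements, so the loops below iterate with `ssize_t`. Comparing that signed index against the unsigned `std::size_t` bound `num_devices` raises sign-comparison diagnostics (`-Wsign-compare` on GCC/Clang, C4018 on MSVC), which `-Werror` builds escalate to hard errors; hoisting a single explicit cast into `signed_num_devices` fixes every loop at once. A minimal standalone sketch of the pattern, with `std::ptrdiff_t` standing in for the POSIX-only `ssize_t` and an illustrative function name:

    #include <cstddef>  // std::size_t, std::ptrdiff_t
    #include <vector>

    void for_each_device(const std::vector<int> &devices) {
        const std::size_t num_devices = devices.size();
        // one explicit, visible conversion instead of an implicit one per loop condition
        const auto signed_num_devices = static_cast<std::ptrdiff_t>(num_devices);

        // signed induction variable + signed bound: no mixed-sign comparison
        #pragma omp parallel for if (signed_num_devices > 1)
        for (std::ptrdiff_t device_id = 0; device_id < signed_num_devices; ++device_id) {
            // ... per-device work ...
        }
    }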
@@ -255,8 +256,8 @@ std::vector<::plssvm::detail::move_only_any> gpu_csvm<device_ptr_t, queue_t, pin
     std::vector<device_ptr_type> q_red_d(num_devices);
 
     // split memory allocation and memory copy! (necessary to remove locks on some systems and setups)
-    #pragma omp parallel for if (num_devices > 1)
-    for (ssize_t device_id = 0; device_id < num_devices; ++device_id) {
+    #pragma omp parallel for if (signed_num_devices > 1)
+    for (ssize_t device_id = 0; device_id < signed_num_devices; ++device_id) {
         // check whether the current device is responsible for at least one data point!
         if (data_distribution_->place_specific_num_rows(device_id) == 0) {
            continue;
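The comment on this loop pair explains the structure: on some systems and driver stacks, device memory allocation takes a process-wide lock, so mixing allocations and copies in one parallel loop can serialize the copies behind other threads' allocations. Allocating on all devices first and copying in a second loop removes that contention. A sketch of the same two-pass idiom against the raw CUDA runtime; the helper and its names are illustrative, not PLSSVM API:

    #include <cuda_runtime.h>

    #include <cstddef>
    #include <vector>

    // upload the same host buffer to every visible GPU
    void upload_to_all(const std::vector<float> &host, int num_devices) {
        std::vector<float *> dev_bufs(num_devices, nullptr);
        const std::size_t bytes = host.size() * sizeof(float);

        // pass 1: allocations only (these may take a driver-wide lock)
        #pragma omp parallel for if (num_devices > 1)
        for (int device_id = 0; device_id < num_devices; ++device_id) {
            cudaSetDevice(device_id);
            cudaMalloc(reinterpret_cast<void **>(&dev_bufs[device_id]), bytes);
        }

        // pass 2: copies only, now free to overlap across devices
        #pragma omp parallel for if (num_devices > 1)
        for (int device_id = 0; device_id < num_devices; ++device_id) {
            cudaSetDevice(device_id);
            cudaMemcpy(dev_bufs[device_id], host.data(), bytes, cudaMemcpyHostToDevice);
        }
    }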
@@ -271,8 +272,8 @@ std::vector<::plssvm::detail::move_only_any> gpu_csvm<device_ptr_t, queue_t, pin
     // pin the data matrix
     const pinned_memory_type pm{ A };
 
-    #pragma omp parallel for if (num_devices > 1)
-    for (ssize_t device_id = 0; device_id < num_devices; ++device_id) {
+    #pragma omp parallel for if (signed_num_devices > 1)
+    for (ssize_t device_id = 0; device_id < signed_num_devices; ++device_id) {
         // check whether the current device is responsible for at least one data point!
         if (data_distribution_->place_specific_num_rows(device_id) == 0) {
            continue;
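`const pinned_memory_type pm{ A }` page-locks the host-side data matrix for the duration of the uploads: pinned memory can be accessed by the GPU's DMA engine directly, so host-to-device copies skip the driver's internal staging buffer, which matters when every device pulls its slice of `A` in the loop below. The real `pinned_memory_type` is backend-specific; a CUDA-flavored RAII sketch of the idea, with the `pinned_guard` name assumed:

    #include <cuda_runtime.h>

    #include <cstddef>

    // register host memory so the DMA engine can read it directly;
    // unregister automatically when the guard leaves scope
    class pinned_guard {
      public:
        pinned_guard(void *ptr, std::size_t bytes) : ptr_{ ptr } {
            cudaHostRegister(ptr_, bytes, cudaHostRegisterDefault);
        }
        ~pinned_guard() { cudaHostUnregister(ptr_); }
        pinned_guard(const pinned_guard &) = delete;
        pinned_guard &operator=(const pinned_guard &) = delete;

      private:
        void *ptr_;
    };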
@@ -334,6 +335,7 @@ void gpu_csvm<device_ptr_t, queue_t, pinned_memory_t>::blas_level_3(const solver
     PLSSVM_ASSERT(B.padding() == C.padding(), "The B ({}) and C ({}) matrices must have the same padding!", B.padding(), C.padding());
 
     const std::size_t num_devices = this->num_available_devices();
+    const auto signed_num_devices = static_cast<ssize_t>(num_devices);
 
     // the C and B matrices; completely stored on each device
     std::vector<device_ptr_type> B_d(num_devices);
@@ -346,8 +348,8 @@ void gpu_csvm<device_ptr_t, queue_t, pinned_memory_t>::blas_level_3(const solver
     }
 
     // split memory allocation and memory copy!
-    #pragma omp parallel for if (num_devices > 1)
-    for (ssize_t device_id = 0; device_id < num_devices; ++device_id) {
+    #pragma omp parallel for if (signed_num_devices > 1)
+    for (ssize_t device_id = 0; device_id < signed_num_devices; ++device_id) {
         // check whether the current device is responsible for at least one data point!
         if (data_distribution_->place_specific_num_rows(device_id) == 0) {
            continue;
@@ -359,8 +361,8 @@ void gpu_csvm<device_ptr_t, queue_t, pinned_memory_t>::blas_level_3(const solver
         C_d[device_id] = device_ptr_type{ C.shape(), C.padding(), device };
     }
 
-    #pragma omp parallel for ordered if (num_devices > 1)
-    for (ssize_t device_id = 0; device_id < num_devices; ++device_id) {
+    #pragma omp parallel for ordered if (signed_num_devices > 1)
+    for (ssize_t device_id = 0; device_id < signed_num_devices; ++device_id) {
         // check whether the current device is responsible for at least one data point!
         if (data_distribution_->place_specific_num_rows(device_id) == 0) {
            continue;
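Note that the copy loop in this last hunk carries an `ordered` clause, unlike the allocation loop before it. With `ordered`, the iterations still run in parallel, but an `#pragma omp ordered` block inside the body executes strictly in ascending `device_id` order; the hunk cuts off before that block, so the following is only a sketch of the typical shape, e.g. for reading consecutive slices out of one shared host buffer:

    #include <cstdio>

    void staged_uploads(int num_devices) {
        #pragma omp parallel for ordered if (num_devices > 1)
        for (int device_id = 0; device_id < num_devices; ++device_id) {
            // concurrent part: per-device preparation may overlap freely

            #pragma omp ordered
            {
                // serialized part: runs in ascending device_id order
                std::printf("device %d enters its ordered region\n", device_id);
            }
        }
    }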
@@ -495,6 +497,7 @@ aos_matrix<real_type> gpu_csvm<device_ptr_t, queue_t, pinned_memory_t>::predict_
     const std::size_t num_support_vectors = support_vectors.num_rows();
     const std::size_t num_features = predict_points.num_cols();
     const std::size_t num_devices = this->num_available_devices();
+    const auto signed_num_devices = static_cast<ssize_t>(num_devices);
 
     // the result matrix
     aos_matrix<real_type> out_ret{ shape{ num_predict_points, num_classes }, real_type{ 0.0 }, shape{ PADDING_SIZE, PADDING_SIZE } };
@@ -504,8 +507,8 @@ aos_matrix<real_type> gpu_csvm<device_ptr_t, queue_t, pinned_memory_t>::predict_
     std::vector<device_ptr_type> alpha_d(num_devices);
 
     // split memory allocation and memory copy!
-    #pragma omp parallel for if (num_devices > 1)
-    for (ssize_t device_id = 0; device_id < num_devices; ++device_id) {
+    #pragma omp parallel for if (signed_num_devices > 1)
+    for (ssize_t device_id = 0; device_id < signed_num_devices; ++device_id) {
         const queue_type &device = devices_[device_id];
 
         // allocate memory on the device
@@ -532,8 +535,8 @@ aos_matrix<real_type> gpu_csvm<device_ptr_t, queue_t, pinned_memory_t>::predict_
 
     std::vector<device_ptr_type> sv_d(num_devices);
     // split memory allocation and memory copy!
-    #pragma omp parallel for if (num_devices > 1)
-    for (ssize_t device_id = 0; device_id < num_devices; ++device_id) {
+    #pragma omp parallel for if (signed_num_devices > 1)
+    for (ssize_t device_id = 0; device_id < signed_num_devices; ++device_id) {
         // check whether the current device is responsible for at least one data point!
         if (data_distribution_->place_specific_num_rows(device_id) == 0) {
            continue;
@@ -544,8 +547,8 @@ aos_matrix<real_type> gpu_csvm<device_ptr_t, queue_t, pinned_memory_t>::predict_
         sv_d[device_id] = device_ptr_type{ shape{ data_distribution_->place_specific_num_rows(device_id), num_features }, support_vectors.padding(), device };
     }
 
-    #pragma omp parallel for ordered if (num_devices > 1)
-    for (ssize_t device_id = 0; device_id < num_devices; ++device_id) {
+    #pragma omp parallel for ordered if (signed_num_devices > 1)
+    for (ssize_t device_id = 0; device_id < signed_num_devices; ++device_id) {
         // check whether the current device is responsible for at least one data point!
         if (data_distribution_->place_specific_num_rows(device_id) == 0) {
            continue;
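Each loop guards on `data_distribution_->place_specific_num_rows(device_id) == 0`: a device that was assigned no rows is skipped outright, and `sv_d[device_id]` is sized only for the rows that device owns. The actual distribution is triangular-aware (see the earlier comment that only the upper triangular kernel matrix is used), so the following stand-in, an even row split with the remainder handed to the front devices, is purely illustrative:

    #include <cstddef>
    #include <vector>

    // hypothetical helper mirroring place_specific_num_rows: split num_rows
    // across num_places devices as evenly as possible
    std::vector<std::size_t> place_rows(std::size_t num_rows, std::size_t num_places) {
        std::vector<std::size_t> rows(num_places, num_rows / num_places);
        for (std::size_t i = 0; i < num_rows % num_places; ++i) {
            ++rows[i];  // hand the remainder out one row at a time
        }
        return rows;  // rows[i] == 0 -> device i is skipped entirely
    }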
@@ -599,30 +602,30 @@ aos_matrix<real_type> gpu_csvm<device_ptr_t, queue_t, pinned_memory_t>::predict_
 
         // upload the w vector to all devices
         // split memory allocation and memory copy!
-        #pragma omp parallel for if (num_devices > 1)
-        for (ssize_t device_id = 0; device_id < num_devices; ++device_id) {
+        #pragma omp parallel for if (signed_num_devices > 1)
+        for (ssize_t device_id = 0; device_id < signed_num_devices; ++device_id) {
             const queue_type &device = devices_[device_id];
 
             // allocate memory on the device
             sv_or_w_d[device_id] = device_ptr_type{ shape{ num_classes, num_features }, shape{ PADDING_SIZE, PADDING_SIZE }, device };
         }
-        #pragma omp parallel for if (num_devices > 1)
-        for (ssize_t device_id = 0; device_id < num_devices; ++device_id) {
+        #pragma omp parallel for if (signed_num_devices > 1)
+        for (ssize_t device_id = 0; device_id < signed_num_devices; ++device_id) {
             // copy data to the device
             sv_or_w_d[device_id].copy_to_device(w);
         }
     } else {
         // use the support vectors for all other kernel functions
         // split memory allocation and memory copy!
-        #pragma omp parallel for if (num_devices > 1)
-        for (ssize_t device_id = 0; device_id < num_devices; ++device_id) {
+        #pragma omp parallel for if (signed_num_devices > 1)
+        for (ssize_t device_id = 0; device_id < signed_num_devices; ++device_id) {
             const queue_type &device = devices_[device_id];
 
             // allocate memory on the device
             sv_or_w_d[device_id] = device_ptr_type{ support_vectors.shape(), support_vectors.padding(), device };
         }
-        #pragma omp parallel for if (num_devices > 1)
-        for (ssize_t device_id = 0; device_id < num_devices; ++device_id) {
+        #pragma omp parallel for if (signed_num_devices > 1)
+        for (ssize_t device_id = 0; device_id < signed_num_devices; ++device_id) {
             // copy data to the device
             sv_or_w_d[device_id].copy_to_device(support_vectors);
         }
@@ -637,8 +640,8 @@ aos_matrix<real_type> gpu_csvm<device_ptr_t, queue_t, pinned_memory_t>::predict_
     std::vector<device_ptr_type> rho_d(num_devices);
 
     // split memory allocation and memory copy!
-    #pragma omp parallel for if (num_devices > 1)
-    for (ssize_t device_id = 0; device_id < num_devices; ++device_id) {
+    #pragma omp parallel for if (signed_num_devices > 1)
+    for (ssize_t device_id = 0; device_id < signed_num_devices; ++device_id) {
         // check whether the current device is responsible for at least one data point!
         if (data_distribution_->place_specific_num_rows(device_id) == 0) {
            continue;
@@ -651,8 +654,8 @@ aos_matrix<real_type> gpu_csvm<device_ptr_t, queue_t, pinned_memory_t>::predict_
         rho_d[device_id] = device_ptr_type{ num_classes + PADDING_SIZE, device };
     }
 
-    #pragma omp parallel for if (num_devices > 1)
-    for (ssize_t device_id = 0; device_id < num_devices; ++device_id) {
+    #pragma omp parallel for if (signed_num_devices > 1)
+    for (ssize_t device_id = 0; device_id < signed_num_devices; ++device_id) {
         // check whether the current device is responsible for at least one data point!
         if (data_distribution_->place_specific_num_rows(device_id) == 0) {
            continue;
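For context on the kernel-type branch above: with the linear kernel, the per-class weight vector w_c = sum_i alpha_{c,i} * sv_i can be precomputed once, so each device receives only a num_classes x num_features matrix instead of the full num_support_vectors x num_features support-vector set; all other kernels need the support vectors themselves. A sketch of that reduction on flat row-major buffers (PLSSVM uses its own matrix types, so names and layout here are illustrative):

    #include <cstddef>
    #include <vector>

    // w[c][f] = sum_i alpha[c][i] * sv[i][f], all buffers row-major
    std::vector<double> compute_w(const std::vector<double> &alpha,  // num_classes x num_sv
                                  const std::vector<double> &sv,     // num_sv x num_features
                                  std::size_t num_classes, std::size_t num_sv, std::size_t num_features) {
        std::vector<double> w(num_classes * num_features, 0.0);
        for (std::size_t c = 0; c < num_classes; ++c) {
            for (std::size_t i = 0; i < num_sv; ++i) {
                const double a = alpha[c * num_sv + i];
                for (std::size_t f = 0; f < num_features; ++f) {
                    w[c * num_features + f] += a * sv[i * num_features + f];
                }
            }
        }
        return w;  // much smaller than the support-vector set when num_classes << num_sv
    }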