
Commit b3838e3

Add more ssize_t casts.
1 parent 9e34e11 commit b3838e3

18 files changed (+124 / -114 lines changed)

include/plssvm/backends/OpenMP/kernel/cg_explicit/blas.hpp

Lines changed: 2 additions & 2 deletions
@@ -43,8 +43,8 @@ inline void device_kernel_symm(const std::size_t num_rows, const std::size_t num
 PLSSVM_ASSERT(C.shape() == (plssvm::shape{ num_rhs, num_rows }), "C matrix sizes mismatch!: {} != [{}, {}]", C.shape(), num_rhs, num_rows);

 // calculate constants
-const auto blocked_num_rhs = static_cast<std::size_t>(std::ceil(static_cast<real_type>(num_rhs) / INTERNAL_BLOCK_SIZE));
-const auto blocked_num_rows = static_cast<std::size_t>(std::ceil(static_cast<real_type>(num_rows) / INTERNAL_BLOCK_SIZE));
+const auto blocked_num_rhs = static_cast<plssvm::detail::ssize_t>(std::ceil(static_cast<real_type>(num_rhs) / INTERNAL_BLOCK_SIZE));
+const auto blocked_num_rows = static_cast<plssvm::detail::ssize_t>(std::ceil(static_cast<real_type>(num_rows) / INTERNAL_BLOCK_SIZE));

 // cast all values to 64-bit unsigned long long to prevent potential 32-bit overflows
 const auto INTERNAL_BLOCK_SIZE_uz = static_cast<std::size_t>(INTERNAL_BLOCK_SIZE);
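
The change above is representative of the whole commit: values that end up as bounds of `#pragma omp parallel for` loops are now produced as a signed size type, because the loop counters are `plssvm::detail::ssize_t` and comparing a signed counter against an unsigned `std::size_t` bound triggers sign-comparison warnings (and MSVC's OpenMP 2.0 support only accepts signed loop variables in the first place). The definition of `plssvm::detail::ssize_t` is not part of this diff; the following minimal, self-contained sketch assumes a `std::ptrdiff_t`-style alias and uses hypothetical names purely to illustrate the pattern.

// Minimal illustration of the signed-bound pattern (assumption: plssvm::detail::ssize_t
// behaves like a std::ptrdiff_t alias; all names here are hypothetical).
#include <cmath>    // std::ceil
#include <cstddef>  // std::ptrdiff_t, std::size_t
#include <vector>

using ssize_t_like = std::ptrdiff_t;  // stand-in for plssvm::detail::ssize_t

int main() {
    const std::size_t num_rows = 1000;       // unsigned extent, e.g. from a container
    constexpr int INTERNAL_BLOCK_SIZE = 4;

    // signed blocked count, mirroring the change in blas.hpp
    const auto blocked_num_rows = static_cast<ssize_t_like>(std::ceil(static_cast<double>(num_rows) / INTERNAL_BLOCK_SIZE));

    std::vector<double> acc(static_cast<std::size_t>(blocked_num_rows), 0.0);

    // signed counter vs. signed bound: no sign-compare warning, and MSVC's OpenMP 2.0
    // implementation accepts the signed loop variable
    #pragma omp parallel for
    for (ssize_t_like row = 0; row < blocked_num_rows; ++row) {
        acc[static_cast<std::size_t>(row)] += 1.0;  // each iteration writes a distinct element
    }
    return acc.empty() ? 1 : 0;
}
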

include/plssvm/backends/OpenMP/kernel/cg_explicit/kernel_matrix_assembly.hpp

Lines changed: 1 addition & 1 deletion
@@ -46,7 +46,7 @@ void device_kernel_assembly(const std::vector<real_type> &q, std::vector<real_ty

 // calculate constants
 const std::size_t dept = q.size();
-const auto blocked_dept = static_cast<std::size_t>(std::ceil(static_cast<real_type>(dept) / INTERNAL_BLOCK_SIZE));
+const auto blocked_dept = static_cast<plssvm::detail::ssize_t>(std::ceil(static_cast<real_type>(dept) / INTERNAL_BLOCK_SIZE));
 const std::size_t num_features = data.num_cols();

 // cast all values to 64-bit unsigned long long to prevent potential 32-bit overflows

include/plssvm/backends/OpenMP/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp

Lines changed: 1 addition & 1 deletion
@@ -55,7 +55,7 @@ inline void device_kernel_assembly_symm(const real_type alpha, const std::vector

 // calculate constants
 const std::size_t dept = q.size();
-const auto blocked_dept = static_cast<std::size_t>(std::ceil(static_cast<real_type>(dept) / INTERNAL_BLOCK_SIZE));
+const auto blocked_dept = static_cast<plssvm::detail::ssize_t>(std::ceil(static_cast<real_type>(dept) / INTERNAL_BLOCK_SIZE));
 const std::size_t num_features = data.num_cols();
 const std::size_t num_classes = B.num_rows();


include/plssvm/backends/OpenMP/kernel/predict_kernel.hpp

Lines changed: 12 additions & 12 deletions
@@ -38,9 +38,9 @@ inline void device_kernel_w_linear(soa_matrix<real_type> &w, const aos_matrix<re
 PLSSVM_ASSERT(w.shape() == (plssvm::shape{ alpha.num_rows(), support_vectors.num_cols() }), "Shape mismatch: {} vs {}!", w.shape(), (plssvm::shape{ alpha.num_rows(), support_vectors.num_cols() }));

 // calculate constants
-const std::size_t num_classes = alpha.num_rows();
-const std::size_t num_support_vectors = support_vectors.num_rows();
-const std::size_t num_features = support_vectors.num_cols();
+const auto num_classes = static_cast<plssvm::detail::ssize_t>(alpha.num_rows());
+const auto num_support_vectors = static_cast<plssvm::detail::ssize_t>(support_vectors.num_rows());
+const auto num_features = static_cast<plssvm::detail::ssize_t>(support_vectors.num_cols());

 #pragma omp parallel for collapse(2) default(none) shared(w, support_vectors, alpha) firstprivate(num_classes, num_features, num_support_vectors)
 for (plssvm::detail::ssize_t a = 0; a < num_classes; ++a) {
@@ -68,9 +68,9 @@ inline void device_kernel_predict_linear(aos_matrix<real_type> &prediction, cons
 PLSSVM_ASSERT(prediction.shape() == (plssvm::shape{ predict_points.num_rows(), w.num_rows() }), "Shape mismatch: {} vs {}!", prediction.shape(), (plssvm::shape{ predict_points.num_rows(), w.num_rows() }));

 // calculate constants
-const std::size_t num_classes = prediction.num_cols();
-const std::size_t num_predict_points = predict_points.num_rows();
-const std::size_t num_features = predict_points.num_cols();
+const auto num_classes = static_cast<plssvm::detail::ssize_t>(prediction.num_cols());
+const auto num_predict_points = static_cast<plssvm::detail::ssize_t>(predict_points.num_rows());
+const auto num_features = static_cast<plssvm::detail::ssize_t>(predict_points.num_cols());

 #pragma omp parallel for collapse(2) default(none) shared(prediction, w, rho, predict_points) firstprivate(num_classes, num_features, num_predict_points)
 for (plssvm::detail::ssize_t point_index = 0; point_index < num_predict_points; ++point_index) {
@@ -104,12 +104,12 @@ inline void device_kernel_predict(aos_matrix<real_type> &prediction, const aos_m
 PLSSVM_ASSERT(prediction.shape() == (plssvm::shape{ predict_points.num_rows(), alpha.num_rows() }), "Shape mismatch: {} vs {}!", prediction.shape(), (plssvm::shape{ predict_points.num_rows(), alpha.num_rows() }));

 // calculate constants
-const std::size_t num_classes = alpha.num_rows();
-const std::size_t num_support_vectors = support_vectors.num_rows();
-const auto blocked_num_support_vectors = static_cast<std::size_t>(std::ceil(static_cast<real_type>(num_support_vectors) / INTERNAL_BLOCK_SIZE));
-const std::size_t num_predict_points = predict_points.num_rows();
-const auto blocked_num_predict_points = static_cast<std::size_t>(std::ceil(static_cast<real_type>(num_predict_points) / INTERNAL_BLOCK_SIZE));
-const std::size_t num_features = predict_points.num_cols();
+const auto num_classes = static_cast<plssvm::detail::ssize_t>(alpha.num_rows());
+const auto num_support_vectors = static_cast<plssvm::detail::ssize_t>(support_vectors.num_rows());
+const auto blocked_num_support_vectors = static_cast<plssvm::detail::ssize_t>(std::ceil(static_cast<real_type>(num_support_vectors) / INTERNAL_BLOCK_SIZE));
+const auto num_predict_points = static_cast<plssvm::detail::ssize_t>(predict_points.num_rows());
+const auto blocked_num_predict_points = static_cast<plssvm::detail::ssize_t>(std::ceil(static_cast<real_type>(num_predict_points) / INTERNAL_BLOCK_SIZE));
+const auto num_features = static_cast<plssvm::detail::ssize_t>(predict_points.num_cols());

 // cast all values to 64-bit unsigned long long to prevent potential 32-bit overflows
 const auto INTERNAL_BLOCK_SIZE_uz = static_cast<std::size_t>(INTERNAL_BLOCK_SIZE);
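
In predict_kernel.hpp the same idea is applied to `collapse(2)` loop nests: every extent that appears in a loop header is cast to the signed type once, before the pragma, and handed to the parallel region via `firstprivate`, so both collapsed loops compare signed against signed. A rough sketch of that structure follows, with hypothetical names and a flat `std::vector` standing in for PLSSVM's `soa_matrix`/`aos_matrix`; it is an illustration, not the library code.

// Sketch of the collapse(2) pattern: signed bounds, cast once, passed firstprivate.
#include <cstddef>  // std::ptrdiff_t, std::size_t
#include <vector>

using ssize_t_like = std::ptrdiff_t;  // stand-in for plssvm::detail::ssize_t

// computes w[a * num_features + d] = sum over sv of alpha[a * num_sv + sv] * sv_matrix[sv * num_features + d]
void w_linear_sketch(std::vector<double> &w, const std::vector<double> &alpha,
                     const std::vector<double> &sv_matrix,
                     const std::size_t num_classes_u, const std::size_t num_sv_u, const std::size_t num_features_u) {
    // cast the unsigned extents once, before the pragma
    const auto num_classes = static_cast<ssize_t_like>(num_classes_u);
    const auto num_sv = static_cast<ssize_t_like>(num_sv_u);
    const auto num_features = static_cast<ssize_t_like>(num_features_u);

    #pragma omp parallel for collapse(2) default(none) shared(w, alpha, sv_matrix) firstprivate(num_classes, num_sv, num_features)
    for (ssize_t_like a = 0; a < num_classes; ++a) {
        for (ssize_t_like d = 0; d < num_features; ++d) {
            double sum = 0.0;
            for (ssize_t_like sv = 0; sv < num_sv; ++sv) {
                sum += alpha[a * num_sv + sv] * sv_matrix[sv * num_features + d];
            }
            w[a * num_features + d] = sum;
        }
    }
}
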

include/plssvm/backends/gpu_csvm.hpp

Lines changed: 29 additions & 26 deletions
@@ -242,6 +242,7 @@ std::vector<::plssvm::detail::move_only_any> gpu_csvm<device_ptr_t, queue_t, pin
 PLSSVM_ASSERT(q_red.size() == A.num_rows() - 1, "The q_red size ({}) mismatches the number of data points after dimensional reduction ({})!", q_red.size(), A.num_rows() - 1);

 const std::size_t num_devices = this->num_available_devices();
+const auto signed_num_devices = static_cast<ssize_t>(num_devices);
 const std::size_t num_rows_reduced = A.shape().x - 1;

 // update the data distribution: only the upper triangular kernel matrix is used
@@ -255,8 +256,8 @@ std::vector<::plssvm::detail::move_only_any> gpu_csvm<device_ptr_t, queue_t, pin
 std::vector<device_ptr_type> q_red_d(num_devices);

 // split memory allocation and memory copy! (necessary to remove locks on some systems and setups)
-#pragma omp parallel for if (num_devices > 1)
-for (ssize_t device_id = 0; device_id < num_devices; ++device_id) {
+#pragma omp parallel for if (signed_num_devices > 1)
+for (ssize_t device_id = 0; device_id < signed_num_devices; ++device_id) {
 // check whether the current device is responsible for at least one data point!
 if (data_distribution_->place_specific_num_rows(device_id) == 0) {
 continue;
@@ -271,8 +272,8 @@ std::vector<::plssvm::detail::move_only_any> gpu_csvm<device_ptr_t, queue_t, pin
 // pin the data matrix
 const pinned_memory_type pm{ A };

-#pragma omp parallel for if (num_devices > 1)
-for (ssize_t device_id = 0; device_id < num_devices; ++device_id) {
+#pragma omp parallel for if (signed_num_devices > 1)
+for (ssize_t device_id = 0; device_id < signed_num_devices; ++device_id) {
 // check whether the current device is responsible for at least one data point!
 if (data_distribution_->place_specific_num_rows(device_id) == 0) {
 continue;
@@ -334,6 +335,7 @@ void gpu_csvm<device_ptr_t, queue_t, pinned_memory_t>::blas_level_3(const solver
 PLSSVM_ASSERT(B.padding() == C.padding(), "The B ({}) and C ({}) matrices must have the same padding!", B.padding(), C.padding());

 const std::size_t num_devices = this->num_available_devices();
+const auto signed_num_devices = static_cast<ssize_t>(num_devices);

 // the C and B matrices; completely stored on each device
 std::vector<device_ptr_type> B_d(num_devices);
@@ -346,8 +348,8 @@ void gpu_csvm<device_ptr_t, queue_t, pinned_memory_t>::blas_level_3(const solver
 }

 // split memory allocation and memory copy!
-#pragma omp parallel for if (num_devices > 1)
-for (ssize_t device_id = 0; device_id < num_devices; ++device_id) {
+#pragma omp parallel for if (signed_num_devices > 1)
+for (ssize_t device_id = 0; device_id < signed_num_devices; ++device_id) {
 // check whether the current device is responsible for at least one data point!
 if (data_distribution_->place_specific_num_rows(device_id) == 0) {
 continue;
@@ -359,8 +361,8 @@ void gpu_csvm<device_ptr_t, queue_t, pinned_memory_t>::blas_level_3(const solver
 C_d[device_id] = device_ptr_type{ C.shape(), C.padding(), device };
 }

-#pragma omp parallel for ordered if (num_devices > 1)
-for (ssize_t device_id = 0; device_id < num_devices; ++device_id) {
+#pragma omp parallel for ordered if (signed_num_devices > 1)
+for (ssize_t device_id = 0; device_id < signed_num_devices; ++device_id) {
 // check whether the current device is responsible for at least one data point!
 if (data_distribution_->place_specific_num_rows(device_id) == 0) {
 continue;
@@ -495,6 +497,7 @@ aos_matrix<real_type> gpu_csvm<device_ptr_t, queue_t, pinned_memory_t>::predict_
 const std::size_t num_support_vectors = support_vectors.num_rows();
 const std::size_t num_features = predict_points.num_cols();
 const std::size_t num_devices = this->num_available_devices();
+const auto signed_num_devices = static_cast<ssize_t>(num_devices);

 // the result matrix
 aos_matrix<real_type> out_ret{ shape{ num_predict_points, num_classes }, real_type{ 0.0 }, shape{ PADDING_SIZE, PADDING_SIZE } };
@@ -504,8 +507,8 @@ aos_matrix<real_type> gpu_csvm<device_ptr_t, queue_t, pinned_memory_t>::predict_
 std::vector<device_ptr_type> alpha_d(num_devices);

 // split memory allocation and memory copy!
-#pragma omp parallel for if (num_devices > 1)
-for (ssize_t device_id = 0; device_id < num_devices; ++device_id) {
+#pragma omp parallel for if (signed_num_devices > 1)
+for (ssize_t device_id = 0; device_id < signed_num_devices; ++device_id) {
 const queue_type &device = devices_[device_id];

 // allocate memory on the device
@@ -532,8 +535,8 @@ aos_matrix<real_type> gpu_csvm<device_ptr_t, queue_t, pinned_memory_t>::predict_

 std::vector<device_ptr_type> sv_d(num_devices);
 // split memory allocation and memory copy!
-#pragma omp parallel for if (num_devices > 1)
-for (ssize_t device_id = 0; device_id < num_devices; ++device_id) {
+#pragma omp parallel for if (signed_num_devices > 1)
+for (ssize_t device_id = 0; device_id < signed_num_devices; ++device_id) {
 // check whether the current device is responsible for at least one data point!
 if (data_distribution_->place_specific_num_rows(device_id) == 0) {
 continue;
@@ -544,8 +547,8 @@ aos_matrix<real_type> gpu_csvm<device_ptr_t, queue_t, pinned_memory_t>::predict_
 sv_d[device_id] = device_ptr_type{ shape{ data_distribution_->place_specific_num_rows(device_id), num_features }, support_vectors.padding(), device };
 }

-#pragma omp parallel for ordered if (num_devices > 1)
-for (ssize_t device_id = 0; device_id < num_devices; ++device_id) {
+#pragma omp parallel for ordered if (signed_num_devices > 1)
+for (ssize_t device_id = 0; device_id < signed_num_devices; ++device_id) {
 // check whether the current device is responsible for at least one data point!
 if (data_distribution_->place_specific_num_rows(device_id) == 0) {
 continue;
@@ -599,30 +602,30 @@ aos_matrix<real_type> gpu_csvm<device_ptr_t, queue_t, pinned_memory_t>::predict_

 // upload the w vector to all devices
 // split memory allocation and memory copy!
-#pragma omp parallel for if (num_devices > 1)
-for (ssize_t device_id = 0; device_id < num_devices; ++device_id) {
+#pragma omp parallel for if (signed_num_devices > 1)
+for (ssize_t device_id = 0; device_id < signed_num_devices; ++device_id) {
 const queue_type &device = devices_[device_id];

 // allocate memory on the device
 sv_or_w_d[device_id] = device_ptr_type{ shape{ num_classes, num_features }, shape{ PADDING_SIZE, PADDING_SIZE }, device };
 }
-#pragma omp parallel for if (num_devices > 1)
-for (ssize_t device_id = 0; device_id < num_devices; ++device_id) {
+#pragma omp parallel for if (signed_num_devices > 1)
+for (ssize_t device_id = 0; device_id < signed_num_devices; ++device_id) {
 // copy data to the device
 sv_or_w_d[device_id].copy_to_device(w);
 }
 } else {
 // use the support vectors for all other kernel functions
 // split memory allocation and memory copy!
-#pragma omp parallel for if (num_devices > 1)
-for (ssize_t device_id = 0; device_id < num_devices; ++device_id) {
+#pragma omp parallel for if (signed_num_devices > 1)
+for (ssize_t device_id = 0; device_id < signed_num_devices; ++device_id) {
 const queue_type &device = devices_[device_id];

 // allocate memory on the device
 sv_or_w_d[device_id] = device_ptr_type{ support_vectors.shape(), support_vectors.padding(), device };
 }
-#pragma omp parallel for if (num_devices > 1)
-for (ssize_t device_id = 0; device_id < num_devices; ++device_id) {
+#pragma omp parallel for if (signed_num_devices > 1)
+for (ssize_t device_id = 0; device_id < signed_num_devices; ++device_id) {
 // copy data to the device
 sv_or_w_d[device_id].copy_to_device(support_vectors);
 }
@@ -637,8 +640,8 @@ aos_matrix<real_type> gpu_csvm<device_ptr_t, queue_t, pinned_memory_t>::predict_
 std::vector<device_ptr_type> rho_d(num_devices);

 // split memory allocation and memory copy!
-#pragma omp parallel for if (num_devices > 1)
-for (ssize_t device_id = 0; device_id < num_devices; ++device_id) {
+#pragma omp parallel for if (signed_num_devices > 1)
+for (ssize_t device_id = 0; device_id < signed_num_devices; ++device_id) {
 // check whether the current device is responsible for at least one data point!
 if (data_distribution_->place_specific_num_rows(device_id) == 0) {
 continue;
@@ -651,8 +654,8 @@ aos_matrix<real_type> gpu_csvm<device_ptr_t, queue_t, pinned_memory_t>::predict_
 rho_d[device_id] = device_ptr_type{ num_classes + PADDING_SIZE, device };
 }

-#pragma omp parallel for if (num_devices > 1)
-for (ssize_t device_id = 0; device_id < num_devices; ++device_id) {
+#pragma omp parallel for if (signed_num_devices > 1)
+for (ssize_t device_id = 0; device_id < signed_num_devices; ++device_id) {
 // check whether the current device is responsible for at least one data point!
 if (data_distribution_->place_specific_num_rows(device_id) == 0) {
 continue;
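
gpu_csvm.hpp takes a slightly different route: instead of casting inside every comparison, the device count is converted once into `signed_num_devices`, and that single signed value is reused both in the `if (...)` clause of each pragma (which serializes the region when only one device is present) and as the loop bound. A minimal sketch of the hoisting pattern, with stand-in names for the device bookkeeping, could look like this; it is not the PLSSVM implementation.

// Sketch of the hoisted signed device count (assumption: ssize_t here refers to a signed
// integer alias such as std::ptrdiff_t; rows_per_device/uploaded are hypothetical stand-ins).
#include <cstddef>  // std::ptrdiff_t, std::size_t
#include <vector>

using ssize_t_like = std::ptrdiff_t;

void upload_sketch(const std::size_t num_devices, const std::vector<std::size_t> &rows_per_device,
                   std::vector<int> &uploaded) {
    // cast once; reuse the signed value in the if clause and as the loop bound
    const auto signed_num_devices = static_cast<ssize_t_like>(num_devices);

    #pragma omp parallel for if (signed_num_devices > 1)
    for (ssize_t_like device_id = 0; device_id < signed_num_devices; ++device_id) {
        // skip devices that are not responsible for any data point
        if (rows_per_device[device_id] == 0) {
            continue;
        }
        uploaded[device_id] = 1;  // placeholder for the per-device allocation / copy work
    }
}
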

include/plssvm/data_set/classification_data_set.hpp

Lines changed: 2 additions & 2 deletions
@@ -386,8 +386,8 @@ void classification_data_set<U>::map_label() {
 aos_matrix<real_type> tmp{ shape{ mapper.num_mappings(), labels_ptr_->size() }, real_type{ -1.0 } };

 #pragma omp parallel for collapse(2)
-for (detail::ssize_t label = 0; label < tmp.num_rows(); ++label) {
-for (detail::ssize_t i = 0; i < tmp.num_cols(); ++i) {
+for (detail::ssize_t label = 0; label < static_cast<detail::ssize_t>(tmp.num_rows()); ++label) {
+for (detail::ssize_t i = 0; i < static_cast<detail::ssize_t>(tmp.num_cols()); ++i) {
 if (label == mapper.get_mapped_index_by_label((*labels_ptr_)[i])) {
 tmp(label, i) = real_type{ 1.0 };
 }

include/plssvm/data_set/min_max_scaler.hpp

Lines changed: 6 additions & 2 deletions
@@ -17,6 +17,7 @@
 #include "plssvm/detail/io/file_reader.hpp" // plssvm::detail::io::file_reader
 #include "plssvm/detail/io/scaling_factors_parsing.hpp" // plssvm::detail::io::parse_scaling_factors
 #include "plssvm/detail/logging.hpp" // plssvm::detail::log
+#include "plssvm/detail/ssize.hpp" // plssvm::detail::ssize_t
 #include "plssvm/detail/tracking/performance_tracker.hpp" // plssvm::detail::tracking_entry
 #include "plssvm/exceptions/exceptions.hpp" // plssvm::min_max_scaler_exception
 #include "plssvm/matrix.hpp" // plssvm::matrix, plssvm::layout_type
@@ -47,6 +48,8 @@ class min_max_scaler {
 struct factors {
 /// The used size type.
 using size_type = std::size_t;
+/// The used signed size type.
+using ssize_type = detail::ssize_t;

 /**
 * @brief Default construct new scaling factors.
@@ -168,6 +171,7 @@ void min_max_scaler::scale(plssvm::matrix<real_type, layout> &data) {
 const std::chrono::time_point start_time = std::chrono::steady_clock::now();

 using size_type = typename plssvm::matrix<real_type, layout>::size_type;
+using ssize_type = typename plssvm::matrix<real_type, layout>::ssize_type;
 const size_type num_data_points = data.num_rows();
 const size_type num_features = data.num_cols();

@@ -184,7 +188,7 @@

 // calculate min/max values of all data points at the specific feature
 #pragma omp parallel for default(shared) firstprivate(feature) reduction(min : min_value) reduction(max : max_value)
-for (size_type data_point = 0; data_point < num_data_points; ++data_point) {
+for (ssize_type data_point = 0; data_point < static_cast<ssize_type>(num_data_points); ++data_point) {
 min_value = std::min(min_value, data(data_point, feature));
 max_value = std::max(max_value, data(data_point, feature));
 }
@@ -216,7 +220,7 @@

 // scale values
 #pragma omp parallel for default(shared) firstprivate(lower, upper)
-for (size_type i = 0; i < scaling_factors_.size(); ++i) {
+for (ssize_type i = 0; i < detail::size(scaling_factors_); ++i) {
 // extract feature-wise min and max values
 const factors factor = scaling_factors_[i];
 // scale data values
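
min_max_scaler.hpp additionally pulls in `plssvm/detail/ssize.hpp` and introduces an `ssize_type` alias, so the scaling loops can iterate with a signed index and take their bound from the signed-size helpers (`detail::size(scaling_factors_)` here, `detail::ssize(labels)` in regression_data_set.hpp below). The helper header itself is not shown in this diff; the sketch below is only an assumption of what such a helper might look like, modeled on C++20 `std::ssize` and wrapped in a hypothetical `sketch` namespace.

// Hypothetical signed-size helper in the spirit of C++20 std::ssize, plus a usage example.
#include <cstddef>      // std::ptrdiff_t
#include <type_traits>  // std::common_type_t, std::make_signed_t
#include <vector>

namespace sketch {

// returns the container size as a signed integer, so it can be compared against signed loop counters
template <typename Container>
constexpr auto ssize(const Container &c)
    -> std::common_type_t<std::ptrdiff_t, std::make_signed_t<decltype(c.size())>> {
    using R = std::common_type_t<std::ptrdiff_t, std::make_signed_t<decltype(c.size())>>;
    return static_cast<R>(c.size());
}

}  // namespace sketch

int main() {
    const std::vector<double> scaling_factors(8, 1.0);
    double sum = 0.0;

    // signed counter against a signed bound, mirroring the scale() loops above
    #pragma omp parallel for reduction(+ : sum)
    for (std::ptrdiff_t i = 0; i < sketch::ssize(scaling_factors); ++i) {
        sum += scaling_factors[i];
    }
    return sum > 0.0 ? 0 : 1;
}
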

include/plssvm/data_set/regression_data_set.hpp

Lines changed: 2 additions & 2 deletions
@@ -18,7 +18,7 @@
 #include "plssvm/data_set/min_max_scaler.hpp" // plssvm::min_max_scaler
 #include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT
 #include "plssvm/detail/logging.hpp" // plssvm::detail::log
-#include "plssvm/detail/ssize.hpp" // plssvm::detail::ssize_t
+#include "plssvm/detail/ssize.hpp" // plssvm::detail::{ssize_t, ssize}
 #include "plssvm/detail/tracking/performance_tracker.hpp" // plssvm::detail::tracking::tracking_entry
 #include "plssvm/detail/type_list.hpp" // plssvm::detail::{supported_label_types_regression, tuple_contains_v}
 #include "plssvm/file_format_types.hpp" // plssvm::file_format_type
@@ -237,7 +237,7 @@ void regression_data_set<U>::map_label() {
 // convert input labels to now mapped values
 std::vector<real_type> labels(labels_ptr_->size());
 #pragma omp parallel for
-for (detail::ssize_t i = 0; i < labels.size(); ++i) {
+for (detail::ssize_t i = 0; i < detail::ssize(labels); ++i) {
 labels[i] = static_cast<real_type>((*labels_ptr_)[i]);
 }
