Skip to content

Commit 4bd1912

Browse files
committed
Add new specific rvalue constructor overloads for the data set classes.
1 parent 28c7272 commit 4bd1912

File tree

5 files changed

+567
-47
lines changed

5 files changed

+567
-47
lines changed

include/plssvm/data_set/classification_data_set.hpp

Lines changed: 42 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -94,16 +94,16 @@ class classification_data_set : public data_set<U> {
9494
base_data_set{ filename, format } { this->init(); }
9595

9696
/**
97-
* @copydoc plssvm::data_set::data_set(const std::string &, plssvm::min_max_scaler)
97+
* @copydoc plssvm::data_set::data_set(const std::string &, min_max_scaler)
9898
*/
99-
classification_data_set(const std::string &filename, min_max_scaler scale_parameter) :
100-
base_data_set{ filename, std::move(scale_parameter) } { this->init(); }
99+
classification_data_set(const std::string &filename, min_max_scaler scaler) :
100+
base_data_set{ filename, std::move(scaler) } { this->init(); }
101101

102102
/**
103-
* @copydoc plssvm::data_set::data_set(const std::string &, file_format_type, plssvm::min_max_scaler)
103+
* @copydoc plssvm::data_set::data_set(const std::string &, file_format_type, min_max_scaler)
104104
*/
105-
classification_data_set(const std::string &filename, file_format_type format, min_max_scaler scale_parameter) :
106-
base_data_set{ filename, format, std::move(scale_parameter) } { this->init(); }
105+
classification_data_set(const std::string &filename, file_format_type format, min_max_scaler scaler) :
106+
base_data_set{ filename, format, std::move(scaler) } { this->init(); }
107107

108108
/**
109109
* @copydoc plssvm::data_set::data_set(const std::vector<std::vector<real_type>> &)
@@ -118,16 +118,16 @@ class classification_data_set : public data_set<U> {
118118
base_data_set{ data_points, std::move(labels) } { this->init(); }
119119

120120
/**
121-
* @copydoc plssvm::data_set::data_set(const std::vector<std::vector<real_type>> &, plssvm::min_max_scaler)
121+
* @copydoc plssvm::data_set::data_set(const std::vector<std::vector<real_type>> &, min_max_scaler)
122122
*/
123-
classification_data_set(const std::vector<std::vector<real_type>> &data_points, min_max_scaler scale_parameter) :
124-
base_data_set{ data_points, std::move(scale_parameter) } { this->init(); }
123+
classification_data_set(const std::vector<std::vector<real_type>> &data_points, min_max_scaler scaler) :
124+
base_data_set{ data_points, std::move(scaler) } { this->init(); }
125125

126126
/**
127-
* @copydoc plssvm::data_set::data_set(const std::vector<std::vector<real_type>> &, std::vector<label_type>, plssvm::min_max_scaler)
127+
* @copydoc plssvm::data_set::data_set(const std::vector<std::vector<real_type>> &, std::vector<label_type>, min_max_scaler)
128128
*/
129-
classification_data_set(const std::vector<std::vector<real_type>> &data_points, std::vector<label_type> labels, min_max_scaler scale_parameter) :
130-
base_data_set{ data_points, std::move(labels), std::move(scale_parameter) } { this->init(); }
129+
classification_data_set(const std::vector<std::vector<real_type>> &data_points, std::vector<label_type> labels, min_max_scaler scaler) :
130+
base_data_set{ data_points, std::move(labels), std::move(scaler) } { this->init(); }
131131

132132
/**
133133
* @copydoc plssvm::data_set::data_set(const matrix<real_type, layout> &)
@@ -144,18 +144,42 @@ class classification_data_set : public data_set<U> {
144144
base_data_set{ data_points, std::move(labels) } { this->init(); }
145145

146146
/**
147-
* @copydoc plssvm::data_set::data_set(const matrix<real_type, layout> &, plssvm::min_max_scaler)
147+
* @copydoc plssvm::data_set::data_set(const matrix<real_type, layout> &, min_max_scaler)
148148
*/
149149
template <layout_type layout>
150-
classification_data_set(const matrix<real_type, layout> &data_points, min_max_scaler scale_parameter) :
151-
base_data_set{ data_points, std::move(scale_parameter) } { this->init(); }
150+
classification_data_set(const matrix<real_type, layout> &data_points, min_max_scaler scaler) :
151+
base_data_set{ data_points, std::move(scaler) } { this->init(); }
152152

153153
/**
154-
* @copydoc plssvm::data_set::data_set(const matrix<real_type, layout> &, std::vector<label_type>, plssvm::min_max_scaler)
154+
* @copydoc plssvm::data_set::data_set(const matrix<real_type, layout> &, std::vector<label_type>, min_max_scaler)
155155
*/
156156
template <layout_type layout>
157-
classification_data_set(const matrix<real_type, layout> &data_points, std::vector<label_type> labels, min_max_scaler scale_parameter) :
158-
base_data_set{ data_points, std::move(labels), std::move(scale_parameter) } { this->init(); }
157+
classification_data_set(const matrix<real_type, layout> &data_points, std::vector<label_type> labels, min_max_scaler scaler) :
158+
base_data_set{ data_points, std::move(labels), std::move(scaler) } { this->init(); }
159+
160+
/**
161+
* @copydoc plssvm::data_set::data_set(soa_matrix<real_type> &&)
162+
*/
163+
explicit classification_data_set(soa_matrix<real_type> &&data_points) :
164+
base_data_set{ std::move(data_points) } { this->init(); }
165+
166+
/**
167+
* @copydoc plssvm::data_set::data_set(soa_matrix<real_type> &&, std::vector<label_type> &&)
168+
*/
169+
classification_data_set(soa_matrix<real_type> &&data_points, std::vector<label_type> &&labels) :
170+
base_data_set{ std::move(data_points), std::move(labels) } { this->init(); }
171+
172+
/**
173+
* @copydoc plssvm::data_set::data_set(soa_matrix<real_type> &&, min_max_scaler)
174+
*/
175+
classification_data_set(soa_matrix<real_type> &&data_points, min_max_scaler scaler) :
176+
base_data_set{ std::move(data_points), std::move(scaler) } { this->init(); }
177+
178+
/**
179+
* @copydoc plssvm::data_set::data_set(soa_matrix<real_type> &&, std::vector<label_type> &&, min_max_scaler)
180+
*/
181+
classification_data_set(soa_matrix<real_type> &&data_points, std::vector<label_type> &&labels, min_max_scaler scaler) :
182+
base_data_set{ std::move(data_points), std::move(labels), std::move(scaler) } { this->init(); }
159183

160184
/**
161185
* @copydoc plssvm::data_set::save

include/plssvm/data_set/data_set.hpp

Lines changed: 116 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ class data_set {
8282
*/
8383
data_set(const std::string &filename, file_format_type format);
8484
/**
85-
* @brief Read the data points from the file @p filename and scale it using the provided @p scale_parameter.
85+
* @brief Read the data points from the file @p filename and scale them using the provided @p scaler.
8686
* Automatically determines the plssvm::file_format_type based on the file extension.
8787
* @details If @p filename ends with `.arff` it uses the ARFF parser, otherwise the LIBSVM parser is used.
8888
* @param[in] filename the file to read the data points from
@@ -104,7 +104,7 @@ class data_set {
104104

105105
/**
106106
* @brief Create a new data set by converting the provided @p data_points to a plssvm::matrix.
107-
* @details Since no labels are provided, this data set may **not** be used to a call to plssvm::csvm::fit!
107+
* @details Since no labels are provided, this data set may **not** be used in a call to plssvm::csvc::fit/plssvm::csvr::fit!
108108
* @param[in] data_points the data points used in this data set
109109
* @throws plssvm::data_set_exception if the @p data_points vector is empty
110110
* @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features
@@ -146,7 +146,7 @@ class data_set {
146146

147147
/**
148148
* @brief Create a new data set from the provided @p data_points.
149-
* @details Since no labels are provided, this data set may **not** be used to a call to plssvm::csvm::fit!
149+
* @details Since no labels are provided, this data set may **not** be used in a call to plssvm::csvc::fit/plssvm::csvr::fit!
150150
* @note If the provided matrix isn't padded, adds the necessary padding entries automatically.
151151
* @tparam layout the layout type of the input matrix
152152
* @param[in] data_points the data points used in this data set
@@ -196,9 +196,59 @@ class data_set {
196196
* @throws plssvm::min_max_scaler_exception all exceptions thrown by plssvm::min_max_scaler::scale
197197
*/
198198
template <layout_type layout>
199-
data_set(const matrix<real_type, layout> &data_points, std::vector<label_type> labels, scaling scale_parameter);
200199
data_set(const matrix<real_type, layout> &data_points, std::vector<label_type> labels, min_max_scaler scaler);
201200

201+
/**
202+
* @brief Use the provided @p data_points in this data set.
203+
* @details Since no labels are provided, this data set may **not** be used in a call to plssvm::csvc::fit/plssvm::csvr::fit!
204+
* @note Moves the @p data_points into this data set. If @p data_points has the wrong padding, a runtime exception is thrown.
205+
* @param[in] data_points the data points used in this data set
206+
* @throws plssvm::data_set_exception if the @p data_points vector is empty
207+
* @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features
208+
* @throws plssvm::data_set_exception if any @p data_point has no features
209+
* @throws plssvm::data_set_exception if the padding sizes of @p data_points are wrong
210+
*/
211+
explicit data_set(soa_matrix<real_type> &&data_points);
212+
/**
213+
* @brief Use the provided @p data_points and @p labels in this data set.
214+
* @note Moves the @p data_points and @p labels into this data set. If @p data_points has the wrong padding, a runtime exception is thrown.
215+
* @param[in] data_points the data points used in this data set
216+
* @param[in] labels the labels used in this data set
217+
* @throws plssvm::data_set_exception if the @p data_points vector is empty
218+
* @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features
219+
* @throws plssvm::data_set_exception if any @p data_point has no features
220+
* @throws plssvm::data_set_exception if the padding sizes of @p data_points are wrong
221+
* @throws plssvm::data_set_exception if the number of data points in @p data_points and number of @p labels mismatch
222+
*/
223+
data_set(soa_matrix<real_type> &&data_points, std::vector<label_type> &&labels);
224+
/**
225+
* @brief Use the provided @p data_points in this data set and scale them using the provided @p scaler.
226+
* @details Since no labels are provided, this data set may **not** be used in a call to plssvm::csvc::fit/plssvm::csvr::fit!
227+
* @note Moves the @p data_points into this data set. If @p data_points has the wrong padding, a runtime exception is thrown.
228+
* @param[in] data_points the data points used in this data set
229+
* @param[in] scaler the parameters used to scale the data set feature values to a given range
230+
* @throws plssvm::data_set_exception if the @p data_points vector is empty
231+
* @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features
232+
* @throws plssvm::data_set_exception if any @p data_point has no features
233+
* @throws plssvm::data_set_exception if the padding sizes of @p data_points are wrong
234+
* @throws plssvm::min_max_scaler_exception all exceptions thrown by plssvm::min_max_scaler::scale
235+
*/
236+
data_set(soa_matrix<real_type> &&data_points, min_max_scaler scaler);
237+
/**
238+
* @brief Use the provided @p data_points and @p labels in this data set and scale them using the provided @p scaler.
239+
* @note Moves the @p data_points and @p labels into this data set. If @p data_points has the wrong padding, a runtime exception is thrown.
240+
* @param[in] data_points the data points used in this data set
241+
* @param[in] labels the labels used in this data set
242+
* @param[in] scaler the parameters used to scale the data set feature values to a given range
243+
* @throws plssvm::data_set_exception if the @p data_points vector is empty
244+
* @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features
245+
* @throws plssvm::data_set_exception if any @p data_point has no features
246+
* @throws plssvm::data_set_exception if the padding sizes of @p data_points are wrong
247+
* @throws plssvm::data_set_exception if the number of data points in @p data_points and number of @p labels mismatch
248+
* @throws plssvm::min_max_scaler_exception all exceptions thrown by plssvm::min_max_scaler::scale
249+
*/
250+
data_set(soa_matrix<real_type> &&data_points, std::vector<label_type> &&labels, min_max_scaler scaler);
251+
202252
/**
203253
* @brief Default copy constructor.
204254
*/
@@ -409,6 +459,13 @@ data_set<U>::data_set(const matrix<real_type, layout> &data_points, std::vector<
409459
num_features_{ data_points.num_cols() },
410460
data_ptr_{ std::make_shared<soa_matrix<real_type>>(data_points, shape{ PADDING_SIZE, PADDING_SIZE }) },
411461
labels_ptr_{ std::make_shared<std::vector<label_type>>(std::move(labels)) } {
462+
// the provided data points vector may not be empty
463+
if (data_ptr_->num_rows() == 0) {
464+
throw data_set_exception{ "Data vector is empty!" };
465+
}
466+
if (data_ptr_->num_cols() == 0) {
467+
throw data_set_exception{ "No features provided for the data points!" };
468+
}
412469
// the number of labels must be equal to the number of data points!
413470
if (data_ptr_->num_rows() != labels_ptr_->size()) {
414471
throw data_set_exception{ fmt::format("Number of labels ({}) must match the number of data points ({})!", labels_ptr_->size(), data_ptr_->num_rows()) };
@@ -427,19 +484,71 @@ data_set<U>::data_set(const matrix<real_type, layout> &data_points, min_max_scal
427484

428485
template <typename U>
429486
template <layout_type layout>
430-
data_set<U>::data_set(const matrix<real_type, layout> &data_points, std::vector<label_type> labels, scaling scale_parameter) :
431487
data_set<U>::data_set(const matrix<real_type, layout> &data_points, std::vector<label_type> labels, min_max_scaler scale_parameter) :
432488
data_set{ data_points, std::move(labels) } {
433489
// initialize scaling
434490
scaler_ = std::make_shared<min_max_scaler>(std::move(scale_parameter));
435491
// scale data set
436492
scaler_->scale(*data_ptr_);
437493
}
494+
495+
template <typename U>
496+
data_set<U>::data_set(soa_matrix<real_type> &&data_points) :
497+
num_data_points_{ data_points.num_rows() },
498+
num_features_{ data_points.num_cols() },
499+
data_ptr_{ std::make_shared<soa_matrix<real_type>>(std::move(data_points)) } {
500+
// the provided data points vector may not be empty
501+
if (data_ptr_->num_rows() == 0) {
502+
throw data_set_exception{ "Data vector is empty!" };
503+
}
504+
if (data_ptr_->num_cols() == 0) {
505+
throw data_set_exception{ "No features provided for the data points!" };
506+
}
507+
// the padding must be correct
508+
if (data_ptr_->padding() != shape{ PADDING_SIZE, PADDING_SIZE }) {
509+
throw data_set_exception{ fmt::format("Data vector has the wring padding ({})!", data_ptr_->padding()) };
510+
}
511+
}
512+
513+
template <typename U>
514+
data_set<U>::data_set(soa_matrix<real_type> &&data_points, std::vector<label_type> &&labels) :
515+
num_data_points_{ data_points.num_rows() },
516+
num_features_{ data_points.num_cols() },
517+
data_ptr_{ std::make_shared<soa_matrix<real_type>>(std::move(data_points)) },
518+
labels_ptr_{ std::make_shared<std::vector<label_type>>(std::move(labels)) } {
519+
// the provided data points vector may not be empty
520+
if (data_ptr_->num_rows() == 0) {
521+
throw data_set_exception{ "Data vector is empty!" };
522+
}
523+
if (data_ptr_->num_cols() == 0) {
524+
throw data_set_exception{ "No features provided for the data points!" };
525+
}
526+
// the number of labels must be equal to the number of data points!
527+
if (data_ptr_->num_rows() != labels_ptr_->size()) {
528+
throw data_set_exception{ fmt::format("Number of labels ({}) must match the number of data points ({})!", labels_ptr_->size(), data_ptr_->num_rows()) };
529+
}
530+
// the padding must be correct
531+
if (data_ptr_->padding() != shape{ PADDING_SIZE, PADDING_SIZE }) {
532+
throw data_set_exception{ fmt::format("Data vector has the wring padding ({})!", data_ptr_->padding()) };
533+
}
534+
}
535+
536+
template <typename U>
537+
data_set<U>::data_set(soa_matrix<real_type> &&data_points, min_max_scaler scale_parameter) :
538+
data_set{ std::move(data_points) } {
539+
// initialize scaling
540+
scaler_ = std::make_shared<min_max_scaler>(std::move(scale_parameter));
541+
// scale data set
542+
scaler_->scale(*data_ptr_);
543+
}
544+
545+
template <typename U>
546+
data_set<U>::data_set(soa_matrix<real_type> &&data_points, std::vector<label_type> &&labels, min_max_scaler scale_parameter) :
438547
data_set{ std::move(data_points), std::move(labels) } {
439548
// initialize scaling
440-
scale_parameters_ = std::make_shared<scaling>(std::move(scale_parameter));
549+
scaler_ = std::make_shared<min_max_scaler>(std::move(scale_parameter));
441550
// scale data set
442-
this->scale();
551+
scaler_->scale(*data_ptr_);
443552
}
444553

445554
template <typename U>

0 commit comments

Comments
 (0)