@@ -82,7 +82,7 @@ class data_set {
8282 */
8383 data_set (const std::string &filename, file_format_type format);
8484 /* *
85- * @brief Read the data points from the file @p filename and scale it using the provided @p scale_parameter .
85+ * @brief Read the data points from the file @p filename and scale them using the provided @p scaler.
8686 * Automatically determines the plssvm::file_format_type based on the file extension.
8787 * @details If @p filename ends with `.arff` it uses the ARFF parser, otherwise the LIBSVM parser is used.
8888 * @param[in] filename the file to read the data points from
@@ -104,7 +104,7 @@ class data_set {
104104
105105 /* *
106106 * @brief Create a new data set by converting the provided @p data_points to a plssvm::matrix.
107- * @details Since no labels are provided, this data set may **not** be used to a call to plssvm::csvm ::fit!
107+ * @details Since no labels are provided, this data set may **not** be used in a call to plssvm::csvc::fit/plssvm::csvr::fit!
108108 * @param[in] data_points the data points used in this data set
109109 * @throws plssvm::data_set_exception if the @p data_points vector is empty
110110 * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features
@@ -146,7 +146,7 @@ class data_set {
146146
147147 /* *
148148 * @brief Create a new data set from the provided @p data_points.
149- * @details Since no labels are provided, this data set may **not** be used to a call to plssvm::csvm ::fit!
149+ * @details Since no labels are provided, this data set may **not** be used in a call to plssvm::csvc::fit/plssvm::csvr::fit!
150150 * @note If the provided matrix isn't padded, adds the necessary padding entries automatically.
151151 * @tparam layout the layout type of the input matrix
152152 * @param[in] data_points the data points used in this data set
@@ -196,9 +196,59 @@ class data_set {
196196 * @throws plssvm::min_max_scaler_exception all exceptions thrown by plssvm::min_max_scaler::scale
197197 */
198198 template <layout_type layout>
199- data_set (const matrix<real_type, layout> &data_points, std::vector<label_type> labels, scaling scale_parameter);
200199 data_set (const matrix<real_type, layout> &data_points, std::vector<label_type> labels, min_max_scaler scaler);
201200
201+ /* *
202+ * @brief Use the provided @p data_points in this data set.
203+ * @details Since no labels are provided, this data set may **not** be used in a call to plssvm::csvc::fit/plssvm::csvr::fit!
204+ * @note Moves the @p data_points into this data set. If @p data_points has the wrong padding, a plssvm::data_set_exception is thrown.
205+ * @param[in] data_points the data points used in this data set
206+ * @throws plssvm::data_set_exception if @p data_points contains no data points (zero rows)
207+ * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features
208+ * @throws plssvm::data_set_exception if @p data_points has no features (zero columns)
209+ * @throws plssvm::data_set_exception if the padding sizes of @p data_points are wrong
210+ */
211+ explicit data_set (soa_matrix<real_type> &&data_points);
212+ /* *
213+ * @brief Use the provided @p data_points and @p labels in this data set.
214+ * @note Moves the @p data_points and @p labels into this data set. If @p data_points has the wrong padding, a plssvm::data_set_exception is thrown.
215+ * @param[in] data_points the data points used in this data set
216+ * @param[in] labels the labels used in this data set
217+ * @throws plssvm::data_set_exception if @p data_points contains no data points (zero rows)
218+ * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features
219+ * @throws plssvm::data_set_exception if @p data_points has no features (zero columns)
220+ * @throws plssvm::data_set_exception if the padding sizes of @p data_points are wrong
221+ * @throws plssvm::data_set_exception if the number of data points in @p data_points and the number of @p labels mismatch
222+ */
223+ data_set (soa_matrix<real_type> &&data_points, std::vector<label_type> &&labels);
224+ /* *
225+ * @brief Use the provided @p data_points in this data set and scale them using the provided @p scaler.
226+ * @details Since no labels are provided, this data set may **not** be used in a call to plssvm::csvc::fit/plssvm::csvr::fit!
227+ * @note Moves the @p data_points into this data set. If @p data_points has the wrong padding, a plssvm::data_set_exception is thrown.
228+ * @param[in] data_points the data points used in this data set
229+ * @param[in] scaler the parameters used to scale the data set feature values to a given range
230+ * @throws plssvm::data_set_exception if @p data_points contains no data points (zero rows)
231+ * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features
232+ * @throws plssvm::data_set_exception if @p data_points has no features (zero columns)
233+ * @throws plssvm::data_set_exception if the padding sizes of @p data_points are wrong
234+ * @throws plssvm::min_max_scaler_exception all exceptions thrown by plssvm::min_max_scaler::scale
235+ */
236+ data_set (soa_matrix<real_type> &&data_points, min_max_scaler scaler);
237+ /* *
238+ * @brief Use the provided @p data_points and @p labels in this data set and scale the data points using the provided @p scaler.
239+ * @note Moves the @p data_points and @p labels into this data set. If @p data_points has the wrong padding, a plssvm::data_set_exception is thrown.
240+ * @param[in] data_points the data points used in this data set
241+ * @param[in] labels the labels used in this data set
242+ * @param[in] scaler the parameters used to scale the data set feature values to a given range
243+ * @throws plssvm::data_set_exception if @p data_points contains no data points (zero rows)
244+ * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features
245+ * @throws plssvm::data_set_exception if @p data_points has no features (zero columns)
246+ * @throws plssvm::data_set_exception if the padding sizes of @p data_points are wrong
247+ * @throws plssvm::data_set_exception if the number of data points in @p data_points and the number of @p labels mismatch
248+ * @throws plssvm::min_max_scaler_exception all exceptions thrown by plssvm::min_max_scaler::scale
249+ */
250+ data_set (soa_matrix<real_type> &&data_points, std::vector<label_type> &&labels, min_max_scaler scaler);
251+
202252 /* *
203253 * @brief Default copy constructor.
204254 */
@@ -409,6 +459,13 @@ data_set<U>::data_set(const matrix<real_type, layout> &data_points, std::vector<
409459 num_features_{ data_points.num_cols () },
410460 data_ptr_{ std::make_shared<soa_matrix<real_type>>(data_points, shape{ PADDING_SIZE, PADDING_SIZE }) },
411461 labels_ptr_{ std::make_shared<std::vector<label_type>>(std::move (labels)) } {
462+ // the provided data points vector may not be empty
463+ if (data_ptr_->num_rows () == 0 ) {
464+ throw data_set_exception{ " Data vector is empty!" };
465+ }
466+ if (data_ptr_->num_cols () == 0 ) {
467+ throw data_set_exception{ " No features provided for the data points!" };
468+ }
412469 // the number of labels must be equal to the number of data points!
413470 if (data_ptr_->num_rows () != labels_ptr_->size ()) {
414471 throw data_set_exception{ fmt::format (" Number of labels ({}) must match the number of data points ({})!" , labels_ptr_->size (), data_ptr_->num_rows ()) };
@@ -427,19 +484,71 @@ data_set<U>::data_set(const matrix<real_type, layout> &data_points, min_max_scal
427484
428485template <typename U>
429486template <layout_type layout>
430- data_set<U>::data_set(const matrix<real_type, layout> &data_points, std::vector<label_type> labels, scaling scale_parameter) :
431487data_set<U>::data_set(const matrix<real_type, layout> &data_points, std::vector<label_type> labels, min_max_scaler scale_parameter) :
432488 data_set{ data_points, std::move (labels) } {
433489 // the delegated-to constructor has stored the data points and labels; now keep the provided scaler in this data set
434490 scaler_ = std::make_shared<min_max_scaler>(std::move (scale_parameter));
435491 // apply the stored scaler to the data points held by this data set
436492 scaler_->scale (*data_ptr_);
437493}
494+
495+ template <typename U>
496+ data_set<U>::data_set(soa_matrix<real_type> &&data_points) :
497+ num_data_points_{ data_points.num_rows () },  // sizes are read before data_points is moved below; NOTE(review): relies on the member declaration order - confirm
498+ num_features_{ data_points.num_cols () },
499+ data_ptr_{ std::make_shared<soa_matrix<real_type>>(std::move (data_points)) } {
500+ // the provided data points matrix may not be empty (at least one row is required)
501+ if (data_ptr_->num_rows () == 0 ) {
502+ throw data_set_exception{ " Data vector is empty!" };
503+ }
504+ if (data_ptr_->num_cols () == 0 ) {  // every data point needs at least one feature
505+ throw data_set_exception{ " No features provided for the data points!" };
506+ }
507+ // the matrix is moved in as-is (no re-padding happens here), so its padding must already be { PADDING_SIZE, PADDING_SIZE }
508+ if (data_ptr_->padding () != shape{ PADDING_SIZE, PADDING_SIZE }) {
509+ throw data_set_exception{ fmt::format (" Data vector has the wrong padding ({})!" , data_ptr_->padding ()) };
510+ }
511+ }
512+
513+ template <typename U>
514+ data_set<U>::data_set(soa_matrix<real_type> &&data_points, std::vector<label_type> &&labels) :
515+ num_data_points_{ data_points.num_rows () },  // sizes are read before data_points is moved below; NOTE(review): relies on the member declaration order - confirm
516+ num_features_{ data_points.num_cols () },
517+ data_ptr_{ std::make_shared<soa_matrix<real_type>>(std::move (data_points)) },
518+ labels_ptr_{ std::make_shared<std::vector<label_type>>(std::move (labels)) } {
519+ // the provided data points matrix may not be empty (at least one row is required)
520+ if (data_ptr_->num_rows () == 0 ) {
521+ throw data_set_exception{ " Data vector is empty!" };
522+ }
523+ if (data_ptr_->num_cols () == 0 ) {  // every data point needs at least one feature
524+ throw data_set_exception{ " No features provided for the data points!" };
525+ }
526+ // the number of labels must be equal to the number of data points!
527+ if (data_ptr_->num_rows () != labels_ptr_->size ()) {
528+ throw data_set_exception{ fmt::format (" Number of labels ({}) must match the number of data points ({})!" , labels_ptr_->size (), data_ptr_->num_rows ()) };
529+ }
530+ // the matrix is moved in as-is (no re-padding happens here), so its padding must already be { PADDING_SIZE, PADDING_SIZE }
531+ if (data_ptr_->padding () != shape{ PADDING_SIZE, PADDING_SIZE }) {
532+ throw data_set_exception{ fmt::format (" Data vector has the wrong padding ({})!" , data_ptr_->padding ()) };
533+ }
534+ }
535+
536+ template <typename U>
537+ data_set<U>::data_set(soa_matrix<real_type> &&data_points, min_max_scaler scale_parameter) :
538+ data_set{ std::move (data_points) } {  // delegate to the unscaled constructor, which validates and stores the data points
539+ // keep the provided scaler in this data set
540+ scaler_ = std::make_shared<min_max_scaler>(std::move (scale_parameter));
541+ // apply the stored scaler to the data points held by this data set
542+ scaler_->scale (*data_ptr_);
543+ }
544+
545+ template <typename U>
546+ data_set<U>::data_set(soa_matrix<real_type> &&data_points, std::vector<label_type> &&labels, min_max_scaler scale_parameter) :
438547 data_set{ std::move (data_points), std::move (labels) } {
439548 // the delegated-to constructor has stored the data points and labels; now keep the provided scaler in this data set
440- scale_parameters_ = std::make_shared<scaling >(std::move (scale_parameter));
549+ scaler_ = std::make_shared<min_max_scaler >(std::move (scale_parameter));
441550 // apply the stored scaler to the data points held by this data set
442- this ->scale ();
551+ scaler_ ->scale (*data_ptr_ );
443552}
444553
445554template <typename U>
0 commit comments