|
| 1 | +/******************************************************************************* |
| 2 | +* Copyright 2023 Intel Corporation |
| 3 | +* |
| 4 | +* Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | +* you may not use this file except in compliance with the License. |
| 6 | +* You may obtain a copy of the License at |
| 7 | +* |
| 8 | +* http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | +* |
| 10 | +* Unless required by applicable law or agreed to in writing, software |
| 11 | +* distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | +* See the License for the specific language governing permissions and |
| 14 | +* limitations under the License. |
| 15 | +*******************************************************************************/ |
| 16 | + |
| 17 | +#include "oneapi/dal/algo/basic_statistics.hpp" |
| 18 | + |
| 19 | +#include "onedal/common.hpp" |
| 20 | +#include "onedal/version.hpp" |
| 21 | + |
| 22 | +#include <string> |
| 23 | +#include <regex> |
| 24 | +#include <map> |
| 25 | + |
| 26 | +namespace py = pybind11; |
| 27 | + |
| 28 | +namespace oneapi::dal::python { |
| 29 | + |
| 30 | +#if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20230100 |
| 31 | + |
| 32 | +namespace basic_statistics { |
| 33 | + |
| 34 | +template <typename Task, typename Ops> |
| 35 | +struct method2t { |
| 36 | + method2t(const Task& task, const Ops& ops) : ops(ops) {} |
| 37 | + |
| 38 | + template <typename Float> |
| 39 | + auto operator()(const py::dict& params) { |
| 40 | + using namespace dal::basic_statistics; |
| 41 | + |
| 42 | + const auto method = params["method"].cast<std::string>(); |
| 43 | + ONEDAL_PARAM_DISPATCH_VALUE(method, "dense", ops, Float, method::dense); |
| 44 | + ONEDAL_PARAM_DISPATCH_VALUE(method, "by_default", ops, Float, method::by_default); |
| 45 | + ONEDAL_PARAM_DISPATCH_THROW_INVALID_VALUE(method); |
| 46 | + } |
| 47 | + |
| 48 | + Ops ops; |
| 49 | +}; |
| 50 | + |
| 51 | +#define RESULT_OPTION(option) { #option, dal::basic_statistics::result_options::option } |
| 52 | + |
| 53 | +const std::map<std::string, dal::basic_statistics::result_option_id> result_option_registry { |
| 54 | + RESULT_OPTION(min), RESULT_OPTION(max), RESULT_OPTION(sum), RESULT_OPTION(mean), |
| 55 | + RESULT_OPTION(variance), RESULT_OPTION(variation), RESULT_OPTION(sum_squares), |
| 56 | + RESULT_OPTION(standard_deviation), RESULT_OPTION(sum_squares_centered), |
| 57 | + RESULT_OPTION(second_order_raw_moment) |
| 58 | +}; |
| 59 | + |
| 60 | +#undef RESULT_OPTION |
| 61 | + |
| 62 | +auto get_onedal_result_options(const py::dict& params) { |
| 63 | + using namespace dal::basic_statistics; |
| 64 | + |
| 65 | + auto result_option = params["result_option"].cast<std::string>(); |
| 66 | + result_option_id onedal_options; |
| 67 | + |
| 68 | + try { |
| 69 | + std::regex re("\\w+"); |
| 70 | + const std::sregex_iterator last{}; |
| 71 | + const std::sregex_iterator first( // |
| 72 | + result_option.begin(), |
| 73 | + result_option.end(), |
| 74 | + re); |
| 75 | + |
| 76 | + for (std::sregex_iterator it = first; it != last; ++it) { |
| 77 | + const auto str = it->str(); |
| 78 | + const auto match = result_option_registry.find(str); |
| 79 | + if (match == result_option_registry.cend()) { |
| 80 | + ONEDAL_PARAM_DISPATCH_THROW_INVALID_VALUE(result_option); |
| 81 | + } else { |
| 82 | + onedal_options = onedal_options | match->second; |
| 83 | + } |
| 84 | + } |
| 85 | + } |
| 86 | + catch (std::regex_error& e) { |
| 87 | + ONEDAL_PARAM_DISPATCH_THROW_INVALID_VALUE(result_option); |
| 88 | + } |
| 89 | + |
| 90 | + return onedal_options; |
| 91 | +} |
| 92 | + |
| 93 | +struct params2desc { |
| 94 | + template <typename Float, typename Method, typename Task> |
| 95 | + auto operator()(const py::dict& params) { |
| 96 | + auto desc = dal::basic_statistics::descriptor<Float, |
| 97 | + dal::basic_statistics::method::dense, dal::basic_statistics::task::compute>() |
| 98 | + .set_result_options(get_onedal_result_options(params)); |
| 99 | + return desc; |
| 100 | + } |
| 101 | +}; |
| 102 | + |
| 103 | +template <typename Policy, typename Task> |
| 104 | +struct init_compute_ops_dispatcher {}; |
| 105 | + |
| 106 | +template <typename Policy> |
| 107 | +struct init_compute_ops_dispatcher<Policy, dal::basic_statistics::task::compute> { |
| 108 | + void operator()(py::module_& m) { |
| 109 | + using Task = dal::basic_statistics::task::compute; |
| 110 | + m.def("train", |
| 111 | + [](const Policy& policy, |
| 112 | + const py::dict& params, |
| 113 | + const table& data, |
| 114 | + const table& weights) { |
| 115 | + using namespace dal::basic_statistics; |
| 116 | + using input_t = compute_input<Task>; |
| 117 | + |
| 118 | + compute_ops ops(policy, input_t{ data, weights }, params2desc{}); |
| 119 | + return fptype2t{ method2t{ Task{}, ops } }(params); |
| 120 | + }); |
| 121 | + } |
| 122 | +}; |
| 123 | + |
| 124 | +template <typename Policy, typename Task> |
| 125 | +void init_compute_ops(py::module& m) { |
| 126 | + init_compute_ops_dispatcher<Policy, Task>{}(m); |
| 127 | +} |
| 128 | + |
| 129 | +template <typename Task> |
| 130 | +void init_compute_result(py::module_& m) { |
| 131 | + using namespace dal::basic_statistics; |
| 132 | + using result_t = compute_result<Task>; |
| 133 | + |
| 134 | + auto cls = py::class_<result_t>(m, "compute_result") |
| 135 | + .def(py::init()) |
| 136 | + .DEF_ONEDAL_PY_PROPERTY(min, result_t) |
| 137 | + .DEF_ONEDAL_PY_PROPERTY(max, result_t) |
| 138 | + .DEF_ONEDAL_PY_PROPERTY(sum, result_t) |
| 139 | + .DEF_ONEDAL_PY_PROPERTY(mean, result_t) |
| 140 | + .DEF_ONEDAL_PY_PROPERTY(variance, result_t) |
| 141 | + .DEF_ONEDAL_PY_PROPERTY(variation, result_t) |
| 142 | + .DEF_ONEDAL_PY_PROPERTY(sum_squares, result_t) |
| 143 | + .DEF_ONEDAL_PY_PROPERTY(standard_deviation, result_t) |
| 144 | + .DEF_ONEDAL_PY_PROPERTY(sum_squares_centered, result_t) |
| 145 | + .DEF_ONEDAL_PY_PROPERTY(second_order_raw_moment, result_t); |
| 146 | +} |
| 147 | + |
// Declare instantiators for the two initializer templates above; the module
// initializer later refers to them by these names via ONEDAL_PY_INSTANTIATE.
// NOTE(review): the macro is defined outside this file - presumably it generates
// a wrapper that can be instantiated over type lists; confirm in onedal/common.hpp.
| 148 | +ONEDAL_PY_DECLARE_INSTANTIATOR(init_compute_result); |
| 149 | +ONEDAL_PY_DECLARE_INSTANTIATOR(init_compute_ops); |
| 150 | + |
| 151 | +} // namespace basic_statistics |
| 152 | + |
| 153 | +ONEDAL_PY_INIT_MODULE(basic_statistics) { |
| 154 | + using namespace dal::detail; |
| 155 | + using namespace basic_statistics; |
| 156 | + using namespace dal::basic_statistics; |
| 157 | + |
| 158 | + auto sub = m.def_submodule("basic_statistics"); |
| 159 | + using task_list = types<dal::basic_statistics::task::compute>; |
| 160 | + |
| 161 | +#ifdef ONEDAL_DATA_PARALLEL_SPMD |
| 162 | + ONEDAL_PY_INSTANTIATE(init_compute_ops, sub, policy_list_spmd, task_list); |
| 163 | +#else // ONEDAL_DATA_PARALLEL_SPMD |
| 164 | + ONEDAL_PY_INSTANTIATE(init_compute_ops, sub, policy_list, task_list); |
| 165 | +#endif // ONEDAL_DATA_PARALLEL_SPMD |
| 166 | + |
| 167 | + ONEDAL_PY_INSTANTIATE(init_compute_result, sub, task_list); |
| 168 | +} |
| 169 | + |
// Associates the compute task type with the string "compute".
// NOTE(review): the macro is defined outside this file - presumably the string is
// used wherever a task needs a textual name; confirm in onedal/common.hpp.
| 170 | +ONEDAL_PY_TYPE2STR(dal::basic_statistics::task::compute, "compute"); |
| 171 | + |
| 172 | +#endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20230100 |
| 173 | + |
| 174 | +} // namespace oneapi::dal::python |
0 commit comments