|
| 1 | +// ====------------ math-emu-double.cu---------- *- CUDA -* -------------===//// |
| 2 | +// |
| 3 | +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | +// See https://llvm.org/LICENSE.txt for license information. |
| 5 | +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | +// |
| 7 | +// |
| 8 | +// ===---------------------------------------------------------------------===// |
| 9 | + |
#include <cmath>
#include <iomanip>
#include <iostream>
#include <string>
#include <tuple>
#include <utility>
#include <vector>
| 13 | + |
| 14 | +using namespace std; |
| 15 | + |
| 16 | +typedef vector<double> d_vector; |
| 17 | +typedef tuple<double, double, double> d_tuple3; |
| 18 | +typedef tuple<double, double, double, double> d_tuple4; |
| 19 | +typedef pair<double, int> di_pair; |
| 20 | + |
| 21 | +int passed = 0; |
| 22 | +int failed = 0; |
| 23 | + |
// Prints the verdict for one test case and increments the matching global
// counter (`passed` when IsPassed is true, `failed` otherwise).
void check(bool IsPassed) {
  cout << (IsPassed ? " ---- passed" : " ---- failed") << endl;
  int &Counter = IsPassed ? passed : failed;
  ++Counter;
}
| 33 | + |
// Prints one test case as `FuncName(inputs...) = result (expect lo ~ hi)` and
// records whether the device result lies within 10^-precision of Expect.
//
//   FuncName     - name shown in the report line.
//   Inputs       - arguments shown in the report line; may be empty.
//   Expect       - reference value.
//   DeviceResult - value produced on the device.
//   precision    - number of decimal places printed; also sets the comparison
//                  tolerance to 10^-precision.
//
// The stream's format flags and precision are restored before returning, so
// the precision chosen for one case no longer changes how the inputs of the
// following cases are printed.
template <typename T = double>
void checkResult(const string &FuncName, const vector<T> &Inputs,
                 const double &Expect, const double &DeviceResult,
                 const int precision) {
  cout << FuncName << "(";
  for (size_t i = 0; i < Inputs.size(); ++i) {
    if (i != 0) {
      cout << ", ";
    }
    cout << Inputs[i];
  }

  const double Tolerance = pow(10, -precision);

  // Remember the caller-visible formatting state before switching to
  // fixed-point output with the requested number of digits.
  const ios::fmtflags SavedFlags = cout.flags();
  const streamsize SavedPrecision = cout.precision();
  cout << ") = " << fixed << setprecision(precision) << DeviceResult
       << " (expect " << Expect - Tolerance << " ~ " << Expect + Tolerance
       << ")";
  cout.flags(SavedFlags);
  cout.precision(SavedPrecision);

  // A NaN result compares false here and is therefore counted as a failure.
  check(abs(DeviceResult - Expect) < Tolerance);
}
| 48 | + |
// Device kernel: stores Input2 into the single double that Input1 points to.
// Every launched thread performs the same store; the callers in this file
// launch it with one block of one thread.
__global__ void setVecValue(double *Input1, const double Input2) {
  Input1[0] = Input2;
}
| 52 | + |
// Device kernel: evaluates norm(Input1, Input2) and stores the value in
// *DeviceResult. Input1 is the element count passed to norm and Input2 the
// device-accessible array it reads.
__global__ void _norm(double *const DeviceResult, int Input1,
                      const double *Input2) {
  const double Value = norm(Input1, Input2);
  DeviceResult[0] = Value;
}
| 57 | + |
// Host-side driver: launches _norm with a single thread and waits for it to
// finish, so *DeviceResult can be read by the caller afterwards. Any launch or
// execution error is reported on stderr instead of being silently dropped.
void testNorm(double *const DeviceResult, int Input1, const double *Input2) {
  _norm<<<1, 1>>>(DeviceResult, Input1, Input2);
  // A kernel launch has no return value: launch errors are fetched with
  // cudaGetLastError(), execution errors surface at the synchronizing call.
  const cudaError_t LaunchStatus = cudaGetLastError();
  const cudaError_t SyncStatus = cudaDeviceSynchronize();
  if (LaunchStatus != cudaSuccess || SyncStatus != cudaSuccess) {
    cerr << "norm kernel failed: "
         << cudaGetErrorString(LaunchStatus != cudaSuccess ? LaunchStatus
                                                           : SyncStatus)
         << endl;
  }
  // TODO: Need test host side.
}
| 63 | + |
// Runs every norm() test case: copies the case's values into a managed buffer
// element by element, evaluates norm on the device, and reports the outcome
// through checkResult.
//
// Each element of TestCases is {input vector, {expected value, precision}}.
// The per-case input buffer and the shared result slot are released before
// returning.
void testNormCases(const vector<pair<d_vector, di_pair>> &TestCases) {
  double *DeviceResult = nullptr;
  cudaMallocManaged(&DeviceResult, sizeof(*DeviceResult));
  // Other test values.
  for (const auto &TestCase : TestCases) {
    const d_vector &Values = TestCase.first;
    double *Input = nullptr;
    cudaMallocManaged(&Input, Values.size() * sizeof(*Input));
    for (size_t i = 0; i < Values.size(); ++i) {
      // Notice: cannot set value from host!
      setVecValue<<<1, 1>>>(Input + i, Values[i]);
      cudaDeviceSynchronize();
    }
    testNorm(DeviceResult, static_cast<int>(Values.size()), Input);

    // Render the vector as "&{a, b, c}"; an empty vector becomes "&{}" rather
    // than underflowing `size() - 1` or calling back() on an empty container.
    string arg = "&{";
    for (size_t i = 0; i < Values.size(); ++i) {
      if (i != 0) {
        arg += ", ";
      }
      arg += to_string(Values[i]);
    }
    arg += "}";

    checkResult<string>("norm", {to_string(Values.size()), arg},
                        TestCase.second.first, *DeviceResult,
                        TestCase.second.second);
    // The buffer belongs to this case only.
    cudaFree(Input);
  }
  cudaFree(DeviceResult);
}
| 87 | + |
// Device kernel: evaluates norm3d(Input1, Input2, Input3) and stores the value
// in *DeviceResult.
__global__ void _norm3d(double *const DeviceResult, double Input1,
                        double Input2, double Input3) {
  const double Value = norm3d(Input1, Input2, Input3);
  DeviceResult[0] = Value;
}
| 92 | + |
// Host-side driver: launches _norm3d with a single thread and waits for it to
// finish, so *DeviceResult can be read by the caller afterwards. Any launch or
// execution error is reported on stderr instead of being silently dropped.
void testNorm3d(double *const DeviceResult, double Input1, double Input2,
                double Input3) {
  _norm3d<<<1, 1>>>(DeviceResult, Input1, Input2, Input3);
  // A kernel launch has no return value: launch errors are fetched with
  // cudaGetLastError(), execution errors surface at the synchronizing call.
  const cudaError_t LaunchStatus = cudaGetLastError();
  const cudaError_t SyncStatus = cudaDeviceSynchronize();
  if (LaunchStatus != cudaSuccess || SyncStatus != cudaSuccess) {
    cerr << "norm3d kernel failed: "
         << cudaGetErrorString(LaunchStatus != cudaSuccess ? LaunchStatus
                                                           : SyncStatus)
         << endl;
  }
  // Call from host.
}
| 99 | + |
// Runs every norm3d() test case on the device and reports the outcome through
// checkResult.
//
// Each element of TestCases is {{x, y, z}, {expected value, precision}}.
// The managed result slot is released before returning.
void testNorm3dCases(const vector<pair<d_tuple3, di_pair>> &TestCases) {
  double *DeviceResult = nullptr;
  cudaMallocManaged(&DeviceResult, sizeof(*DeviceResult));
  for (const auto &TestCase : TestCases) {
    const double X = get<0>(TestCase.first);
    const double Y = get<1>(TestCase.first);
    const double Z = get<2>(TestCase.first);
    testNorm3d(DeviceResult, X, Y, Z);
    checkResult("norm3d", {X, Y, Z}, TestCase.second.first, *DeviceResult,
                TestCase.second.second);
  }
  cudaFree(DeviceResult);
}
| 112 | + |
// Device kernel: evaluates norm4d(Input1, Input2, Input3, Input4) and stores
// the value in *DeviceResult.
__global__ void _norm4d(double *const DeviceResult, double Input1,
                        double Input2, double Input3, double Input4) {
  const double Value = norm4d(Input1, Input2, Input3, Input4);
  DeviceResult[0] = Value;
}
| 117 | + |
// Host-side driver: launches _norm4d with a single thread and waits for it to
// finish, so *DeviceResult can be read by the caller afterwards. Any launch or
// execution error is reported on stderr instead of being silently dropped.
void testNorm4d(double *const DeviceResult, double Input1, double Input2,
                double Input3, double Input4) {
  _norm4d<<<1, 1>>>(DeviceResult, Input1, Input2, Input3, Input4);
  // A kernel launch has no return value: launch errors are fetched with
  // cudaGetLastError(), execution errors surface at the synchronizing call.
  const cudaError_t LaunchStatus = cudaGetLastError();
  const cudaError_t SyncStatus = cudaDeviceSynchronize();
  if (LaunchStatus != cudaSuccess || SyncStatus != cudaSuccess) {
    cerr << "norm4d kernel failed: "
         << cudaGetErrorString(LaunchStatus != cudaSuccess ? LaunchStatus
                                                           : SyncStatus)
         << endl;
  }
  // Call from host.
}
| 124 | + |
// Runs every norm4d() test case on the device and reports the outcome through
// checkResult.
//
// Each element of TestCases is {{a, b, c, d}, {expected value, precision}}.
// The managed result slot is released before returning.
void testNorm4dCases(const vector<pair<d_tuple4, di_pair>> &TestCases) {
  double *DeviceResult = nullptr;
  cudaMallocManaged(&DeviceResult, sizeof(*DeviceResult));
  for (const auto &TestCase : TestCases) {
    const double A = get<0>(TestCase.first);
    const double B = get<1>(TestCase.first);
    const double C = get<2>(TestCase.first);
    const double D = get<3>(TestCase.first);
    testNorm4d(DeviceResult, A, B, C, D);
    checkResult("norm4d", {A, B, C, D}, TestCase.second.first, *DeviceResult,
                TestCase.second.second);
  }
  cudaFree(DeviceResult);
}
| 137 | + |
// Device kernel: evaluates normcdf(Input) and stores the value in
// *DeviceResult.
__global__ void _normcdf(double *const DeviceResult, double Input) {
  const double Value = normcdf(Input);
  DeviceResult[0] = Value;
}
| 141 | + |
// Host-side driver: launches _normcdf with a single thread and waits for it to
// finish, so *DeviceResult can be read by the caller afterwards. Any launch or
// execution error is reported on stderr instead of being silently dropped.
void testNormcdf(double *const DeviceResult, double Input) {
  _normcdf<<<1, 1>>>(DeviceResult, Input);
  // A kernel launch has no return value: launch errors are fetched with
  // cudaGetLastError(), execution errors surface at the synchronizing call.
  const cudaError_t LaunchStatus = cudaGetLastError();
  const cudaError_t SyncStatus = cudaDeviceSynchronize();
  if (LaunchStatus != cudaSuccess || SyncStatus != cudaSuccess) {
    cerr << "normcdf kernel failed: "
         << cudaGetErrorString(LaunchStatus != cudaSuccess ? LaunchStatus
                                                           : SyncStatus)
         << endl;
  }
  // Call from host.
}
| 147 | + |
// Runs every normcdf() test case on the device and reports the outcome through
// checkResult.
//
// Each element of TestCases is {input, {expected value, precision}}.
// The managed result slot is released before returning.
void testNormcdfCases(const vector<pair<double, di_pair>> &TestCases) {
  double *DeviceResult = nullptr;
  cudaMallocManaged(&DeviceResult, sizeof(*DeviceResult));
  // Other test values.
  for (const auto &TestCase : TestCases) {
    const double X = TestCase.first;
    testNormcdf(DeviceResult, X);
    checkResult("normcdf", {X}, TestCase.second.first, *DeviceResult,
                TestCase.second.second);
  }
  cudaFree(DeviceResult);
}
| 158 | + |
// Device kernel: evaluates rnorm(Input1, Input2) and stores the value in
// *DeviceResult. Input1 is the element count passed to rnorm and Input2 the
// device-accessible array it reads.
__global__ void _rnorm(double *const DeviceResult, int Input1,
                       const double *Input2) {
  const double Value = rnorm(Input1, Input2);
  DeviceResult[0] = Value;
}
| 163 | + |
// Host-side driver: launches _rnorm with a single thread and waits for it to
// finish, so *DeviceResult can be read by the caller afterwards. Any launch or
// execution error is reported on stderr instead of being silently dropped.
void testRnorm(double *const DeviceResult, int Input1, const double *Input2) {
  _rnorm<<<1, 1>>>(DeviceResult, Input1, Input2);
  // A kernel launch has no return value: launch errors are fetched with
  // cudaGetLastError(), execution errors surface at the synchronizing call.
  const cudaError_t LaunchStatus = cudaGetLastError();
  const cudaError_t SyncStatus = cudaDeviceSynchronize();
  if (LaunchStatus != cudaSuccess || SyncStatus != cudaSuccess) {
    cerr << "rnorm kernel failed: "
         << cudaGetErrorString(LaunchStatus != cudaSuccess ? LaunchStatus
                                                           : SyncStatus)
         << endl;
  }
  // Call from host.
}
| 169 | + |
// Runs every rnorm() test case: copies the case's values into a managed buffer
// element by element, evaluates rnorm on the device, and reports the outcome
// through checkResult.
//
// Each element of TestCases is {input vector, {expected value, precision}}.
// The per-case input buffer and the shared result slot are released before
// returning.
void testRnormCases(const vector<pair<d_vector, di_pair>> &TestCases) {
  double *DeviceResult = nullptr;
  cudaMallocManaged(&DeviceResult, sizeof(*DeviceResult));
  // Other test values.
  for (const auto &TestCase : TestCases) {
    const d_vector &Values = TestCase.first;
    double *Input = nullptr;
    cudaMallocManaged(&Input, Values.size() * sizeof(*Input));
    for (size_t i = 0; i < Values.size(); ++i) {
      // Notice: cannot set value from host!
      setVecValue<<<1, 1>>>(Input + i, Values[i]);
      cudaDeviceSynchronize();
    }
    testRnorm(DeviceResult, static_cast<int>(Values.size()), Input);

    // Render the vector as "&{a, b, c}"; an empty vector becomes "&{}" rather
    // than underflowing `size() - 1` or calling back() on an empty container.
    string arg = "&{";
    for (size_t i = 0; i < Values.size(); ++i) {
      if (i != 0) {
        arg += ", ";
      }
      arg += to_string(Values[i]);
    }
    arg += "}";

    checkResult<string>("rnorm", {to_string(Values.size()), arg},
                        TestCase.second.first, *DeviceResult,
                        TestCase.second.second);
    // The buffer belongs to this case only.
    cudaFree(Input);
  }
  cudaFree(DeviceResult);
}
| 193 | + |
// Device kernel: evaluates rnorm3d(Input1, Input2, Input3) and stores the
// value in *DeviceResult.
__global__ void _rnorm3d(double *const DeviceResult, double Input1,
                         double Input2, double Input3) {
  const double Value = rnorm3d(Input1, Input2, Input3);
  DeviceResult[0] = Value;
}
| 198 | + |
// Host-side driver: launches _rnorm3d with a single thread and waits for it to
// finish, so *DeviceResult can be read by the caller afterwards. Any launch or
// execution error is reported on stderr instead of being silently dropped.
void testRnorm3d(double *const DeviceResult, double Input1, double Input2,
                 double Input3) {
  _rnorm3d<<<1, 1>>>(DeviceResult, Input1, Input2, Input3);
  // A kernel launch has no return value: launch errors are fetched with
  // cudaGetLastError(), execution errors surface at the synchronizing call.
  const cudaError_t LaunchStatus = cudaGetLastError();
  const cudaError_t SyncStatus = cudaDeviceSynchronize();
  if (LaunchStatus != cudaSuccess || SyncStatus != cudaSuccess) {
    cerr << "rnorm3d kernel failed: "
         << cudaGetErrorString(LaunchStatus != cudaSuccess ? LaunchStatus
                                                           : SyncStatus)
         << endl;
  }
  // Call from host.
}
| 205 | + |
// Runs every rnorm3d() test case on the device and reports the outcome through
// checkResult.
//
// Each element of TestCases is {{x, y, z}, {expected value, precision}}.
// The managed result slot is released before returning.
void testRnorm3dCases(const vector<pair<d_tuple3, di_pair>> &TestCases) {
  double *DeviceResult = nullptr;
  cudaMallocManaged(&DeviceResult, sizeof(*DeviceResult));
  for (const auto &TestCase : TestCases) {
    const double X = get<0>(TestCase.first);
    const double Y = get<1>(TestCase.first);
    const double Z = get<2>(TestCase.first);
    testRnorm3d(DeviceResult, X, Y, Z);
    checkResult("rnorm3d", {X, Y, Z}, TestCase.second.first, *DeviceResult,
                TestCase.second.second);
  }
  cudaFree(DeviceResult);
}
| 218 | + |
// Device kernel: evaluates rnorm4d(Input1, Input2, Input3, Input4) and stores
// the value in *DeviceResult.
__global__ void _rnorm4d(double *const DeviceResult, double Input1,
                         double Input2, double Input3, double Input4) {
  const double Value = rnorm4d(Input1, Input2, Input3, Input4);
  DeviceResult[0] = Value;
}
| 223 | + |
// Host-side driver: launches _rnorm4d with a single thread and waits for it to
// finish, so *DeviceResult can be read by the caller afterwards. Any launch or
// execution error is reported on stderr instead of being silently dropped.
void testRnorm4d(double *const DeviceResult, double Input1, double Input2,
                 double Input3, double Input4) {
  _rnorm4d<<<1, 1>>>(DeviceResult, Input1, Input2, Input3, Input4);
  // A kernel launch has no return value: launch errors are fetched with
  // cudaGetLastError(), execution errors surface at the synchronizing call.
  const cudaError_t LaunchStatus = cudaGetLastError();
  const cudaError_t SyncStatus = cudaDeviceSynchronize();
  if (LaunchStatus != cudaSuccess || SyncStatus != cudaSuccess) {
    cerr << "rnorm4d kernel failed: "
         << cudaGetErrorString(LaunchStatus != cudaSuccess ? LaunchStatus
                                                           : SyncStatus)
         << endl;
  }
  // Call from host.
}
| 230 | + |
// Runs every rnorm4d() test case on the device and reports the outcome through
// checkResult.
//
// Each element of TestCases is {{a, b, c, d}, {expected value, precision}}.
// The managed result slot is released before returning.
void testRnorm4dCases(const vector<pair<d_tuple4, di_pair>> &TestCases) {
  double *DeviceResult = nullptr;
  cudaMallocManaged(&DeviceResult, sizeof(*DeviceResult));
  for (const auto &TestCase : TestCases) {
    const double A = get<0>(TestCase.first);
    const double B = get<1>(TestCase.first);
    const double C = get<2>(TestCase.first);
    const double D = get<3>(TestCase.first);
    testRnorm4d(DeviceResult, A, B, C, D);
    checkResult("rnorm4d", {A, B, C, D}, TestCase.second.first, *DeviceResult,
                TestCase.second.second);
  }
  cudaFree(DeviceResult);
}
| 243 | + |
// Entry point: runs each table of test cases through its driver, prints the
// pass/total summary, and returns the number of failed cases as the exit
// status (0 when everything passed).
//
// Every table row has the form {inputs, {expected value, precision}}.
int main() {
  const vector<pair<d_vector, di_pair>> NormTable = {
      {{-0.3, -0.34, -0.98}, {1.079814798935447, 15}},
      {{0.3, 0.34, 0.98}, {1.079814798935447, 15}},
      {{0.5}, {0.5, 16}},
      {{23, 432, 23, 456, 23}, {629.4020972319682, 13}},
  };
  testNormCases(NormTable);

  const vector<pair<d_tuple3, di_pair>> Norm3dTable = {
      {{-0.3, -0.34, -0.98}, {1.079814798935447, 15}},
      {{0.3, 0.34, 0.98}, {1.079814798935447, 15}},
      {{0.5, 456, 23}, {456.5799491874342, 13}},
      {{23, 432, 23}, {433.222806417206, 13}},
  };
  testNorm3dCases(Norm3dTable);

  const vector<pair<d_tuple4, di_pair>> Norm4dTable = {
      {{-0.3, -0.34, -0.98, 1}, {1.471733671558818, 15}},
      {{0.3, 0.34, 0.98, 1}, {1.471733671558818, 15}},
      {{0.5, 456, 23, 1}, {456.5810442845827, 13}},
      {{23, 432, 23, 1}, {433.2239605562001, 13}},
  };
  testNorm4dCases(Norm4dTable);

  const vector<pair<double, di_pair>> NormcdfTable = {
      {-5, {0.000000286651571879194, 21}},
      {-3, {0.001349898031630095, 18}},
      {0, {0.5, 16}},
      {1, {0.841344746068543, 15}},
      {5, {0.9999997133484281, 16}},
  };
  testNormcdfCases(NormcdfTable);

  const vector<pair<d_vector, di_pair>> RnormTable = {
      {{-0.3, -0.34, -0.98}, {0.926084733220795, 15}},
      {{0.3, 0.34, 0.98}, {0.926084733220795, 15}},
      {{0.5}, {2, 15}},
      {{23, 432, 23, 456, 23}, {0.001588809450108087, 18}},
  };
  testRnormCases(RnormTable);

  const vector<pair<d_tuple3, di_pair>> Rnorm3dTable = {
      {{-0.3, -0.34, -0.98}, {0.926084733220795, 15}},
      {{0.3, 0.34, 0.98}, {0.926084733220795, 15}},
      {{0.5, 456, 23}, {0.002190196923407782, 18}},
      {{23, 432, 23}, {0.002308281062740199, 18}},
  };
  testRnorm3dCases(Rnorm3dTable);

  const vector<pair<d_tuple4, di_pair>> Rnorm4dTable = {
      {{-0.3, -0.34, -0.98, 1}, {0.679470762492529, 15}},
      {{0.3, 0.34, 0.98, 1}, {0.679470762492529, 15}},
      {{0.5, 456, 23, 1}, {0.002190191670280358, 18}},
      {{23, 432, 23, 1}, {0.002308274913317669, 18}},
  };
  testRnorm4dCases(Rnorm4dTable);

  const int Total = passed + failed;
  cout << "passed " << passed << "/" << Total << " cases!" << endl;
  if (failed != 0) {
    cout << "failed!" << endl;
  }
  return failed;
}
0 commit comments