alpaka3/test/unit/math/DataGen.hpp at ffcfc3c27f7c821655a684f024f58f47291b94a4 · psychocoderHPC/alpaka3 · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
/* Copyright 2022 Jakob Krude, Benjamin Worpitz, Jeffrey Kelling, Jan Stephan, Sergei Bastrakov
 * SPDX-License-Identifier: MPL-2.0
 */

#pragma once

#include "Defines.hpp"

#include <cassert>
#include <cmath>
#include <cstddef>
#include <functional>
#include <limits>
#include <random>
#include <stdexcept>
#include <type_traits>

#if ALPAKA_COMP_CLANG
#    pragma clang diagnostic push
#    pragma clang diagnostic ignored "-Wswitch-default"
#endif

namespace mathtest
{
    //! Source of random test values for a given value type.
    //!
    //! This primary template is meant for the floating-point types float and double.
    //!
    //! @tparam TData type of the generated values
    template<typename TData>
    struct RngWrapper
    {
        //! @return the largest finite value of TData
        auto getMax() -> TData
        {
            using Limits = std::numeric_limits<TData>;
            return Limits::max();
        }

        //! @return the most negative finite value of TData
        auto getLowest() -> TData
        {
            using Limits = std::numeric_limits<TData>;
            return Limits::lowest();
        }

        //! @return a uniform real distribution constructed with the bounds 0 and 1000
        auto getDistribution() -> std::uniform_real_distribution<TData>
        {
            using Distribution = std::uniform_real_distribution<TData>;
            return Distribution{0, 1000};
        }

        //! Draws a single value.
        //!
        //! @param distribution distribution to sample from
        //! @param engine random engine feeding the distribution
        //! @return the value produced by one invocation of the distribution
        template<typename TDistribution, typename TEngine>
        auto getNumber(TDistribution& distribution, TEngine& engine)
        {
            auto const sample = distribution(engine);
            return sample;
        }
    };

    //! Specialization for generating alpaka::math::Complex<TData>
    //!
    //! It has a much reduced range of numbers.
    //! The reason is, the results of operations much easier go to infinity area.
    //! Also, alpaka may emulate complex number math via calling other functions.
    //! As a result, it may produce some infinities and NaNs when the std:: implementation would not.
    //! So this range at least makes sure the "simple" cases work and therefore the implementation is
    //! logically correct.
    template<typename TData>
    struct RngWrapper<alpaka::math::Complex<TData>>
    {
        auto getMax()
        {
            return alpaka::math::Complex<TData>{TData{10}, TData{10}};
        }

        auto getLowest()
        {
            return -getMax();
        }

        auto getDistribution()
        {
            return std::uniform_real_distribution<TData>{0, 5};
        }

        template<typename TDistribution, typename TEngine>
        auto getNumber(TDistribution& distribution, TEngine& engine)
        {
            return alpaka::math::Complex<TData>{distribution(engine), distribution(engine)};
        }
    };

    /**
     * Fills buffer with random numbers (host-only).
     *
     * For every argument slot k of the functor, the leading entries of the buffer receive fixed special
     * values selected by functor.ranges[k]; all remaining entries receive pseudo-random values obtained
     * through RngWrapper<TData>.
     *
     * @tparam TData The used data-type (float, double, Complex<float> or Complex<double>).
     * @tparam TArgs The args-buffer to be filled.
     * @tparam TFunctor The used Functor-type.
     * @param args The buffer that should be filled.
     * @param functor The Functor, needed for ranges.
     * @param seed The used seed.
     * @throws std::runtime_error if the buffer provides fewer than 7 entries.
     */
    template<typename TData, typename TArgs, typename TFunctor>
    auto fillWithRndArgs(TArgs& args, TFunctor functor, unsigned int const& seed) -> void
    {
        /*
         * Each "sub-buffer" is filled with zero and/or max and/or lowest,
         * depending on the specified range (at [0] - [2]).
         * Range::Anything places its fixed values at [0] - [6].
         *
         * Every switch case needs to set matchedSwitch!
         * If no switch case was matched the assert(matchedSwitch) will be triggered.
         *
         * This function is easily extendable. It is only necessary to add extra
         * definitions in the switch case, for more Range-types.
         */
        static_assert(TArgs::value_type::arity == TFunctor::arity, "Buffer properties must match TFunctor::arity");

        // Range::Anything writes fixed values to the entries [0] - [6], so at least 7 entries are required.
        auto const capacity = args.getCapacity();
        if(capacity < 7)
            throw std::runtime_error("Set of args must provide > 6 entries.");

        auto rngWrapper = RngWrapper<TData>{};
        auto const max = rngWrapper.getMax();
        auto const low = rngWrapper.getLowest();
        std::default_random_engine eng{static_cast<std::default_random_engine::result_type>(seed)};

        // These pseudo-random numbers are implementation/platform specific!
        auto dist = rngWrapper.getDistribution();
        // Same distribution type as dist, but constructed with the bounds -1 and 1.
        decltype(dist) distOne(-1, 1);

        for(size_t k = 0; k < TFunctor::arity_nr; ++k)
        {
            [[maybe_unused]] bool matchedSwitch = false;
            switch(functor.ranges[k])
            {
            case Range::OneNeighbourhood:
                matchedSwitch = true;
                // No fixed entries: every value is drawn from distOne.
                for(size_t i = 0; i < capacity; ++i)
                {
                    args(i).arg[k] = rngWrapper.getNumber(distOne, eng);
                }
                break;

            case Range::PositiveOnly:
                matchedSwitch = true;
                args(0).arg[k] = max;
                for(size_t i = 1; i < capacity; ++i)
                {
                    // dist starts at zero; the offset keeps the values away from zero.
                    args(i).arg[k] = rngWrapper.getNumber(dist, eng) + TData{1};
                }
                break;

            case Range::PositiveAndZero:
                matchedSwitch = true;
                args(0).arg[k] = TData{0};
                args(1).arg[k] = max;
                for(size_t i = 2; i < capacity; ++i)
                {
                    args(i).arg[k] = rngWrapper.getNumber(dist, eng);
                }
                break;

            case Range::NotZero:
                matchedSwitch = true;
                args(0).arg[k] = max;
                args(1).arg[k] = low;
                for(size_t i = 2; i < capacity; ++i)
                {
                    TData arg;
                    // Redraw until the value differs from zero: zero is the one value this range must not
                    // contain. std::equal_to is used for the comparison instead of a plain operator==.
                    do
                    {
                        arg = rngWrapper.getNumber(dist, eng);
                    } while(std::equal_to<TData>()(arg, TData{0}));
                    // Alternate the sign so that both signs are covered.
                    if(i % 2 == 0)
                        args(i).arg[k] = arg;
                    else
                        args(i).arg[k] = -arg;
                }
                break;

            case Range::Unrestricted:
                matchedSwitch = true;
                args(0).arg[k] = TData{0};
                args(1).arg[k] = max;
                args(2).arg[k] = low;
                for(size_t i = 3; i < capacity; ++i)
                {
                    // Alternate the sign so that both signs are covered.
                    if(i % 2 == 0)
                        args(i).arg[k] = rngWrapper.getNumber(dist, eng);
                    else
                        args(i).arg[k] = -rngWrapper.getNumber(dist, eng);
                }
                break;

            case Range::Anything:
                {
                    matchedSwitch = true;
                    args(0).arg[k] = TData{0};
                    args(1).arg[k] = std::numeric_limits<TData>::quiet_NaN();
                    args(2).arg[k] = std::numeric_limits<TData>::signaling_NaN();
                    args(3).arg[k] = std::numeric_limits<TData>::infinity();
                    args(4).arg[k] = -std::numeric_limits<TData>::infinity();
                    // One negative one positive value
                    if constexpr(std::is_same_v<TData, float>)
                    {
                        args(5).arg[k] = 1.1f; // Use float literal
                        args(6).arg[k] = -1.1f;
                    }
                    else if constexpr(std::is_same_v<TData, double>)
                    {
                        args(5).arg[k] = 1.1; // Use double literal
                        args(6).arg[k] = -1.1;
                    }
                    else if constexpr(std::is_same_v<TData, alpaka::math::Complex<float>>)
                    {
                        args(5).arg[k] = alpaka::math::Complex<float>{1.1f, 2.1f}; // Complex float
                        args(6).arg[k] = alpaka::math::Complex<float>{-1.1f, -2.1f};
                    }
                    else if constexpr(std::is_same_v<TData, alpaka::math::Complex<double>>)
                    {
                        args(5).arg[k] = alpaka::math::Complex<double>{1.1, 2.1}; // Complex double
                        args(6).arg[k] = alpaka::math::Complex<double>{-1.1, -2.1};
                    }

                    // Number of entries occupied by the fixed values above.
                    constexpr size_t nFixed = 7;
                    size_t i = nFixed;
                    // no need to test for denormal for now: not supported by CUDA
                    // for(; i < nFixed + (args.getCapacity() - nFixed) / 2; ++i)
                    // {
                    //     const TData v = rngWrapper.getNumber(dist, eng) *
                    //     std::numeric_limits<TData>::denorm_min(); args(i).arg[k] = (i % 2 == 0) ? v : -v;
                    // }
                    // Next values
                    for(; i < capacity; ++i)
                    {
                        TData const v = rngWrapper.getNumber(dist, eng);
                        args(i).arg[k] = (i % 2 == 0) ? v : -v;
                    }
                    break;
                }
            }
            assert(matchedSwitch);
        }
    }
} // namespace mathtest

#if ALPAKA_COMP_CLANG
#    pragma clang diagnostic pop
#endif