|
| 1 | +/* |
| 2 | +Copyright 2010-2011, D. E. Shaw Research. |
| 3 | +All rights reserved. |
| 4 | +
|
| 5 | +Redistribution and use in source and binary forms, with or without |
| 6 | +modification, are permitted provided that the following conditions are |
| 7 | +met: |
| 8 | +
|
| 9 | +* Redistributions of source code must retain the above copyright |
| 10 | + notice, this list of conditions, and the following disclaimer. |
| 11 | +
|
| 12 | +* Redistributions in binary form must reproduce the above copyright |
| 13 | + notice, this list of conditions, and the following disclaimer in the |
| 14 | + documentation and/or other materials provided with the distribution. |
| 15 | +
|
| 16 | +* Neither the name of D. E. Shaw Research nor the names of its |
| 17 | + contributors may be used to endorse or promote products derived from |
| 18 | + this software without specific prior written permission. |
| 19 | +
|
| 20 | +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 21 | +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 22 | +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 23 | +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 24 | +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 25 | +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 26 | +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 27 | +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 28 | +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 29 | +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 30 | +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 31 | +*/ |
| 32 | +#ifndef __Random123_ars_dot_hpp__ |
| 33 | +#define __Random123_ars_dot_hpp__ |
| 34 | + |
| 35 | +#include "features/compilerfeatures.h" |
| 36 | +#include "array.h" |
| 37 | + |
| 38 | +#if R123_USE_AES_NI |
| 39 | + |
/* Default number of ARS rounds.  Define ARS1xm128i_DEFAULT_ROUNDS before
   this point to choose a different default. */
#if !defined(ARS1xm128i_DEFAULT_ROUNDS)
#define ARS1xm128i_DEFAULT_ROUNDS 7
#endif

/** @ingroup AESNI
    Round count passed to ars1xm128i_R() by the ars1xm128i() macro. */
enum r123_enum_ars1xm128i {
    ars1xm128i_rounds = ARS1xm128i_DEFAULT_ROUNDS
};

/* ARS1xm128i with Weyl keys.  Fast, and Crush-resistant, but NOT CRYPTO. */

/** @ingroup AESNI
    Counter type: the block that ars1xm128i_R() takes and returns. */
typedef struct r123array1xm128i ars1xm128i_ctr_t;

/** @ingroup AESNI
    Key type taken by ars1xm128i_R(). */
typedef struct r123array1xm128i ars1xm128i_key_t;

/** @ingroup AESNI
    User-key type taken by ars1xm128ikeyinit(). */
typedef struct r123array1xm128i ars1xm128i_ukey_t;
| 54 | +/** @ingroup AESNI */ |
| 55 | +R123_STATIC_INLINE ars1xm128i_key_t ars1xm128ikeyinit(ars1xm128i_ukey_t uk) { return uk; } |
| 56 | +/** @ingroup AESNI */ |
| 57 | +R123_STATIC_INLINE ars1xm128i_ctr_t ars1xm128i_R(unsigned int Nrounds, ars1xm128i_ctr_t in, ars1xm128i_key_t k){ |
| 58 | + __m128i kweyl = _mm_set_epi64x(R123_64BIT(0xBB67AE8584CAA73B), /* sqrt(3) - 1.0 */ |
| 59 | + R123_64BIT(0x9E3779B97F4A7C15)); /* golden ratio */ |
| 60 | + /* N.B. the aesenc instructions do the xor *after* |
| 61 | + // so if we want to follow the AES pattern, we |
| 62 | + // have to do the initial xor explicitly */ |
| 63 | + __m128i kk = k.v[0].m; |
| 64 | + __m128i v = _mm_xor_si128(in.v[0].m, kk); |
| 65 | + ars1xm128i_ctr_t ret; |
| 66 | + R123_ASSERT(Nrounds<=10); |
| 67 | + if( Nrounds>1 ){ |
| 68 | + kk = _mm_add_epi64(kk, kweyl); |
| 69 | + v = _mm_aesenc_si128(v, kk); |
| 70 | + } |
| 71 | + if( Nrounds>2 ){ |
| 72 | + kk = _mm_add_epi64(kk, kweyl); |
| 73 | + v = _mm_aesenc_si128(v, kk); |
| 74 | + } |
| 75 | + if( Nrounds>3 ){ |
| 76 | + kk = _mm_add_epi64(kk, kweyl); |
| 77 | + v = _mm_aesenc_si128(v, kk); |
| 78 | + } |
| 79 | + if( Nrounds>4 ){ |
| 80 | + kk = _mm_add_epi64(kk, kweyl); |
| 81 | + v = _mm_aesenc_si128(v, kk); |
| 82 | + } |
| 83 | + if( Nrounds>5 ){ |
| 84 | + kk = _mm_add_epi64(kk, kweyl); |
| 85 | + v = _mm_aesenc_si128(v, kk); |
| 86 | + } |
| 87 | + if( Nrounds>6 ){ |
| 88 | + kk = _mm_add_epi64(kk, kweyl); |
| 89 | + v = _mm_aesenc_si128(v, kk); |
| 90 | + } |
| 91 | + if( Nrounds>7 ){ |
| 92 | + kk = _mm_add_epi64(kk, kweyl); |
| 93 | + v = _mm_aesenc_si128(v, kk); |
| 94 | + } |
| 95 | + if( Nrounds>8 ){ |
| 96 | + kk = _mm_add_epi64(kk, kweyl); |
| 97 | + v = _mm_aesenc_si128(v, kk); |
| 98 | + } |
| 99 | + if( Nrounds>9 ){ |
| 100 | + kk = _mm_add_epi64(kk, kweyl); |
| 101 | + v = _mm_aesenc_si128(v, kk); |
| 102 | + } |
| 103 | + kk = _mm_add_epi64(kk, kweyl); |
| 104 | + v = _mm_aesenclast_si128(v, kk); |
| 105 | + ret.v[0].m = v; |
| 106 | + return ret; |
| 107 | +} |
| 108 | + |
/** @def ars1xm128i
@ingroup AESNI
The ars1xm128i macro is the C API entry point to the @ref AESNI "ARS" CBRNG
with the default number of rounds: it expands to a call of ars1xm128i_R()
with \c ars1xm128i_rounds as the round count. **/
#define ars1xm128i(c,k) ars1xm128i_R(ars1xm128i_rounds, (c), (k))
| 113 | + |
| 114 | +/** @ingroup AESNI */ |
| 115 | +typedef struct r123array4x32 ars4x32_ctr_t; |
| 116 | +/** @ingroup AESNI */ |
| 117 | +typedef struct r123array4x32 ars4x32_key_t; |
| 118 | +/** @ingroup AESNI */ |
| 119 | +typedef struct r123array4x32 ars4x32_ukey_t; |
| 120 | +/** @ingroup AESNI */ |
| 121 | +enum r123_enum_ars4x32 {ars4x32_rounds = ARS1xm128i_DEFAULT_ROUNDS}; |
| 122 | +/** @ingroup AESNI */ |
| 123 | +R123_STATIC_INLINE ars4x32_key_t ars4x32keyinit(ars4x32_ukey_t uk) { return uk; } |
| 124 | +/** @ingroup AESNI */ |
| 125 | +R123_STATIC_INLINE ars4x32_ctr_t ars4x32_R(unsigned int Nrounds, ars4x32_ctr_t c, ars4x32_key_t k){ |
| 126 | + ars1xm128i_ctr_t c128; |
| 127 | + ars1xm128i_key_t k128; |
| 128 | + c128.v[0].m = _mm_set_epi32(c.v[3], c.v[2], c.v[1], c.v[0]); |
| 129 | + k128.v[0].m = _mm_set_epi32(k.v[3], k.v[2], k.v[1], k.v[0]); |
| 130 | + c128 = ars1xm128i_R(Nrounds, c128, k128); |
| 131 | + _mm_storeu_si128((__m128i*)&c.v[0], c128.v[0].m); |
| 132 | + return c; |
| 133 | +} |
| 134 | + |
/** @def ars4x32
@ingroup AESNI
The ars4x32 macro is the C API entry point to the @ref AESNI "ARS" CBRNG
with the default number of rounds: it expands to a call of ars4x32_R() with
\c ars4x32_rounds as the round count. **/
#define ars4x32(c,k) ars4x32_R(ars4x32_rounds, (c), (k))
| 139 | + |
| 140 | +#ifdef __cplusplus |
| 141 | +namespace hydra_r123{ |
| 142 | +/** |
| 143 | +@ingroup AESNI |
| 144 | +
|
| 145 | +ARS1xm128i_R exports the member functions, typedefs and operator overloads required by a @ref CBRNG class. |
| 146 | +
|
| 147 | +ARS1xm128i uses the crypotgraphic AES round function, but a @b non-cryptographc key schedule |
| 148 | +to save time and space. |
| 149 | +
|
| 150 | +ARS1xm128i is only available when the feature-test macro R123_USE_AES_NI is true, which |
| 151 | +should occur only when the compiler is configured to generate AES-NI instructions (or |
| 152 | +when defaults are overridden by compile-time, compiler-command-line options). |
| 153 | +
|
| 154 | +The template argument, ROUNDS, is the number of times the ARS round |
| 155 | +functions will be applied. |
| 156 | +
|
| 157 | +As of September 2011, the authors know of no statistical flaws with |
| 158 | +ROUNDS=5 or more. |
| 159 | +
|
| 160 | +@class ARS1xm128i_R |
| 161 | +
|
| 162 | +*/ |
| 163 | +template<unsigned int ROUNDS> |
| 164 | +struct ARS1xm128i_R{ |
| 165 | + typedef ars1xm128i_ctr_t ctr_type; |
| 166 | + typedef ars1xm128i_key_t key_type; |
| 167 | + typedef ars1xm128i_key_t ukey_type; |
| 168 | + static const unsigned int rounds=ROUNDS; |
| 169 | + R123_FORCE_INLINE(ctr_type operator()(ctr_type ctr, key_type key) const){ |
| 170 | + return ars1xm128i_R(ROUNDS, ctr, key); |
| 171 | + } |
| 172 | +}; |
| 173 | + |
| 174 | +/** @class ARS4x32_R |
| 175 | + @ingroup AESNI |
| 176 | +*/ |
| 177 | + |
| 178 | +template<unsigned int ROUNDS> |
| 179 | +struct ARS4x32_R{ |
| 180 | + typedef ars4x32_ctr_t ctr_type; |
| 181 | + typedef ars4x32_key_t key_type; |
| 182 | + typedef ars4x32_key_t ukey_type; |
| 183 | + static const unsigned int rounds=ROUNDS; |
| 184 | + R123_FORCE_INLINE(ctr_type operator()(ctr_type ctr, key_type key) const){ |
| 185 | + return ars4x32_R(ROUNDS, ctr, key); |
| 186 | + } |
| 187 | +}; |
| 188 | +/** |
| 189 | +@ingroup AESNI |
| 190 | +
|
| 191 | +@class ARS1xm128i_R |
| 192 | + ARS1xm128i is equivalent to ARS1xm128i_R<7>. With 7 rounds, |
| 193 | + the ARS1xm128i CBRNG has a considerable safety margin over the minimum number |
| 194 | + of rounds with no known statistical flaws, but still has excellent |
| 195 | + performance. */ |
| 196 | +typedef ARS1xm128i_R<ars1xm128i_rounds> ARS1xm128i; |
| 197 | +typedef ARS4x32_R<ars4x32_rounds> ARS4x32; |
| 198 | +} // namespace r123 |
| 199 | + |
| 200 | +#endif /* __cplusplus */ |
| 201 | + |
| 202 | +#endif /* R123_USE_AES_NI */ |
| 203 | + |
| 204 | +#endif |
0 commit comments