Skip to content

Commit 2dbb56d

Browse files
committed
detail/random , hydra::ars engine added
1 parent 15b1aa2 commit 2dbb56d

File tree

3 files changed

+390
-3
lines changed

3 files changed

+390
-3
lines changed

hydra/Random.h

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
#include <hydra/detail/utility/Utility_Tuple.h>
4343
#include <hydra/detail/random/philox.h>
4444
#include <hydra/detail/random/threefry.h>
45+
#include <hydra/detail/random/ars.h>
4546

4647
#include <hydra/Range.h>
4748

@@ -66,9 +67,11 @@ namespace hydra{
6667
* \note \p default_random_engine is currently an alias for \p hydra::random::philox, and may change
6768
* in a future version.
6869
*/
70+
6971
//typedef hydra_thrust::random::default_random_engine default_random_engine;
70-
//typedef hydra::random::philox default_random_engine;
71-
typedef hydra::random::threefry default_random_engine;
72+
typedef hydra::random::philox default_random_engine;
73+
//typedef hydra::random::threefry default_random_engine;
74+
//typedef hydra::random::ars default_random_engine;
7275

7376
/*! \typedef minstd_rand0
7477
* \brief A random number engine with predefined parameters which implements a version of
@@ -120,12 +123,19 @@ typedef hydra_thrust::random::taus88 taus88;
120123
*/
121124
typedef hydra::random::philox philox;
122125

123-
/*! \typedef philox
126+
/*! \typedef threefry
124127
* \brief Threefry uses integer addition, bitwise rotation, xor and permutation of words to randomize its output.
125128
*
126129
*/
127130
typedef hydra::random::threefry threefry;
128131

132+
/*! \typedef ars
133+
* \brief Ars uses the cryptographic AES round function, but a @b non-cryptographic key schedule
134+
to save time and space.
135+
*
136+
*/
137+
typedef hydra::random::ars ars;
138+
129139
namespace detail {
130140

131141
namespace random {
Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,204 @@
1+
/*
2+
Copyright 2010-2011, D. E. Shaw Research.
3+
All rights reserved.
4+
5+
Redistribution and use in source and binary forms, with or without
6+
modification, are permitted provided that the following conditions are
7+
met:
8+
9+
* Redistributions of source code must retain the above copyright
10+
notice, this list of conditions, and the following disclaimer.
11+
12+
* Redistributions in binary form must reproduce the above copyright
13+
notice, this list of conditions, and the following disclaimer in the
14+
documentation and/or other materials provided with the distribution.
15+
16+
* Neither the name of D. E. Shaw Research nor the names of its
17+
contributors may be used to endorse or promote products derived from
18+
this software without specific prior written permission.
19+
20+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21+
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23+
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24+
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25+
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26+
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31+
*/
32+
#ifndef __Random123_ars_dot_hpp__
33+
#define __Random123_ars_dot_hpp__
34+
35+
#include "features/compilerfeatures.h"
36+
#include "array.h"
37+
38+
#if R123_USE_AES_NI
39+
40+
#ifndef ARS1xm128i_DEFAULT_ROUNDS
41+
#define ARS1xm128i_DEFAULT_ROUNDS 7
42+
#endif
43+
44+
/** @ingroup AESNI */
45+
enum r123_enum_ars1xm128i {ars1xm128i_rounds = ARS1xm128i_DEFAULT_ROUNDS};
46+
47+
/* ARS1xm128i with Weyl keys. Fast, and Crush-resistant, but NOT CRYPTO. */
48+
/** @ingroup AESNI */
49+
typedef struct r123array1xm128i ars1xm128i_ctr_t;
50+
/** @ingroup AESNI */
51+
typedef struct r123array1xm128i ars1xm128i_key_t;
52+
/** @ingroup AESNI */
53+
typedef struct r123array1xm128i ars1xm128i_ukey_t;
54+
/** @ingroup AESNI */
55+
/* The user-key and key types are both r123array1xm128i, so key initialization hands uk back unchanged. */
R123_STATIC_INLINE ars1xm128i_key_t ars1xm128ikeyinit(ars1xm128i_ukey_t uk) { return uk; }
56+
/** @ingroup AESNI */
57+
/*
 * Apply the ARS transform to one 128-bit counter block.
 *
 * Nrounds  requested number of rounds; asserted to be at most 10.
 * in       counter block to transform.
 * k        key block; it seeds the Weyl-sequence round keys.
 *
 * Returns the transformed block.  The sequence is: one explicit xor with the
 * key, then (Nrounds - 1) aesenc rounds (never more than 9), then a single
 * aesenclast round that is always executed.  Before every AES round the
 * round key is advanced by a fixed 64-bit-lane-wise additive constant.
 */
R123_STATIC_INLINE ars1xm128i_ctr_t ars1xm128i_R(unsigned int Nrounds, ars1xm128i_ctr_t in, ars1xm128i_key_t k){
    /* Per-round key increment. */
    const __m128i weyl_step = _mm_set_epi64x(
        R123_64BIT(0xBB67AE8584CAA73B),   /* sqrt(3) - 1.0 */
        R123_64BIT(0x9E3779B97F4A7C15));  /* golden ratio */
    __m128i round_key = k.v[0].m;
    /* The aesenc instructions xor the round key in *after* the round, so the
       initial xor of the AES pattern has to be done by hand. */
    __m128i state = _mm_xor_si128(in.v[0].m, round_key);
    ars1xm128i_ctr_t result;
    unsigned int r;

    R123_ASSERT(Nrounds<=10);

    /* Middle rounds: one for every r in [1, 9] with r < Nrounds. */
    for( r = 1; r < Nrounds && r <= 9; ++r ){
        round_key = _mm_add_epi64(round_key, weyl_step);
        state = _mm_aesenc_si128(state, round_key);
    }

    /* Final round, performed regardless of Nrounds. */
    round_key = _mm_add_epi64(round_key, weyl_step);
    state = _mm_aesenclast_si128(state, round_key);

    result.v[0].m = state;
    return result;
}
108+
109+
/** @def ars1xm128i
110+
@ingroup AESNI
111+
The ars1xm128i macro provides a C API interface to the @ref AESNI "ARS" CBRNG with the default number of rounds i.e. \c ars1xm128i_rounds **/
112+
#define ars1xm128i(c,k) ars1xm128i_R(ars1xm128i_rounds, c, k)
113+
114+
/** @ingroup AESNI */
115+
typedef struct r123array4x32 ars4x32_ctr_t;
116+
/** @ingroup AESNI */
117+
typedef struct r123array4x32 ars4x32_key_t;
118+
/** @ingroup AESNI */
119+
typedef struct r123array4x32 ars4x32_ukey_t;
120+
/** @ingroup AESNI */
121+
enum r123_enum_ars4x32 {ars4x32_rounds = ARS1xm128i_DEFAULT_ROUNDS};
122+
/** @ingroup AESNI */
123+
/* The user-key and key types are both r123array4x32, so key initialization hands uk back unchanged. */
R123_STATIC_INLINE ars4x32_key_t ars4x32keyinit(ars4x32_ukey_t uk) { return uk; }
124+
/** @ingroup AESNI */
125+
/*
 * 4x32-bit front end for ars1xm128i_R.
 *
 * Nrounds  number of rounds, passed straight through to ars1xm128i_R.
 * c        counter as four 32-bit words.
 * k        key as four 32-bit words.
 *
 * The four words of the counter and of the key are each packed into one
 * 128-bit value, ars1xm128i_R is run on the pair, and the 128-bit result is
 * stored back over the words of c, which is then returned.
 */
R123_STATIC_INLINE ars4x32_ctr_t ars4x32_R(unsigned int Nrounds, ars4x32_ctr_t c, ars4x32_key_t k){
    ars1xm128i_key_t wide_key;
    ars1xm128i_ctr_t wide_ctr;
    ars1xm128i_ctr_t mixed;

    wide_key.v[0].m = _mm_set_epi32(k.v[3], k.v[2], k.v[1], k.v[0]);
    wide_ctr.v[0].m = _mm_set_epi32(c.v[3], c.v[2], c.v[1], c.v[0]);

    mixed = ars1xm128i_R(Nrounds, wide_ctr, wide_key);

    /* Unaligned store: write the 128-bit result starting at c.v[0]. */
    _mm_storeu_si128((__m128i*)&c.v[0], mixed.v[0].m);
    return c;
}
134+
135+
/** @def ars4x32
136+
@ingroup AESNI
137+
The ars4x32 macro provides a C API interface to the @ref AESNI "ARS" CBRNG with the default number of rounds i.e. \c ars4x32_rounds **/
138+
#define ars4x32(c,k) ars4x32_R(ars4x32_rounds, c, k)
139+
140+
#ifdef __cplusplus
141+
namespace hydra_r123{
142+
/**
143+
@ingroup AESNI
144+
145+
ARS1xm128i_R exports the member functions, typedefs and operator overloads required by a @ref CBRNG class.
146+
147+
ARS1xm128i uses the cryptographic AES round function, but a @b non-cryptographic key schedule
148+
to save time and space.
149+
150+
ARS1xm128i is only available when the feature-test macro R123_USE_AES_NI is true, which
151+
should occur only when the compiler is configured to generate AES-NI instructions (or
152+
when defaults are overridden by compile-time, compiler-command-line options).
153+
154+
The template argument, ROUNDS, is the number of times the ARS round
155+
functions will be applied.
156+
157+
As of September 2011, the authors know of no statistical flaws with
158+
ROUNDS=5 or more.
159+
160+
@class ARS1xm128i_R
161+
162+
*/
163+
template<unsigned int ROUNDS>
164+
struct ARS1xm128i_R{
165+
typedef ars1xm128i_ctr_t ctr_type;
166+
typedef ars1xm128i_key_t key_type;
167+
typedef ars1xm128i_key_t ukey_type;
168+
static const unsigned int rounds=ROUNDS;
169+
R123_FORCE_INLINE(ctr_type operator()(ctr_type ctr, key_type key) const){
170+
return ars1xm128i_R(ROUNDS, ctr, key);
171+
}
172+
};
173+
174+
/** @class ARS4x32_R
175+
@ingroup AESNI
176+
*/
177+
178+
template<unsigned int ROUNDS>
179+
struct ARS4x32_R{
180+
typedef ars4x32_ctr_t ctr_type;
181+
typedef ars4x32_key_t key_type;
182+
typedef ars4x32_key_t ukey_type;
183+
static const unsigned int rounds=ROUNDS;
184+
R123_FORCE_INLINE(ctr_type operator()(ctr_type ctr, key_type key) const){
185+
return ars4x32_R(ROUNDS, ctr, key);
186+
}
187+
};
188+
/**
189+
@ingroup AESNI
190+
191+
@class ARS1xm128i_R
192+
ARS1xm128i is equivalent to ARS1xm128i_R<7>. With 7 rounds,
193+
the ARS1xm128i CBRNG has a considerable safety margin over the minimum number
194+
of rounds with no known statistical flaws, but still has excellent
195+
performance. */
196+
typedef ARS1xm128i_R<ars1xm128i_rounds> ARS1xm128i;
197+
typedef ARS4x32_R<ars4x32_rounds> ARS4x32;
198+
} // namespace hydra_r123
199+
200+
#endif /* __cplusplus */
201+
202+
#endif /* R123_USE_AES_NI */
203+
204+
#endif

0 commit comments

Comments
 (0)