Skip to content

Commit 5222f85

Browse files
riptl authored and ripatel-fd committed
chacha: add ChaCha8 backend and bench
1 parent 71259cc commit 5222f85

21 files changed

+610
-522
lines changed

src/ballet/chacha/Local.mk

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# Base ChaCha support
2+
$(call add-hdrs,fd_chacha.h)
3+
# Exactly one block-function object is added to fd_ballet: fd_chacha_sse
# when FD_HAS_SSE is defined, otherwise fd_chacha.
ifdef FD_HAS_SSE
4+
$(call add-objs,fd_chacha_sse,fd_ballet)
5+
else
6+
$(call add-objs,fd_chacha,fd_ballet)
7+
endif
8+
$(call make-unit-test,test_chacha,test_chacha,fd_ballet fd_util)
9+
$(call run-unit-test,test_chacha)
10+
11+
# ChaCha-RNG support (Rust rand_chacha compatible)
12+
$(call add-hdrs,fd_chacha_rng.h)
13+
$(call add-objs,fd_chacha_rng,fd_ballet)
14+
# The AVX512 and AVX objects are added in addition to fd_chacha_rng.  The
# two checks are independent, so both objects are built when both flags
# are defined.
ifdef FD_HAS_AVX512
15+
$(call add-objs,fd_chacha_rng_avx512,fd_ballet)
16+
endif
17+
ifdef FD_HAS_AVX
18+
$(call add-objs,fd_chacha_rng_avx,fd_ballet)
19+
endif
20+
$(call make-unit-test,test_chacha_rng,test_chacha_rng,fd_ballet fd_util)
21+
# NOTE(review): test_chacha_rng_roll is built but has no run-unit-test
# entry below -- confirm that leaving it out of the test run is intentional.
$(call make-unit-test,test_chacha_rng_roll,test_chacha_rng_roll,fd_ballet fd_util)
22+
$(call run-unit-test,test_chacha_rng)

src/ballet/chacha20/fd_chacha20.c renamed to src/ballet/chacha/fd_chacha.c

Lines changed: 32 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,26 @@
1-
#include "fd_chacha20.h"
1+
#include "fd_chacha.h"
22

33
/* Reference implementation of the ChaCha block function (ChaCha8 and ChaCha20).
44
55
FIXME Not optimized for high performance. Trivially parallelizable
66
via SSE or AVX if required. */
77

88
static inline void
9-
fd_chacha20_quarter_round( uint * a,
10-
uint * b,
11-
uint * c,
12-
uint * d ) {
9+
fd_chacha_quarter_round( uint * a,
10+
uint * b,
11+
uint * c,
12+
uint * d ) {
1313
*a += *b; *d ^= *a; *d = fd_uint_rotate_left(*d, 16);
1414
*c += *d; *b ^= *c; *b = fd_uint_rotate_left(*b, 12);
1515
*a += *b; *d ^= *a; *d = fd_uint_rotate_left(*d, 8);
1616
*c += *d; *b ^= *c; *b = fd_uint_rotate_left(*b, 7);
1717
}
1818

19-
void *
20-
fd_chacha20_block( void * _block,
21-
void const * _key,
22-
void const * _idx_nonce ) {
19+
__attribute__((always_inline)) static inline void *
20+
fd_chacha_block( void * _block,
21+
void const * _key,
22+
void const * _idx_nonce,
23+
ulong rnd2_cnt ) {
2324

2425
uint * block = __builtin_assume_aligned( _block, 64UL );
2526
uint const * key = __builtin_assume_aligned( _key, 32UL );
@@ -55,15 +56,15 @@ fd_chacha20_block( void * _block,
5556
/* Run rnd2_cnt double rounds, i.e. 2*rnd2_cnt ChaCha rounds.
5657
(Each iteration does a column round and a diagonal round.) */
5758

58-
for( ulong i=0UL; i<10UL; i++ ) {
59-
fd_chacha20_quarter_round( &block[ 0 ], &block[ 4 ], &block[ 8 ], &block[ 12 ] );
60-
fd_chacha20_quarter_round( &block[ 1 ], &block[ 5 ], &block[ 9 ], &block[ 13 ] );
61-
fd_chacha20_quarter_round( &block[ 2 ], &block[ 6 ], &block[ 10 ], &block[ 14 ] );
62-
fd_chacha20_quarter_round( &block[ 3 ], &block[ 7 ], &block[ 11 ], &block[ 15 ] );
63-
fd_chacha20_quarter_round( &block[ 0 ], &block[ 5 ], &block[ 10 ], &block[ 15 ] );
64-
fd_chacha20_quarter_round( &block[ 1 ], &block[ 6 ], &block[ 11 ], &block[ 12 ] );
65-
fd_chacha20_quarter_round( &block[ 2 ], &block[ 7 ], &block[ 8 ], &block[ 13 ] );
66-
fd_chacha20_quarter_round( &block[ 3 ], &block[ 4 ], &block[ 9 ], &block[ 14 ] );
59+
for( ulong i=0UL; i<rnd2_cnt; i++ ) {
60+
fd_chacha_quarter_round( &block[ 0 ], &block[ 4 ], &block[ 8 ], &block[ 12 ] );
61+
fd_chacha_quarter_round( &block[ 1 ], &block[ 5 ], &block[ 9 ], &block[ 13 ] );
62+
fd_chacha_quarter_round( &block[ 2 ], &block[ 6 ], &block[ 10 ], &block[ 14 ] );
63+
fd_chacha_quarter_round( &block[ 3 ], &block[ 7 ], &block[ 11 ], &block[ 15 ] );
64+
fd_chacha_quarter_round( &block[ 0 ], &block[ 5 ], &block[ 10 ], &block[ 15 ] );
65+
fd_chacha_quarter_round( &block[ 1 ], &block[ 6 ], &block[ 11 ], &block[ 12 ] );
66+
fd_chacha_quarter_round( &block[ 2 ], &block[ 7 ], &block[ 8 ], &block[ 13 ] );
67+
fd_chacha_quarter_round( &block[ 3 ], &block[ 4 ], &block[ 9 ], &block[ 14 ] );
6768
}
6869

6970
/* Complete the block by adding the input state */
@@ -74,3 +75,16 @@ fd_chacha20_block( void * _block,
7475
return (void *)block;
7576
}
7677

78+
void *
79+
fd_chacha8_block( void * _block,
80+
void const * _key,
81+
void const * _idx_nonce ) {
82+
return fd_chacha_block( _block, _key, _idx_nonce, 4UL );
83+
}
84+
85+
void *
86+
fd_chacha20_block( void * _block,
87+
void const * _key,
88+
void const * _idx_nonce ) {
89+
return fd_chacha_block( _block, _key, _idx_nonce, 10UL );
90+
}

src/ballet/chacha20/fd_chacha20.h renamed to src/ballet/chacha/fd_chacha.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@
33

44
#include "../fd_ballet_base.h"
55

6-
/* FD_CHACHA20_BLOCK_SZ is the output size of the ChaCha20 block function. */
6+
/* FD_CHACHA_BLOCK_SZ is the output size of the ChaCha block function (ChaCha8 and ChaCha20). */
77

8-
#define FD_CHACHA20_BLOCK_SZ (64UL)
8+
#define FD_CHACHA_BLOCK_SZ (64UL)
99

1010
/* FD_CHACHA20_KEY_SZ is the size of the ChaCha20 encryption key */
1111

@@ -23,6 +23,11 @@ FD_PROTOTYPES_BEGIN
2323
2424
FIXME this should probably do multiple blocks */
2525

26+
/* fd_chacha8_block is the reduced-round (8 rounds instead of 20)
   sibling of fd_chacha20_block.  It takes the same block, key and
   idx_nonce arguments and has the same return type. */
void *
27+
fd_chacha8_block( void * block,
28+
void const * key,
29+
void const * idx_nonce );
30+
2631
void *
2732
fd_chacha20_block( void * block,
2833
void const * key,

src/ballet/chacha/fd_chacha_rng.c

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
#include "fd_chacha_rng.h"
2+
3+
FD_FN_CONST ulong
4+
fd_chacha_rng_align( void ) {
5+
return alignof(fd_chacha_rng_t);
6+
}
7+
8+
FD_FN_CONST ulong
9+
fd_chacha_rng_footprint( void ) {
10+
return sizeof(fd_chacha_rng_t);
11+
}
12+
13+
void *
14+
fd_chacha_rng_new( void * shmem, int mode ) {
15+
if( FD_UNLIKELY( !shmem ) ) {
16+
FD_LOG_WARNING(( "NULL shmem" ));
17+
return NULL;
18+
}
19+
if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)shmem, alignof(fd_chacha_rng_t) ) ) ) {
20+
FD_LOG_WARNING(( "misaligned shmem" ));
21+
return NULL;
22+
}
23+
memset( shmem, 0, sizeof(fd_chacha_rng_t) );
24+
if( FD_UNLIKELY( (mode!=FD_CHACHA_RNG_MODE_MOD) & (mode!=FD_CHACHA_RNG_MODE_SHIFT) ) ) {
25+
FD_LOG_WARNING(( "invalid mode" ));
26+
return NULL;
27+
}
28+
((fd_chacha_rng_t *)shmem)->mode = mode;
29+
30+
return shmem;
31+
}
32+
33+
fd_chacha_rng_t *
34+
fd_chacha_rng_join( void * shrng ) {
35+
if( FD_UNLIKELY( !shrng ) ) {
36+
FD_LOG_WARNING(( "NULL shrng" ));
37+
return NULL;
38+
}
39+
return (fd_chacha_rng_t *)shrng;
40+
}
41+
42+
void *
43+
fd_chacha_rng_leave( fd_chacha_rng_t * rng ) {
44+
if( FD_UNLIKELY( !rng ) ) {
45+
FD_LOG_WARNING(( "NULL rng" ));
46+
return NULL;
47+
}
48+
return (void *)rng;
49+
}
50+
51+
void *
52+
fd_chacha_rng_delete( void * shrng ) {
53+
if( FD_UNLIKELY( !shrng ) ) {
54+
FD_LOG_WARNING(( "NULL shrng" ));
55+
return NULL;
56+
}
57+
memset( shrng, 0, sizeof(fd_chacha_rng_t) );
58+
return shrng;
59+
}
60+
61+
/* fd_chacha20_rng_init (re)seeds rng.  It resets the buffer
   read/fill offsets to zero, copies FD_CHACHA20_KEY_SZ bytes from key
   into rng->key, and then calls fd_chacha20_rng_private_refill.
   Returns rng.  Neither rng nor key is checked for NULL. */

fd_chacha_rng_t *
fd_chacha20_rng_init( fd_chacha_rng_t * rng,
                      void const *      key ) {
  /* Start from an empty buffer ... */
  rng->buf_fill = 0UL;
  rng->buf_off  = 0UL;

  /* ... install the new key ... */
  memcpy( rng->key, key, FD_CHACHA20_KEY_SZ );

  /* ... and refill. */
  fd_chacha20_rng_private_refill( rng );
  return rng;
}
70+
71+
static void
72+
fd_chacha_rng_refill_seq( fd_chacha_rng_t * rng,
73+
void * (* block_fn)( void *, void const *, void const * ) ) {
74+
ulong fill_target = FD_CHACHA_RNG_BUFSZ - FD_CHACHA_BLOCK_SZ;
75+
76+
ulong buf_avail;
77+
while( (buf_avail=(rng->buf_fill - rng->buf_off))<fill_target ) {
78+
ulong idx = rng->buf_fill >> 6;
79+
uint idx_nonce[4] __attribute__((aligned(16))) =
80+
{ (uint)idx, 0U, 0U, 0U };
81+
block_fn( rng->buf + (rng->buf_fill % FD_CHACHA_RNG_BUFSZ),
82+
rng->key,
83+
idx_nonce );
84+
rng->buf_fill += (uint)FD_CHACHA_BLOCK_SZ;
85+
}
86+
}
87+
88+
void
89+
fd_chacha8_rng_refill_seq( fd_chacha_rng_t * rng ) {
90+
fd_chacha_rng_refill_seq( rng, fd_chacha8_block );
91+
}
92+
93+
void
94+
fd_chacha20_rng_refill_seq( fd_chacha_rng_t * rng ) {
95+
fd_chacha_rng_refill_seq( rng, fd_chacha20_block );
96+
}

0 commit comments

Comments
 (0)