Skip to content

Commit ed641cf

Browse files
riptl authored and ripatel-fd committed
blake3: sync with upstream
Import AVX512 improvements and fast XOF output
1 parent f30a65f commit ed641cf

File tree

11 files changed

+663
-389
lines changed

11 files changed

+663
-389
lines changed

NOTICE

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,7 @@ From https://cr.yp.to/chacha.html and https://datatracker.ietf.org/doc/html/rfc7
235235
=======================================================================
236236

237237
The BLAKE3 implementation in src/ballet/blake3 is currently a modified
238-
implementation of the BLAKE3 project implementation circa 2023-May.
238+
implementation of the BLAKE3 project implementation circa 2025-Aug.
239239
From
240240
https://github.com/BLAKE3-team/BLAKE3/tree/master/c
241241

src/ballet/blake3/blake3.c

Lines changed: 134 additions & 115 deletions
Large diffs are not rendered by default.

src/ballet/blake3/blake3.h

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
// Source originally from https://github.com/BLAKE3-team/BLAKE3
2-
// From commit: 64747d48ffe9d1fbf4b71e94cabeb8a211461081
2+
// From commit: df610ddc3b93841ffc59a87e3da659a15910eb46
33

44
#ifndef BLAKE3_H
55
#define BLAKE3_H
@@ -11,7 +11,9 @@
1111
extern "C" {
1212
#endif
1313

14-
#define BLAKE3_VERSION_STRING "1.3.3"
14+
#define BLAKE3_API
15+
16+
#define BLAKE3_VERSION_STRING "1.8.2"
1517
#define BLAKE3_KEY_LEN 32
1618
#define BLAKE3_OUT_LEN 32
1719
#define BLAKE3_BLOCK_LEN 64
@@ -41,20 +43,20 @@ typedef struct {
4143
uint8_t cv_stack[(BLAKE3_MAX_DEPTH + 1) * BLAKE3_OUT_LEN];
4244
} blake3_hasher;
4345

44-
const char *fd_blake3_version(void);
45-
void fd_blake3_hasher_init(blake3_hasher *self);
46-
void fd_blake3_hasher_init_keyed(blake3_hasher *self,
47-
const uint8_t key[BLAKE3_KEY_LEN]);
48-
void fd_blake3_hasher_init_derive_key(blake3_hasher *self, const char *context);
49-
void fd_blake3_hasher_init_derive_key_raw(blake3_hasher *self, const void *context,
50-
size_t context_len);
51-
void fd_blake3_hasher_update(blake3_hasher *self, const void *input,
52-
size_t input_len);
53-
void fd_blake3_hasher_finalize(const blake3_hasher *self, uint8_t *out,
54-
size_t out_len);
55-
void fd_blake3_hasher_finalize_seek(const blake3_hasher *self, uint64_t seek,
56-
uint8_t *out, size_t out_len);
57-
void fd_blake3_hasher_reset(blake3_hasher *self);
46+
BLAKE3_API const char *blake3_version(void);
47+
BLAKE3_API void blake3_hasher_init(blake3_hasher *self);
48+
BLAKE3_API void blake3_hasher_init_keyed(blake3_hasher *self,
49+
const uint8_t key[BLAKE3_KEY_LEN]);
50+
BLAKE3_API void blake3_hasher_init_derive_key(blake3_hasher *self, const char *context);
51+
BLAKE3_API void blake3_hasher_init_derive_key_raw(blake3_hasher *self, const void *context,
52+
size_t context_len);
53+
BLAKE3_API void blake3_hasher_update(blake3_hasher *self, const void *input,
54+
size_t input_len);
55+
BLAKE3_API void blake3_hasher_finalize(const blake3_hasher *self, uint8_t *out,
56+
size_t out_len);
57+
BLAKE3_API void blake3_hasher_finalize_seek(const blake3_hasher *self, uint64_t seek,
58+
uint8_t *out, size_t out_len);
59+
BLAKE3_API void blake3_hasher_reset(blake3_hasher *self);
5860

5961
#ifdef __cplusplus
6062
}

src/ballet/blake3/blake3_avx2.c

Lines changed: 25 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11

22
// Source originally from https://github.com/BLAKE3-team/BLAKE3
3-
// From commit: 64747d48ffe9d1fbf4b71e94cabeb8a211461081
3+
// From commit: 2dd4e57f68d85f3983b1880b66250fc7bdf0b7c8
44

55
#include "blake3_impl.h"
66

@@ -171,7 +171,7 @@ INLINE void transpose_vecs(__m256i vecs[DEGREE]) {
171171
__m256i gh_0145 = _mm256_unpacklo_epi32(vecs[6], vecs[7]);
172172
__m256i gh_2367 = _mm256_unpackhi_epi32(vecs[6], vecs[7]);
173173

174-
// Interleave 64-bit lates. The low unpack is lanes 00/22 and the high is
174+
// Interleave 64-bit lanes. The low unpack is lanes 00/22 and the high is
175175
// 11/33.
176176
__m256i abcd_04 = _mm256_unpacklo_epi64(ab_0145, cd_0145);
177177
__m256i abcd_15 = _mm256_unpackhi_epi64(ab_0145, cd_0145);
@@ -232,10 +232,10 @@ INLINE void load_counters(uint64_t counter, bool increment_counter,
232232
}
233233

234234
static
235-
void fd_blake3_hash8_avx2(const uint8_t *const *inputs, size_t blocks,
236-
const uint32_t key[8], uint64_t counter,
237-
bool increment_counter, uint8_t flags,
238-
uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
235+
void blake3_hash8_avx2(const uint8_t *const *inputs, size_t blocks,
236+
const uint32_t key[8], uint64_t counter,
237+
bool increment_counter, uint8_t flags,
238+
uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
239239
__m256i h_vecs[8] = {
240240
set1(key[0]), set1(key[1]), set1(key[2]), set1(key[3]),
241241
set1(key[4]), set1(key[5]), set1(key[6]), set1(key[7]),
@@ -291,27 +291,27 @@ void fd_blake3_hash8_avx2(const uint8_t *const *inputs, size_t blocks,
291291
}
292292

293293
#if FD_HAS_AVX
294-
void fd_blake3_hash_many_sse41(const uint8_t *const *inputs, size_t num_inputs,
294+
void blake3_hash_many_sse41(const uint8_t *const *inputs, size_t num_inputs,
295+
size_t blocks, const uint32_t key[8],
296+
uint64_t counter, bool increment_counter,
297+
uint8_t flags, uint8_t flags_start,
298+
uint8_t flags_end, uint8_t *out);
299+
#else
300+
void blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs,
295301
size_t blocks, const uint32_t key[8],
296302
uint64_t counter, bool increment_counter,
297303
uint8_t flags, uint8_t flags_start,
298304
uint8_t flags_end, uint8_t *out);
299-
#else
300-
void fd_blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs,
301-
size_t blocks, const uint32_t key[8],
302-
uint64_t counter, bool increment_counter,
303-
uint8_t flags, uint8_t flags_start,
304-
uint8_t flags_end, uint8_t *out);
305305
#endif /* FD_HAS_AVX */
306306

307-
void fd_blake3_hash_many_avx2(const uint8_t *const *inputs, size_t num_inputs,
308-
size_t blocks, const uint32_t key[8],
309-
uint64_t counter, bool increment_counter,
310-
uint8_t flags, uint8_t flags_start,
311-
uint8_t flags_end, uint8_t *out) {
307+
void blake3_hash_many_avx2(const uint8_t *const *inputs, size_t num_inputs,
308+
size_t blocks, const uint32_t key[8],
309+
uint64_t counter, bool increment_counter,
310+
uint8_t flags, uint8_t flags_start,
311+
uint8_t flags_end, uint8_t *out) {
312312
while (num_inputs >= DEGREE) {
313-
fd_blake3_hash8_avx2(inputs, blocks, key, counter, increment_counter, flags,
314-
flags_start, flags_end, out);
313+
blake3_hash8_avx2(inputs, blocks, key, counter, increment_counter, flags,
314+
flags_start, flags_end, out);
315315
if (increment_counter) {
316316
counter += DEGREE;
317317
}
@@ -320,11 +320,11 @@ void fd_blake3_hash_many_avx2(const uint8_t *const *inputs, size_t num_inputs,
320320
out = &out[DEGREE * BLAKE3_OUT_LEN];
321321
}
322322
#if FD_HAS_AVX
323-
fd_blake3_hash_many_sse41(inputs, num_inputs, blocks, key, counter,
324-
increment_counter, flags, flags_start, flags_end, out);
323+
blake3_hash_many_sse41(inputs, num_inputs, blocks, key, counter,
324+
increment_counter, flags, flags_start, flags_end, out);
325325
#else
326-
fd_blake3_hash_many_portable(inputs, num_inputs, blocks, key, counter,
327-
increment_counter, flags, flags_start, flags_end,
328-
out);
326+
blake3_hash_many_portable(inputs, num_inputs, blocks, key, counter,
327+
increment_counter, flags, flags_start, flags_end,
328+
out);
329329
#endif
330330
}

0 commit comments

Comments
 (0)