Skip to content

Commit 795c8cf

Browse files
committed
bela: sync
1 parent aa256bc commit 795c8cf

File tree

9 files changed

+29
-16
lines changed

9 files changed

+29
-16
lines changed

vendor/bela.lock

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
https://github.com/fcharlie/bela/tree/b9657738333083633a89da8b4de282645a442ef6
1+
https://github.com/fcharlie/bela/tree/273ffe59699e48e0cacab33fcc2050fc5ee002d3
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
https://github.com/BLAKE3-team/BLAKE3
2-
f84636e59ce575e5dd127399e0c7de0c1ea595da
2+
64747d48ffe9d1fbf4b71e94cabeb8a211461081

vendor/bela/src/belahash/blake3/blake3.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
extern "C" {
99
#endif
1010

11-
#define BLAKE3_VERSION_STRING "1.3.1"
11+
#define BLAKE3_VERSION_STRING "1.3.3"
1212
#define BLAKE3_KEY_LEN 32
1313
#define BLAKE3_OUT_LEN 32
1414
#define BLAKE3_BLOCK_LEN 64

vendor/bela/src/belahash/blake3/blake3_avx2_x86-64_windows_gnu.S

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1784,7 +1784,7 @@ blake3_hash_many_avx2:
17841784
vmovdqu xmmword ptr [rbx+0x10], xmm1
17851785
jmp 4b
17861786

1787-
.section .rodata
1787+
.section .rdata
17881788
.p2align 6
17891789
ADD0:
17901790
.long 0, 1, 2, 3, 4, 5, 6, 7

vendor/bela/src/belahash/blake3/blake3_avx512.c

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1047,13 +1047,26 @@ INLINE void transpose_msg_vecs16(const uint8_t *const *inputs,
10471047
INLINE void load_counters16(uint64_t counter, bool increment_counter,
10481048
__m512i *out_lo, __m512i *out_hi) {
10491049
const __m512i mask = _mm512_set1_epi32(-(int32_t)increment_counter);
1050-
const __m512i add0 = _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
1051-
const __m512i add1 = _mm512_and_si512(mask, add0);
1052-
__m512i l = _mm512_add_epi32(_mm512_set1_epi32((int32_t)counter), add1);
1053-
__mmask16 carry = _mm512_cmp_epu32_mask(l, add1, _MM_CMPINT_LT);
1054-
__m512i h = _mm512_mask_add_epi32(_mm512_set1_epi32((int32_t)(counter >> 32)), carry, _mm512_set1_epi32((int32_t)(counter >> 32)), _mm512_set1_epi32(1));
1055-
*out_lo = l;
1056-
*out_hi = h;
1050+
const __m512i deltas = _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
1051+
const __m512i masked_deltas = _mm512_and_si512(deltas, mask);
1052+
const __m512i low_words = _mm512_add_epi32(
1053+
_mm512_set1_epi32((int32_t)counter),
1054+
masked_deltas);
1055+
// The carry bit is 1 if the high bit of the word was 1 before addition and is
1056+
// 0 after.
1057+
// NOTE: It would be a bit more natural to use _mm512_cmp_epu32_mask to
1058+
// compute the carry bits here, and originally we did, but that intrinsic is
1059+
// broken under GCC 5.4. See https://github.com/BLAKE3-team/BLAKE3/issues/271.
1060+
const __m512i carries = _mm512_srli_epi32(
1061+
_mm512_andnot_si512(
1062+
low_words, // 0 after (gets inverted by andnot)
1063+
_mm512_set1_epi32((int32_t)counter)), // and 1 before
1064+
31);
1065+
const __m512i high_words = _mm512_add_epi32(
1066+
_mm512_set1_epi32((int32_t)(counter >> 32)),
1067+
carries);
1068+
*out_lo = low_words;
1069+
*out_hi = high_words;
10571070
}
10581071

10591072
static

vendor/bela/src/belahash/blake3/blake3_avx512_x86-64_windows_gnu.S

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2587,7 +2587,7 @@ blake3_compress_xof_avx512:
25872587
add rsp, 72
25882588
ret
25892589

2590-
.section .rodata
2590+
.section .rdata
25912591
.p2align 6
25922592
INDEX0:
25932593
.long 0, 1, 2, 3, 16, 17, 18, 19

vendor/bela/src/belahash/blake3/blake3_impl.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ static const uint8_t MSG_SCHEDULE[7][16] = {
8787
/* x is assumed to be nonzero. */
8888
static unsigned int highest_one(uint64_t x) {
8989
#if defined(__GNUC__) || defined(__clang__)
90-
return 63 ^ __builtin_clzll(x);
90+
return 63 ^ (unsigned int)__builtin_clzll(x);
9191
#elif defined(_MSC_VER) && defined(IS_X86_64)
9292
unsigned long index;
9393
_BitScanReverse64(&index, x);
@@ -117,7 +117,7 @@ static unsigned int highest_one(uint64_t x) {
117117
// Count the number of 1 bits.
118118
INLINE unsigned int popcnt(uint64_t x) {
119119
#if defined(__GNUC__) || defined(__clang__)
120-
return __builtin_popcountll(x);
120+
return (unsigned int)__builtin_popcountll(x);
121121
#else
122122
unsigned int count = 0;
123123
while (x != 0) {

vendor/bela/src/belahash/blake3/blake3_sse2_x86-64_windows_gnu.S

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2301,7 +2301,7 @@ blake3_compress_xof_sse2:
23012301
ret
23022302

23032303

2304-
.section .rodata
2304+
.section .rdata
23052305
.p2align 6
23062306
BLAKE3_IV:
23072307
.long 0x6A09E667, 0xBB67AE85

vendor/bela/src/belahash/blake3/blake3_sse41_x86-64_windows_gnu.S

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2042,7 +2042,7 @@ blake3_compress_xof_sse41:
20422042
ret
20432043

20442044

2045-
.section .rodata
2045+
.section .rdata
20462046
.p2align 6
20472047
BLAKE3_IV:
20482048
.long 0x6A09E667, 0xBB67AE85

0 commit comments

Comments
 (0)