Skip to content

Commit 4437d6e

Browse files
committed
8-way AVX2 implementation for double SHA256 on 64-byte inputs
1 parent 230294b commit 4437d6e

File tree

8 files changed

+402
-5
lines changed

8 files changed

+402
-5
lines changed

configure.ac

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,7 @@ fi
305305
# compatibility.
306306
AX_CHECK_COMPILE_FLAG([-msse4.2],[[SSE42_CXXFLAGS="-msse4.2"]],,[[$CXXFLAG_WERROR]])
307307
AX_CHECK_COMPILE_FLAG([-msse4.1],[[SSE41_CXXFLAGS="-msse4.1"]],,[[$CXXFLAG_WERROR]])
308+
AX_CHECK_COMPILE_FLAG([-mavx -mavx2],[[AVX2_CXXFLAGS="-mavx -mavx2"]],,[[$CXXFLAG_WERROR]])
308309

309310
TEMP_CXXFLAGS="$CXXFLAGS"
310311
CXXFLAGS="$CXXFLAGS $SSE42_CXXFLAGS"
@@ -347,6 +348,25 @@ AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
347348
)
348349
CXXFLAGS="$TEMP_CXXFLAGS"
349350

351+
TEMP_CXXFLAGS="$CXXFLAGS"
352+
CXXFLAGS="$CXXFLAGS $AVX2_CXXFLAGS"
353+
AC_MSG_CHECKING(for AVX2 intrinsics)
354+
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
355+
#include <stdint.h>
356+
#if defined(_MSC_VER)
357+
#include <immintrin.h>
358+
#elif defined(__GNUC__) && defined(__AVX2__)
359+
#include <x86intrin.h>
360+
#endif
361+
]],[[
362+
__m256i l = _mm256_set1_epi32(0);
363+
return _mm256_extract_epi32(l, 7);
364+
]])],
365+
[ AC_MSG_RESULT(yes); enable_avx2=yes; AC_DEFINE(ENABLE_AVX2, 1, [Define this symbol to build code that uses AVX2 intrinsics]) ],
366+
[ AC_MSG_RESULT(no)]
367+
)
368+
CXXFLAGS="$TEMP_CXXFLAGS"
369+
350370
CPPFLAGS="$CPPFLAGS -DHAVE_BUILD_INFO -D__STDC_FORMAT_MACROS"
351371

352372
AC_ARG_WITH([utils],
@@ -1266,6 +1286,7 @@ AM_CONDITIONAL([GLIBC_BACK_COMPAT],[test x$use_glibc_compat = xyes])
12661286
AM_CONDITIONAL([HARDEN],[test x$use_hardening = xyes])
12671287
AM_CONDITIONAL([ENABLE_HWCRC32],[test x$enable_hwcrc32 = xyes])
12681288
AM_CONDITIONAL([ENABLE_SSE41],[test x$enable_sse41 = xyes])
1289+
AM_CONDITIONAL([ENABLE_AVX2],[test x$enable_avx2 = xyes])
12691290
AM_CONDITIONAL([USE_ASM],[test x$use_asm = xyes])
12701291

12711292
AC_DEFINE(CLIENT_VERSION_MAJOR, _CLIENT_VERSION_MAJOR, [Major version])
@@ -1305,6 +1326,7 @@ AC_SUBST(SANITIZER_CXXFLAGS)
13051326
AC_SUBST(SANITIZER_LDFLAGS)
13061327
AC_SUBST(SSE42_CXXFLAGS)
13071328
AC_SUBST(SSE41_CXXFLAGS)
1329+
AC_SUBST(AVX2_CXXFLAGS)
13081330
AC_SUBST(LIBTOOL_APP_LDFLAGS)
13091331
AC_SUBST(USE_UPNP)
13101332
AC_SUBST(USE_QRCODE)

src/Makefile.am

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ LIBBITCOIN_CLI=libbitcoin_cli.a
3131
LIBBITCOIN_UTIL=libbitcoin_util.a
3232
LIBBITCOIN_CRYPTO=crypto/libbitcoin_crypto.a
3333
LIBBITCOIN_CRYPTO_SSE41=crypto/libbitcoin_crypto_sse41.a
34+
LIBBITCOIN_CRYPTO_AVX2=crypto/libbitcoin_crypto_avx2.a
3435
LIBBITCOINQT=qt/libbitcoinqt.a
3536
LIBSECP256K1=secp256k1/libsecp256k1.la
3637

@@ -52,6 +53,7 @@ $(LIBSECP256K1): $(wildcard secp256k1/src/*) $(wildcard secp256k1/include/*)
5253
EXTRA_LIBRARIES += \
5354
$(LIBBITCOIN_CRYPTO) \
5455
$(LIBBITCOIN_CRYPTO_SSE41) \
56+
$(LIBBITCOIN_CRYPTO_AVX2) \
5557
$(LIBBITCOIN_UTIL) \
5658
$(LIBBITCOIN_COMMON) \
5759
$(LIBBITCOIN_CONSENSUS) \
@@ -299,6 +301,14 @@ crypto_libbitcoin_crypto_sse41_a_CPPFLAGS += -DENABLE_SSE41
299301
endif
300302
crypto_libbitcoin_crypto_sse41_a_SOURCES = crypto/sha256_sse41.cpp
301303

304+
crypto_libbitcoin_crypto_avx2_a_CXXFLAGS = $(AM_CXXFLAGS) $(PIE_FLAGS)
305+
crypto_libbitcoin_crypto_avx2_a_CPPFLAGS = $(AM_CPPFLAGS)
306+
if ENABLE_AVX2
307+
crypto_libbitcoin_crypto_avx2_a_CXXFLAGS += $(AVX2_CXXFLAGS)
308+
crypto_libbitcoin_crypto_avx2_a_CPPFLAGS += -DENABLE_AVX2
309+
endif
310+
crypto_libbitcoin_crypto_avx2_a_SOURCES = crypto/sha256_avx2.cpp
311+
302312
# consensus: shared between all executables that validate any consensus rules.
303313
libbitcoin_consensus_a_CPPFLAGS = $(AM_CPPFLAGS) $(BITCOIN_INCLUDES)
304314
libbitcoin_consensus_a_CXXFLAGS = $(AM_CXXFLAGS) $(PIE_FLAGS)
@@ -422,6 +432,7 @@ bitcoind_LDADD = \
422432
$(LIBBITCOIN_CONSENSUS) \
423433
$(LIBBITCOIN_CRYPTO) \
424434
$(LIBBITCOIN_CRYPTO_SSE41) \
435+
$(LIBBITCOIN_CRYPTO_AVX2) \
425436
$(LIBLEVELDB) \
426437
$(LIBLEVELDB_SSE42) \
427438
$(LIBMEMENV) \
@@ -444,7 +455,8 @@ bitcoin_cli_LDADD = \
444455
$(LIBUNIVALUE) \
445456
$(LIBBITCOIN_UTIL) \
446457
$(LIBBITCOIN_CRYPTO) \
447-
$(LIBBITCOIN_CRYPTO_SSE41)
458+
$(LIBBITCOIN_CRYPTO_SSE41) \
459+
$(LIBBITCOIN_CRYPTO_AVX2)
448460

449461
bitcoin_cli_LDADD += $(BOOST_LIBS) $(SSL_LIBS) $(CRYPTO_LIBS) $(EVENT_LIBS)
450462
#
@@ -466,6 +478,7 @@ bitcoin_tx_LDADD = \
466478
$(LIBBITCOIN_CONSENSUS) \
467479
$(LIBBITCOIN_CRYPTO) \
468480
$(LIBBITCOIN_CRYPTO_SSE41) \
481+
$(LIBBITCOIN_CRYPTO_AVX2) \
469482
$(LIBSECP256K1)
470483

471484
bitcoin_tx_LDADD += $(BOOST_LIBS) $(CRYPTO_LIBS)

src/Makefile.bench.include

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ bench_bench_bitcoin_LDADD = \
4040
$(LIBBITCOIN_CONSENSUS) \
4141
$(LIBBITCOIN_CRYPTO) \
4242
$(LIBBITCOIN_CRYPTO_SSE41) \
43+
$(LIBBITCOIN_CRYPTO_AVX2) \
4344
$(LIBLEVELDB) \
4445
$(LIBLEVELDB_SSE42) \
4546
$(LIBMEMENV) \

src/Makefile.qt.include

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -407,7 +407,7 @@ endif
407407
if ENABLE_ZMQ
408408
qt_bitcoin_qt_LDADD += $(LIBBITCOIN_ZMQ) $(ZMQ_LIBS)
409409
endif
410-
qt_bitcoin_qt_LDADD += $(LIBBITCOIN_CLI) $(LIBBITCOIN_COMMON) $(LIBBITCOIN_UTIL) $(LIBBITCOIN_CONSENSUS) $(LIBBITCOIN_CRYPTO) $(LIBBITCOIN_CRYPTO_SSE41) $(LIBUNIVALUE) $(LIBLEVELDB) $(LIBLEVELDB_SSE42) $(LIBMEMENV) \
410+
qt_bitcoin_qt_LDADD += $(LIBBITCOIN_CLI) $(LIBBITCOIN_COMMON) $(LIBBITCOIN_UTIL) $(LIBBITCOIN_CONSENSUS) $(LIBBITCOIN_CRYPTO) $(LIBBITCOIN_CRYPTO_SSE41) $(LIBBITCOIN_CRYPTO_AVX2) $(LIBUNIVALUE) $(LIBLEVELDB) $(LIBLEVELDB_SSE42) $(LIBMEMENV) \
411411
$(BOOST_LIBS) $(QT_LIBS) $(QT_DBUS_LIBS) $(QR_LIBS) $(PROTOBUF_LIBS) $(BDB_LIBS) $(SSL_LIBS) $(CRYPTO_LIBS) $(MINIUPNPC_LIBS) $(LIBSECP256K1) \
412412
$(EVENT_PTHREADS_LIBS) $(EVENT_LIBS)
413413
qt_bitcoin_qt_LDFLAGS = $(RELDFLAGS) $(AM_LDFLAGS) $(QT_LDFLAGS) $(LIBTOOL_APP_LDFLAGS)

src/Makefile.qttest.include

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ endif
6262
if ENABLE_ZMQ
6363
qt_test_test_bitcoin_qt_LDADD += $(LIBBITCOIN_ZMQ) $(ZMQ_LIBS)
6464
endif
65-
qt_test_test_bitcoin_qt_LDADD += $(LIBBITCOIN_CLI) $(LIBBITCOIN_COMMON) $(LIBBITCOIN_UTIL) $(LIBBITCOIN_CONSENSUS) $(LIBBITCOIN_CRYPTO) $(LIBBITCOIN_CRYPTO_SSE41) $(LIBUNIVALUE) $(LIBLEVELDB) \
65+
qt_test_test_bitcoin_qt_LDADD += $(LIBBITCOIN_CLI) $(LIBBITCOIN_COMMON) $(LIBBITCOIN_UTIL) $(LIBBITCOIN_CONSENSUS) $(LIBBITCOIN_CRYPTO) $(LIBBITCOIN_CRYPTO_SSE41) $(LIBBITCOIN_CRYPTO_AVX2) $(LIBUNIVALUE) $(LIBLEVELDB) \
6666
$(LIBLEVELDB_SSE42) $(LIBMEMENV) $(BOOST_LIBS) $(QT_DBUS_LIBS) $(QT_TEST_LIBS) $(QT_LIBS) \
6767
$(QR_LIBS) $(PROTOBUF_LIBS) $(BDB_LIBS) $(SSL_LIBS) $(CRYPTO_LIBS) $(MINIUPNPC_LIBS) $(LIBSECP256K1) \
6868
$(EVENT_PTHREADS_LIBS) $(EVENT_LIBS)

src/Makefile.test.include

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ if ENABLE_WALLET
109109
test_test_bitcoin_LDADD += $(LIBBITCOIN_WALLET)
110110
endif
111111

112-
test_test_bitcoin_LDADD += $(LIBBITCOIN_SERVER) $(LIBBITCOIN_CLI) $(LIBBITCOIN_COMMON) $(LIBBITCOIN_UTIL) $(LIBBITCOIN_CONSENSUS) $(LIBBITCOIN_CRYPTO) $(LIBBITCOIN_CRYPTO_SSE41) $(LIBUNIVALUE) \
112+
test_test_bitcoin_LDADD += $(LIBBITCOIN_SERVER) $(LIBBITCOIN_CLI) $(LIBBITCOIN_COMMON) $(LIBBITCOIN_UTIL) $(LIBBITCOIN_CONSENSUS) $(LIBBITCOIN_CRYPTO) $(LIBBITCOIN_CRYPTO_SSE41) $(LIBBITCOIN_CRYPTO_AVX2) $(LIBUNIVALUE) \
113113
$(LIBLEVELDB) $(LIBLEVELDB_SSE42) $(LIBMEMENV) $(BOOST_LIBS) $(BOOST_UNIT_TEST_FRAMEWORK_LIB) $(LIBSECP256K1) $(EVENT_LIBS) $(EVENT_PTHREADS_LIBS)
114114
test_test_bitcoin_CXXFLAGS = $(AM_CXXFLAGS) $(PIE_FLAGS)
115115

@@ -135,6 +135,7 @@ test_test_bitcoin_fuzzy_LDADD = \
135135
$(LIBBITCOIN_CONSENSUS) \
136136
$(LIBBITCOIN_CRYPTO) \
137137
$(LIBBITCOIN_CRYPTO_SSE41) \
138+
$(LIBBITCOIN_CRYPTO_AVX2) \
138139
$(LIBSECP256K1)
139140

140141
test_test_bitcoin_fuzzy_LDADD += $(BOOST_LIBS) $(CRYPTO_LIBS)

src/crypto/sha256.cpp

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,11 @@ namespace sha256d64_sse41
2424
void Transform_4way(unsigned char* out, const unsigned char* in);
2525
}
2626

27+
namespace sha256d64_avx2
28+
{
29+
void Transform_8way(unsigned char* out, const unsigned char* in);
30+
}
31+
2732
// Internal implementation code.
2833
namespace
2934
{
@@ -471,19 +476,37 @@ bool SelfTest(TransformType tr) {
471476
TransformType Transform = sha256::Transform;
472477
TransformD64Type TransformD64 = sha256::TransformD64;
473478
TransformD64Type TransformD64_4way = nullptr;
479+
TransformD64Type TransformD64_8way = nullptr;
480+
481+
#if defined(USE_ASM) && (defined(__x86_64__) || defined(__amd64__))
482+
// We can't use cpuid.h's __get_cpuid as it does not support subleafs.
483+
void inline cpuid(uint32_t leaf, uint32_t subleaf, uint32_t& a, uint32_t& b, uint32_t& c, uint32_t& d)
484+
{
485+
__asm__ ("cpuid" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "0"(leaf), "2"(subleaf));
486+
}
487+
#endif
474488
} // namespace
475489

490+
476491
std::string SHA256AutoDetect()
477492
{
478493
std::string ret = "standard";
479494
#if defined(USE_ASM) && (defined(__x86_64__) || defined(__amd64__))
480495
uint32_t eax, ebx, ecx, edx;
481-
if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) && (ecx >> 19) & 1) {
496+
cpuid(1, 0, eax, ebx, ecx, edx);
497+
if ((ecx >> 19) & 1) {
482498
Transform = sha256_sse4::Transform;
483499
TransformD64 = TransformD64Wrapper<sha256_sse4::Transform>;
484500
#if defined(ENABLE_SSE41) && !defined(BUILD_BITCOIN_INTERNAL)
485501
TransformD64_4way = sha256d64_sse41::Transform_4way;
486502
ret = "sse4(1way+4way)";
503+
#if defined(ENABLE_AVX2) && !defined(BUILD_BITCOIN_INTERNAL)
504+
cpuid(7, 0, eax, ebx, ecx, edx);
505+
if ((ebx >> 5) & 1) {
506+
TransformD64_8way = sha256d64_avx2::Transform_8way;
507+
ret += ",avx2(8way)";
508+
}
509+
#endif
487510
#else
488511
ret = "sse4";
489512
#endif
@@ -553,6 +576,14 @@ CSHA256& CSHA256::Reset()
553576

554577
void SHA256D64(unsigned char* out, const unsigned char* in, size_t blocks)
555578
{
579+
if (TransformD64_8way) {
580+
while (blocks >= 8) {
581+
TransformD64_8way(out, in);
582+
out += 256;
583+
in += 512;
584+
blocks -= 8;
585+
}
586+
}
556587
if (TransformD64_4way) {
557588
while (blocks >= 4) {
558589
TransformD64_4way(out, in);

0 commit comments

Comments
 (0)