Skip to content

Commit 230294b

Browse files
committed
4-way SSE4.1 implementation for double SHA256 on 64-byte inputs
1 parent 1f0e7ca commit 230294b

File tree

8 files changed

+385
-7
lines changed

8 files changed

+385
-7
lines changed

configure.ac

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,7 @@ fi
304304
# be compiled with them, rather that specific objects/libs may use them after checking for runtime
305305
# compatibility.
306306
AX_CHECK_COMPILE_FLAG([-msse4.2],[[SSE42_CXXFLAGS="-msse4.2"]],,[[$CXXFLAG_WERROR]])
307+
AX_CHECK_COMPILE_FLAG([-msse4.1],[[SSE41_CXXFLAGS="-msse4.1"]],,[[$CXXFLAG_WERROR]])
307308

308309
TEMP_CXXFLAGS="$CXXFLAGS"
309310
CXXFLAGS="$CXXFLAGS $SSE42_CXXFLAGS"
@@ -327,6 +328,25 @@ AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
327328
)
328329
CXXFLAGS="$TEMP_CXXFLAGS"
329330

331+
TEMP_CXXFLAGS="$CXXFLAGS"
332+
CXXFLAGS="$CXXFLAGS $SSE41_CXXFLAGS"
333+
AC_MSG_CHECKING(for SSE4.1 intrinsics)
334+
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
335+
#include <stdint.h>
336+
#if defined(_MSC_VER)
337+
#include <immintrin.h>
338+
#elif defined(__GNUC__)
339+
#include <x86intrin.h>
340+
#endif
341+
]],[[
342+
__m128i l = _mm_set1_epi32(0);
343+
return _mm_extract_epi32(l, 3);
344+
]])],
345+
[ AC_MSG_RESULT(yes); enable_sse41=yes; AC_DEFINE(ENABLE_SSE41, 1, [Define this symbol to build code that uses SSE4.1 intrinsics]) ],
346+
[ AC_MSG_RESULT(no)]
347+
)
348+
CXXFLAGS="$TEMP_CXXFLAGS"
349+
330350
CPPFLAGS="$CPPFLAGS -DHAVE_BUILD_INFO -D__STDC_FORMAT_MACROS"
331351

332352
AC_ARG_WITH([utils],
@@ -1245,6 +1265,7 @@ AM_CONDITIONAL([USE_LCOV],[test x$use_lcov = xyes])
12451265
AM_CONDITIONAL([GLIBC_BACK_COMPAT],[test x$use_glibc_compat = xyes])
12461266
AM_CONDITIONAL([HARDEN],[test x$use_hardening = xyes])
12471267
AM_CONDITIONAL([ENABLE_HWCRC32],[test x$enable_hwcrc32 = xyes])
1268+
AM_CONDITIONAL([ENABLE_SSE41],[test x$enable_sse41 = xyes])
12481269
AM_CONDITIONAL([USE_ASM],[test x$use_asm = xyes])
12491270

12501271
AC_DEFINE(CLIENT_VERSION_MAJOR, _CLIENT_VERSION_MAJOR, [Major version])
@@ -1283,6 +1304,7 @@ AC_SUBST(PIE_FLAGS)
12831304
AC_SUBST(SANITIZER_CXXFLAGS)
12841305
AC_SUBST(SANITIZER_LDFLAGS)
12851306
AC_SUBST(SSE42_CXXFLAGS)
1307+
AC_SUBST(SSE41_CXXFLAGS)
12861308
AC_SUBST(LIBTOOL_APP_LDFLAGS)
12871309
AC_SUBST(USE_UPNP)
12881310
AC_SUBST(USE_QRCODE)

src/Makefile.am

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ LIBBITCOIN_CONSENSUS=libbitcoin_consensus.a
3030
LIBBITCOIN_CLI=libbitcoin_cli.a
3131
LIBBITCOIN_UTIL=libbitcoin_util.a
3232
LIBBITCOIN_CRYPTO=crypto/libbitcoin_crypto.a
33+
LIBBITCOIN_CRYPTO_SSE41=crypto/libbitcoin_crypto_sse41.a
3334
LIBBITCOINQT=qt/libbitcoinqt.a
3435
LIBSECP256K1=secp256k1/libsecp256k1.la
3536

@@ -50,6 +51,7 @@ $(LIBSECP256K1): $(wildcard secp256k1/src/*) $(wildcard secp256k1/include/*)
5051
# But to build the less dependent modules first, we manually select their order here:
5152
EXTRA_LIBRARIES += \
5253
$(LIBBITCOIN_CRYPTO) \
54+
$(LIBBITCOIN_CRYPTO_SSE41) \
5355
$(LIBBITCOIN_UTIL) \
5456
$(LIBBITCOIN_COMMON) \
5557
$(LIBBITCOIN_CONSENSUS) \
@@ -289,6 +291,14 @@ if USE_ASM
289291
crypto_libbitcoin_crypto_a_SOURCES += crypto/sha256_sse4.cpp
290292
endif
291293

294+
crypto_libbitcoin_crypto_sse41_a_CXXFLAGS = $(AM_CXXFLAGS) $(PIE_FLAGS)
295+
crypto_libbitcoin_crypto_sse41_a_CPPFLAGS = $(AM_CPPFLAGS)
296+
if ENABLE_SSE41
297+
crypto_libbitcoin_crypto_sse41_a_CXXFLAGS += $(SSE41_CXXFLAGS)
298+
crypto_libbitcoin_crypto_sse41_a_CPPFLAGS += -DENABLE_SSE41
299+
endif
300+
crypto_libbitcoin_crypto_sse41_a_SOURCES = crypto/sha256_sse41.cpp
301+
292302
# consensus: shared between all executables that validate any consensus rules.
293303
libbitcoin_consensus_a_CPPFLAGS = $(AM_CPPFLAGS) $(BITCOIN_INCLUDES)
294304
libbitcoin_consensus_a_CXXFLAGS = $(AM_CXXFLAGS) $(PIE_FLAGS)
@@ -411,6 +421,7 @@ bitcoind_LDADD = \
411421
$(LIBBITCOIN_ZMQ) \
412422
$(LIBBITCOIN_CONSENSUS) \
413423
$(LIBBITCOIN_CRYPTO) \
424+
$(LIBBITCOIN_CRYPTO_SSE41) \
414425
$(LIBLEVELDB) \
415426
$(LIBLEVELDB_SSE42) \
416427
$(LIBMEMENV) \
@@ -432,7 +443,8 @@ bitcoin_cli_LDADD = \
432443
$(LIBBITCOIN_CLI) \
433444
$(LIBUNIVALUE) \
434445
$(LIBBITCOIN_UTIL) \
435-
$(LIBBITCOIN_CRYPTO)
446+
$(LIBBITCOIN_CRYPTO) \
447+
$(LIBBITCOIN_CRYPTO_SSE41)
436448

437449
bitcoin_cli_LDADD += $(BOOST_LIBS) $(SSL_LIBS) $(CRYPTO_LIBS) $(EVENT_LIBS)
438450
#
@@ -453,6 +465,7 @@ bitcoin_tx_LDADD = \
453465
$(LIBBITCOIN_UTIL) \
454466
$(LIBBITCOIN_CONSENSUS) \
455467
$(LIBBITCOIN_CRYPTO) \
468+
$(LIBBITCOIN_CRYPTO_SSE41) \
456469
$(LIBSECP256K1)
457470

458471
bitcoin_tx_LDADD += $(BOOST_LIBS) $(CRYPTO_LIBS)

src/Makefile.bench.include

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ bench_bench_bitcoin_LDADD = \
3939
$(LIBBITCOIN_UTIL) \
4040
$(LIBBITCOIN_CONSENSUS) \
4141
$(LIBBITCOIN_CRYPTO) \
42+
$(LIBBITCOIN_CRYPTO_SSE41) \
4243
$(LIBLEVELDB) \
4344
$(LIBLEVELDB_SSE42) \
4445
$(LIBMEMENV) \

src/Makefile.qt.include

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -407,7 +407,7 @@ endif
407407
if ENABLE_ZMQ
408408
qt_bitcoin_qt_LDADD += $(LIBBITCOIN_ZMQ) $(ZMQ_LIBS)
409409
endif
410-
qt_bitcoin_qt_LDADD += $(LIBBITCOIN_CLI) $(LIBBITCOIN_COMMON) $(LIBBITCOIN_UTIL) $(LIBBITCOIN_CONSENSUS) $(LIBBITCOIN_CRYPTO) $(LIBUNIVALUE) $(LIBLEVELDB) $(LIBLEVELDB_SSE42) $(LIBMEMENV) \
410+
qt_bitcoin_qt_LDADD += $(LIBBITCOIN_CLI) $(LIBBITCOIN_COMMON) $(LIBBITCOIN_UTIL) $(LIBBITCOIN_CONSENSUS) $(LIBBITCOIN_CRYPTO) $(LIBBITCOIN_CRYPTO_SSE41) $(LIBUNIVALUE) $(LIBLEVELDB) $(LIBLEVELDB_SSE42) $(LIBMEMENV) \
411411
$(BOOST_LIBS) $(QT_LIBS) $(QT_DBUS_LIBS) $(QR_LIBS) $(PROTOBUF_LIBS) $(BDB_LIBS) $(SSL_LIBS) $(CRYPTO_LIBS) $(MINIUPNPC_LIBS) $(LIBSECP256K1) \
412412
$(EVENT_PTHREADS_LIBS) $(EVENT_LIBS)
413413
qt_bitcoin_qt_LDFLAGS = $(RELDFLAGS) $(AM_LDFLAGS) $(QT_LDFLAGS) $(LIBTOOL_APP_LDFLAGS)

src/Makefile.qttest.include

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ endif
6262
if ENABLE_ZMQ
6363
qt_test_test_bitcoin_qt_LDADD += $(LIBBITCOIN_ZMQ) $(ZMQ_LIBS)
6464
endif
65-
qt_test_test_bitcoin_qt_LDADD += $(LIBBITCOIN_CLI) $(LIBBITCOIN_COMMON) $(LIBBITCOIN_UTIL) $(LIBBITCOIN_CONSENSUS) $(LIBBITCOIN_CRYPTO) $(LIBUNIVALUE) $(LIBLEVELDB) \
65+
qt_test_test_bitcoin_qt_LDADD += $(LIBBITCOIN_CLI) $(LIBBITCOIN_COMMON) $(LIBBITCOIN_UTIL) $(LIBBITCOIN_CONSENSUS) $(LIBBITCOIN_CRYPTO) $(LIBBITCOIN_CRYPTO_SSE41) $(LIBUNIVALUE) $(LIBLEVELDB) \
6666
$(LIBLEVELDB_SSE42) $(LIBMEMENV) $(BOOST_LIBS) $(QT_DBUS_LIBS) $(QT_TEST_LIBS) $(QT_LIBS) \
6767
$(QR_LIBS) $(PROTOBUF_LIBS) $(BDB_LIBS) $(SSL_LIBS) $(CRYPTO_LIBS) $(MINIUPNPC_LIBS) $(LIBSECP256K1) \
6868
$(EVENT_PTHREADS_LIBS) $(EVENT_LIBS)

src/Makefile.test.include

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,8 @@ test_test_bitcoin_LDADD =
108108
if ENABLE_WALLET
109109
test_test_bitcoin_LDADD += $(LIBBITCOIN_WALLET)
110110
endif
111-
test_test_bitcoin_LDADD += $(LIBBITCOIN_SERVER) $(LIBBITCOIN_CLI) $(LIBBITCOIN_COMMON) $(LIBBITCOIN_UTIL) $(LIBBITCOIN_CONSENSUS) $(LIBBITCOIN_CRYPTO) $(LIBUNIVALUE) \
111+
112+
test_test_bitcoin_LDADD += $(LIBBITCOIN_SERVER) $(LIBBITCOIN_CLI) $(LIBBITCOIN_COMMON) $(LIBBITCOIN_UTIL) $(LIBBITCOIN_CONSENSUS) $(LIBBITCOIN_CRYPTO) $(LIBBITCOIN_CRYPTO_SSE41) $(LIBUNIVALUE) \
112113
$(LIBLEVELDB) $(LIBLEVELDB_SSE42) $(LIBMEMENV) $(BOOST_LIBS) $(BOOST_UNIT_TEST_FRAMEWORK_LIB) $(LIBSECP256K1) $(EVENT_LIBS) $(EVENT_PTHREADS_LIBS)
113114
test_test_bitcoin_CXXFLAGS = $(AM_CXXFLAGS) $(PIE_FLAGS)
114115

@@ -133,6 +134,7 @@ test_test_bitcoin_fuzzy_LDADD = \
133134
$(LIBBITCOIN_UTIL) \
134135
$(LIBBITCOIN_CONSENSUS) \
135136
$(LIBBITCOIN_CRYPTO) \
137+
$(LIBBITCOIN_CRYPTO_SSE41) \
136138
$(LIBSECP256K1)
137139

138140
test_test_bitcoin_fuzzy_LDADD += $(BOOST_LIBS) $(CRYPTO_LIBS)

src/crypto/sha256.cpp

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,11 @@ void Transform(uint32_t* s, const unsigned char* chunk, size_t blocks);
1919
#endif
2020
#endif
2121

22+
namespace sha256d64_sse41
23+
{
24+
void Transform_4way(unsigned char* out, const unsigned char* in);
25+
}
26+
2227
// Internal implementation code.
2328
namespace
2429
{
@@ -465,22 +470,28 @@ bool SelfTest(TransformType tr) {
465470

466471
TransformType Transform = sha256::Transform;
467472
TransformD64Type TransformD64 = sha256::TransformD64;
473+
TransformD64Type TransformD64_4way = nullptr;
468474
} // namespace
469475

470476
std::string SHA256AutoDetect()
471477
{
478+
std::string ret = "standard";
472479
#if defined(USE_ASM) && (defined(__x86_64__) || defined(__amd64__))
473480
uint32_t eax, ebx, ecx, edx;
474481
if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) && (ecx >> 19) & 1) {
475482
Transform = sha256_sse4::Transform;
476483
TransformD64 = TransformD64Wrapper<sha256_sse4::Transform>;
477-
assert(SelfTest(Transform));
478-
return "sse4";
484+
#if defined(ENABLE_SSE41) && !defined(BUILD_BITCOIN_INTERNAL)
485+
TransformD64_4way = sha256d64_sse41::Transform_4way;
486+
ret = "sse4(1way+4way)";
487+
#else
488+
ret = "sse4";
489+
#endif
479490
}
480491
#endif
481492

482493
assert(SelfTest(Transform));
483-
return "standard";
494+
return ret;
484495
}
485496

486497
////// SHA-256
@@ -542,6 +553,14 @@ CSHA256& CSHA256::Reset()
542553

543554
void SHA256D64(unsigned char* out, const unsigned char* in, size_t blocks)
544555
{
556+
if (TransformD64_4way) {
557+
while (blocks >= 4) {
558+
TransformD64_4way(out, in);
559+
out += 128;
560+
in += 256;
561+
blocks -= 4;
562+
}
563+
}
545564
while (blocks) {
546565
TransformD64(out, in);
547566
out += 32;

0 commit comments

Comments
 (0)