Skip to content

Commit 4c935e2

Browse files
committed
Add SHA256 implementation using using Intel SHA intrinsics
1 parent 268400d commit 4c935e2

File tree

5 files changed

+438
-0
lines changed

5 files changed

+438
-0
lines changed

configure.ac

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -319,6 +319,7 @@ fi
319319
AX_CHECK_COMPILE_FLAG([-msse4.2],[[SSE42_CXXFLAGS="-msse4.2"]],,[[$CXXFLAG_WERROR]])
320320
AX_CHECK_COMPILE_FLAG([-msse4.1],[[SSE41_CXXFLAGS="-msse4.1"]],,[[$CXXFLAG_WERROR]])
321321
AX_CHECK_COMPILE_FLAG([-mavx -mavx2],[[AVX2_CXXFLAGS="-mavx -mavx2"]],,[[$CXXFLAG_WERROR]])
322+
AX_CHECK_COMPILE_FLAG([-msse4 -msha],[[SHANI_CXXFLAGS="-msse4 -msha"]],,[[$CXXFLAG_WERROR]])
322323

323324
TEMP_CXXFLAGS="$CXXFLAGS"
324325
CXXFLAGS="$CXXFLAGS $SSE42_CXXFLAGS"
@@ -380,6 +381,27 @@ AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
380381
)
381382
CXXFLAGS="$TEMP_CXXFLAGS"
382383

384+
TEMP_CXXFLAGS="$CXXFLAGS"
385+
CXXFLAGS="$CXXFLAGS $SHANI_CXXFLAGS"
386+
AC_MSG_CHECKING(for SHA-NI intrinsics)
387+
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
388+
#include <stdint.h>
389+
#if defined(_MSC_VER)
390+
#include <immintrin.h>
391+
#elif defined(__GNUC__)
392+
#include <x86intrin.h>
393+
#endif
394+
]],[[
395+
__m128i i = _mm_set1_epi32(0);
396+
__m128i j = _mm_set1_epi32(1);
397+
__m128i k = _mm_set1_epi32(2);
398+
return _mm_extract_epi32(_mm_sha256rnds2_epu32(i, i, k), 0);
399+
]])],
400+
[ AC_MSG_RESULT(yes); enable_shani=yes; AC_DEFINE(ENABLE_SHANI, 1, [Define this symbol to build code that uses SHA-NI intrinsics]) ],
401+
[ AC_MSG_RESULT(no)]
402+
)
403+
CXXFLAGS="$TEMP_CXXFLAGS"
404+
383405
CPPFLAGS="$CPPFLAGS -DHAVE_BUILD_INFO -D__STDC_FORMAT_MACROS"
384406

385407
AC_ARG_WITH([utils],
@@ -1300,6 +1322,7 @@ AM_CONDITIONAL([HARDEN],[test x$use_hardening = xyes])
13001322
AM_CONDITIONAL([ENABLE_HWCRC32],[test x$enable_hwcrc32 = xyes])
13011323
AM_CONDITIONAL([ENABLE_SSE41],[test x$enable_sse41 = xyes])
13021324
AM_CONDITIONAL([ENABLE_AVX2],[test x$enable_avx2 = xyes])
1325+
AM_CONDITIONAL([ENABLE_SHANI],[test x$enable_shani = xyes])
13031326
AM_CONDITIONAL([USE_ASM],[test x$use_asm = xyes])
13041327

13051328
AC_DEFINE(CLIENT_VERSION_MAJOR, _CLIENT_VERSION_MAJOR, [Major version])
@@ -1344,6 +1367,7 @@ AC_SUBST(SANITIZER_LDFLAGS)
13441367
AC_SUBST(SSE42_CXXFLAGS)
13451368
AC_SUBST(SSE41_CXXFLAGS)
13461369
AC_SUBST(AVX2_CXXFLAGS)
1370+
AC_SUBST(SHANI_CXXFLAGS)
13471371
AC_SUBST(LIBTOOL_APP_LDFLAGS)
13481372
AC_SUBST(USE_UPNP)
13491373
AC_SUBST(USE_QRCODE)

src/Makefile.am

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,10 @@ if ENABLE_AVX2
5252
LIBBITCOIN_CRYPTO_AVX2 = crypto/libbitcoin_crypto_avx2.a
5353
LIBBITCOIN_CRYPTO += $(LIBBITCOIN_CRYPTO_AVX2)
5454
endif
55+
if ENABLE_SHANI
56+
LIBBITCOIN_CRYPTO_SHANI = crypto/libbitcoin_crypto_shani.a
57+
LIBBITCOIN_CRYPTO += $(LIBBITCOIN_CRYPTO_SHANI)
58+
endif
5559

5660
$(LIBSECP256K1): $(wildcard secp256k1/src/*) $(wildcard secp256k1/include/*)
5761
$(AM_V_at)$(MAKE) $(AM_MAKEFLAGS) -C $(@D) $(@F)
@@ -314,6 +318,12 @@ crypto_libbitcoin_crypto_avx2_a_CXXFLAGS += $(AVX2_CXXFLAGS)
314318
crypto_libbitcoin_crypto_avx2_a_CPPFLAGS += -DENABLE_AVX2
315319
crypto_libbitcoin_crypto_avx2_a_SOURCES = crypto/sha256_avx2.cpp
316320

321+
crypto_libbitcoin_crypto_shani_a_CXXFLAGS = $(AM_CXXFLAGS) $(PIE_FLAGS)
322+
crypto_libbitcoin_crypto_shani_a_CPPFLAGS = $(AM_CPPFLAGS)
323+
crypto_libbitcoin_crypto_shani_a_CXXFLAGS += $(SHANI_CXXFLAGS)
324+
crypto_libbitcoin_crypto_shani_a_CPPFLAGS += -DENABLE_SHANI
325+
crypto_libbitcoin_crypto_shani_a_SOURCES = crypto/sha256_shani.cpp
326+
317327
# consensus: shared between all executables that validate any consensus rules.
318328
libbitcoin_consensus_a_CPPFLAGS = $(AM_CPPFLAGS) $(BITCOIN_INCLUDES)
319329
libbitcoin_consensus_a_CXXFLAGS = $(AM_CXXFLAGS) $(PIE_FLAGS)

src/Makefile.test.include

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ test_test_bitcoin_fuzzy_LDADD = \
137137
$(LIBBITCOIN_CRYPTO) \
138138
$(LIBBITCOIN_CRYPTO_SSE41) \
139139
$(LIBBITCOIN_CRYPTO_AVX2) \
140+
$(LIBBITCOIN_CRYPTO_SHANI) \
140141
$(LIBSECP256K1)
141142

142143
test_test_bitcoin_fuzzy_LDADD += $(BOOST_LIBS) $(CRYPTO_LIBS)

src/crypto/sha256.cpp

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,16 @@ namespace sha256d64_avx2
2929
void Transform_8way(unsigned char* out, const unsigned char* in);
3030
}
3131

32+
namespace sha256d64_shani
33+
{
34+
void Transform_2way(unsigned char* out, const unsigned char* in);
35+
}
36+
37+
namespace sha256_shani
38+
{
39+
void Transform(uint32_t* s, const unsigned char* chunk, size_t blocks);
40+
}
41+
3242
// Internal implementation code.
3343
namespace
3444
{
@@ -448,6 +458,7 @@ void TransformD64Wrapper(unsigned char* out, const unsigned char* in)
448458

449459
TransformType Transform = sha256::Transform;
450460
TransformD64Type TransformD64 = sha256::TransformD64;
461+
TransformD64Type TransformD64_2way = nullptr;
451462
TransformD64Type TransformD64_4way = nullptr;
452463
TransformD64Type TransformD64_8way = nullptr;
453464

@@ -512,6 +523,13 @@ bool SelfTest() {
512523
TransformD64(out, data + 1);
513524
if (!std::equal(out, out + 32, result_d64)) return false;
514525

526+
// Test TransformD64_2way, if available.
527+
if (TransformD64_2way) {
528+
unsigned char out[64];
529+
TransformD64_2way(out, data + 1);
530+
if (!std::equal(out, out + 64, result_d64)) return false;
531+
}
532+
515533
// Test TransformD64_4way, if available.
516534
if (TransformD64_4way) {
517535
unsigned char out[128];
@@ -556,13 +574,15 @@ std::string SHA256AutoDetect()
556574
bool have_xsave = false;
557575
bool have_avx = false;
558576
bool have_avx2 = false;
577+
bool have_shani = false;
559578
bool enabled_avx = false;
560579

561580
(void)AVXEnabled;
562581
(void)have_sse4;
563582
(void)have_avx;
564583
(void)have_xsave;
565584
(void)have_avx2;
585+
(void)have_shani;
566586
(void)enabled_avx;
567587

568588
uint32_t eax, ebx, ecx, edx;
@@ -576,8 +596,20 @@ std::string SHA256AutoDetect()
576596
if (have_sse4) {
577597
cpuid(7, 0, eax, ebx, ecx, edx);
578598
have_avx2 = (ebx >> 5) & 1;
599+
have_shani = (ebx >> 29) & 1;
579600
}
580601

602+
#if defined(ENABLE_SHANI) && !defined(BUILD_BITCOIN_INTERNAL)
603+
if (have_shani) {
604+
Transform = sha256_shani::Transform;
605+
TransformD64 = TransformD64Wrapper<sha256_shani::Transform>;
606+
TransformD64_2way = sha256d64_shani::Transform_2way;
607+
ret = "shani(1way,2way)";
608+
have_sse4 = false; // Disable SSE4/AVX2;
609+
have_avx2 = false;
610+
}
611+
#endif
612+
581613
if (have_sse4) {
582614
#if defined(__x86_64__) || defined(__amd64__)
583615
Transform = sha256_sse4::Transform;
@@ -677,6 +709,14 @@ void SHA256D64(unsigned char* out, const unsigned char* in, size_t blocks)
677709
blocks -= 4;
678710
}
679711
}
712+
if (TransformD64_2way) {
713+
while (blocks >= 2) {
714+
TransformD64_2way(out, in);
715+
out += 64;
716+
in += 128;
717+
blocks -= 2;
718+
}
719+
}
680720
while (blocks) {
681721
TransformD64(out, in);
682722
out += 32;

0 commit comments

Comments
 (0)