Skip to content

Commit c23bf06

Browse files
committed
Merge bitcoin/bitcoin#24115: ARMv8 SHA2 Intrinsics
aaa1d03 Add optimized sha256d64_arm_shani::Transform_2way (Pieter Wuille) fe06298 Implement sha256_arm_shani::Transform (Pavol Rusnak) 48a72fa Add sha256_arm_shani to build system (Pavol Rusnak) c2b7934 Rename SHANI to X86_SHANI to allow future implementation of ARM_SHANI (Pavol Rusnak) Pull request description: This PR adds support for ARMv8 SHA2 Intrinsics. Fixes bitcoin/bitcoin#13401 and bitcoin/bitcoin#17414 * Integration part was done by me. * The original SHA2 NI code comes from https://github.com/noloader/SHA-Intrinsics/blob/master/sha256-arm.c * Minor optimizations from https://github.com/rollmeister/bitcoin-armv8/blob/master/src/crypto/sha256.cpp are applied too. * The 2-way transform added by @sipa ACKs for top commit: laanwj: Code review and lightly tested ACK aaa1d03 Tree-SHA512: 9689d6390c004269cb1ee79ed05430d7d35a6efef2554a2b6732f7258a11e7e959b3306c04b4e8637a9623fb4c12d1c1b3592da0ff0dc6d737932db302509669
2 parents 3ce40e6 + aaa1d03 commit c23bf06

File tree

5 files changed

+1012
-30
lines changed

5 files changed

+1012
-30
lines changed

configure.ac

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -469,7 +469,7 @@ AX_CHECK_COMPILE_FLAG([-fno-extended-identifiers], [CXXFLAGS="$CXXFLAGS -fno-ext
469469
enable_sse42=no
470470
enable_sse41=no
471471
enable_avx2=no
472-
enable_shani=no
472+
enable_x86_shani=no
473473

474474
if test "$use_asm" = "yes"; then
475475

@@ -481,7 +481,7 @@ dnl x86
481481
AX_CHECK_COMPILE_FLAG([-msse4.2], [SSE42_CXXFLAGS="-msse4.2"], [], [$CXXFLAG_WERROR])
482482
AX_CHECK_COMPILE_FLAG([-msse4.1], [SSE41_CXXFLAGS="-msse4.1"], [], [$CXXFLAG_WERROR])
483483
AX_CHECK_COMPILE_FLAG([-mavx -mavx2], [AVX2_CXXFLAGS="-mavx -mavx2"], [], [$CXXFLAG_WERROR])
484-
AX_CHECK_COMPILE_FLAG([-msse4 -msha], [SHANI_CXXFLAGS="-msse4 -msha"], [], [$CXXFLAG_WERROR])
484+
AX_CHECK_COMPILE_FLAG([-msse4 -msha], [X86_SHANI_CXXFLAGS="-msse4 -msha"], [], [$CXXFLAG_WERROR])
485485

486486
enable_clmul=
487487
AX_CHECK_COMPILE_FLAG([-mpclmul], [enable_clmul=yes], [], [$CXXFLAG_WERROR], [AC_LANG_PROGRAM([
@@ -554,8 +554,8 @@ AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
554554
CXXFLAGS="$TEMP_CXXFLAGS"
555555

556556
TEMP_CXXFLAGS="$CXXFLAGS"
557-
CXXFLAGS="$CXXFLAGS $SHANI_CXXFLAGS"
558-
AC_MSG_CHECKING([for SHA-NI intrinsics])
557+
CXXFLAGS="$CXXFLAGS $X86_SHANI_CXXFLAGS"
558+
AC_MSG_CHECKING([for x86 SHA-NI intrinsics])
559559
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
560560
#include <stdint.h>
561561
#include <immintrin.h>
@@ -565,17 +565,18 @@ AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
565565
__m128i k = _mm_set1_epi32(2);
566566
return _mm_extract_epi32(_mm_sha256rnds2_epu32(i, i, k), 0);
567567
]])],
568-
[ AC_MSG_RESULT([yes]); enable_shani=yes; AC_DEFINE([ENABLE_SHANI], [1], [Define this symbol to build code that uses SHA-NI intrinsics]) ],
568+
[ AC_MSG_RESULT([yes]); enable_x86_shani=yes; AC_DEFINE([ENABLE_X86_SHANI], [1], [Define this symbol to build code that uses x86 SHA-NI intrinsics]) ],
569569
[ AC_MSG_RESULT([no])]
570570
)
571571
CXXFLAGS="$TEMP_CXXFLAGS"
572572

573573
# ARM
574574
AX_CHECK_COMPILE_FLAG([-march=armv8-a+crc+crypto], [ARM_CRC_CXXFLAGS="-march=armv8-a+crc+crypto"], [], [$CXXFLAG_WERROR])
575+
AX_CHECK_COMPILE_FLAG([-march=armv8-a+crc+crypto], [ARM_SHANI_CXXFLAGS="-march=armv8-a+crc+crypto"], [], [$CXXFLAG_WERROR])
575576

576577
TEMP_CXXFLAGS="$CXXFLAGS"
577578
CXXFLAGS="$CXXFLAGS $ARM_CRC_CXXFLAGS"
578-
AC_MSG_CHECKING([for AArch64 CRC32 intrinsics])
579+
AC_MSG_CHECKING([for ARMv8 CRC32 intrinsics])
579580
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
580581
#include <arm_acle.h>
581582
#include <arm_neon.h>
@@ -592,6 +593,24 @@ AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
592593
)
593594
CXXFLAGS="$TEMP_CXXFLAGS"
594595

596+
TEMP_CXXFLAGS="$CXXFLAGS"
597+
CXXFLAGS="$CXXFLAGS $ARM_SHANI_CXXFLAGS"
598+
AC_MSG_CHECKING([for ARMv8 SHA-NI intrinsics])
599+
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
600+
#include <arm_acle.h>
601+
#include <arm_neon.h>
602+
]],[[
603+
uint32x4_t a, b, c;
604+
vsha256h2q_u32(a, b, c);
605+
vsha256hq_u32(a, b, c);
606+
vsha256su0q_u32(a, b);
607+
vsha256su1q_u32(a, b, c);
608+
]])],
609+
[ AC_MSG_RESULT([yes]); enable_arm_shani=yes; AC_DEFINE([ENABLE_ARM_SHANI], [1], [Define this symbol to build code that uses ARMv8 SHA-NI intrinsics]) ],
610+
[ AC_MSG_RESULT([no])]
611+
)
612+
CXXFLAGS="$TEMP_CXXFLAGS"
613+
595614
fi
596615

597616
CPPFLAGS="$CPPFLAGS -DHAVE_BUILD_INFO"
@@ -1774,8 +1793,9 @@ AM_CONDITIONAL([HARDEN], [test "$use_hardening" = "yes"])
17741793
AM_CONDITIONAL([ENABLE_SSE42], [test "$enable_sse42" = "yes"])
17751794
AM_CONDITIONAL([ENABLE_SSE41], [test "$enable_sse41" = "yes"])
17761795
AM_CONDITIONAL([ENABLE_AVX2], [test "$enable_avx2" = "yes"])
1777-
AM_CONDITIONAL([ENABLE_SHANI], [test "$enable_shani" = "yes"])
1796+
AM_CONDITIONAL([ENABLE_X86_SHANI], [test "$enable_x86_shani" = "yes"])
17781797
AM_CONDITIONAL([ENABLE_ARM_CRC], [test "$enable_arm_crc" = "yes"])
1798+
AM_CONDITIONAL([ENABLE_ARM_SHANI], [test "$enable_arm_shani" = "yes"])
17791799
AM_CONDITIONAL([USE_ASM], [test "$use_asm" = "yes"])
17801800
AM_CONDITIONAL([WORDS_BIGENDIAN], [test "$ac_cv_c_bigendian" = "yes"])
17811801
AM_CONDITIONAL([USE_NATPMP], [test "$use_natpmp" = "yes"])
@@ -1832,8 +1852,9 @@ AC_SUBST(SSE42_CXXFLAGS)
18321852
AC_SUBST(SSE41_CXXFLAGS)
18331853
AC_SUBST(CLMUL_CXXFLAGS)
18341854
AC_SUBST(AVX2_CXXFLAGS)
1835-
AC_SUBST(SHANI_CXXFLAGS)
1855+
AC_SUBST(X86_SHANI_CXXFLAGS)
18361856
AC_SUBST(ARM_CRC_CXXFLAGS)
1857+
AC_SUBST(ARM_SHANI_CXXFLAGS)
18371858
AC_SUBST(LIBTOOL_APP_LDFLAGS)
18381859
AC_SUBST(USE_SQLITE)
18391860
AC_SUBST(USE_BDB)

src/Makefile.am

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -46,9 +46,13 @@ if ENABLE_AVX2
4646
LIBBITCOIN_CRYPTO_AVX2 = crypto/libbitcoin_crypto_avx2.a
4747
LIBBITCOIN_CRYPTO += $(LIBBITCOIN_CRYPTO_AVX2)
4848
endif
49-
if ENABLE_SHANI
50-
LIBBITCOIN_CRYPTO_SHANI = crypto/libbitcoin_crypto_shani.a
51-
LIBBITCOIN_CRYPTO += $(LIBBITCOIN_CRYPTO_SHANI)
49+
if ENABLE_X86_SHANI
50+
LIBBITCOIN_CRYPTO_X86_SHANI = crypto/libbitcoin_crypto_x86_shani.a
51+
LIBBITCOIN_CRYPTO += $(LIBBITCOIN_CRYPTO_X86_SHANI)
52+
endif
53+
if ENABLE_ARM_SHANI
54+
LIBBITCOIN_CRYPTO_ARM_SHANI = crypto/libbitcoin_crypto_arm_shani.a
55+
LIBBITCOIN_CRYPTO += $(LIBBITCOIN_CRYPTO_ARM_SHANI)
5256
endif
5357

5458
$(LIBSECP256K1): $(wildcard secp256k1/src/*.h) $(wildcard secp256k1/src/*.c) $(wildcard secp256k1/include/*)
@@ -498,11 +502,17 @@ crypto_libbitcoin_crypto_avx2_a_CXXFLAGS += $(AVX2_CXXFLAGS)
498502
crypto_libbitcoin_crypto_avx2_a_CPPFLAGS += -DENABLE_AVX2
499503
crypto_libbitcoin_crypto_avx2_a_SOURCES = crypto/sha256_avx2.cpp
500504

501-
crypto_libbitcoin_crypto_shani_a_CXXFLAGS = $(AM_CXXFLAGS) $(PIE_FLAGS)
502-
crypto_libbitcoin_crypto_shani_a_CPPFLAGS = $(AM_CPPFLAGS)
503-
crypto_libbitcoin_crypto_shani_a_CXXFLAGS += $(SHANI_CXXFLAGS)
504-
crypto_libbitcoin_crypto_shani_a_CPPFLAGS += -DENABLE_SHANI
505-
crypto_libbitcoin_crypto_shani_a_SOURCES = crypto/sha256_shani.cpp
505+
crypto_libbitcoin_crypto_x86_shani_a_CXXFLAGS = $(AM_CXXFLAGS) $(PIE_FLAGS)
506+
crypto_libbitcoin_crypto_x86_shani_a_CPPFLAGS = $(AM_CPPFLAGS)
507+
crypto_libbitcoin_crypto_x86_shani_a_CXXFLAGS += $(X86_SHANI_CXXFLAGS)
508+
crypto_libbitcoin_crypto_x86_shani_a_CPPFLAGS += -DENABLE_X86_SHANI
509+
crypto_libbitcoin_crypto_x86_shani_a_SOURCES = crypto/sha256_x86_shani.cpp
510+
511+
crypto_libbitcoin_crypto_arm_shani_a_CXXFLAGS = $(AM_CXXFLAGS) $(PIE_FLAGS)
512+
crypto_libbitcoin_crypto_arm_shani_a_CPPFLAGS = $(AM_CPPFLAGS)
513+
crypto_libbitcoin_crypto_arm_shani_a_CXXFLAGS += $(ARM_SHANI_CXXFLAGS)
514+
crypto_libbitcoin_crypto_arm_shani_a_CPPFLAGS += -DENABLE_ARM_SHANI
515+
crypto_libbitcoin_crypto_arm_shani_a_SOURCES = crypto/sha256_arm_shani.cpp
506516

507517
# consensus: shared between all executables that validate any consensus rules.
508518
libbitcoin_consensus_a_CPPFLAGS = $(AM_CPPFLAGS) $(BITCOIN_INCLUDES)

src/crypto/sha256.cpp

Lines changed: 63 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,16 @@
1010

1111
#include <compat/cpuid.h>
1212

13+
#if defined(__linux__) && defined(ENABLE_ARM_SHANI) && !defined(BUILD_BITCOIN_INTERNAL)
14+
#include <sys/auxv.h>
15+
#include <asm/hwcap.h>
16+
#endif
17+
18+
#if defined(MAC_OSX) && defined(ENABLE_ARM_SHANI) && !defined(BUILD_BITCOIN_INTERNAL)
19+
#include <sys/types.h>
20+
#include <sys/sysctl.h>
21+
#endif
22+
1323
#if defined(__x86_64__) || defined(__amd64__) || defined(__i386__)
1424
#if defined(USE_ASM)
1525
namespace sha256_sse4
@@ -29,16 +39,26 @@ namespace sha256d64_avx2
2939
void Transform_8way(unsigned char* out, const unsigned char* in);
3040
}
3141

32-
namespace sha256d64_shani
42+
namespace sha256d64_x86_shani
3343
{
3444
void Transform_2way(unsigned char* out, const unsigned char* in);
3545
}
3646

37-
namespace sha256_shani
47+
namespace sha256_x86_shani
3848
{
3949
void Transform(uint32_t* s, const unsigned char* chunk, size_t blocks);
4050
}
4151

52+
namespace sha256_arm_shani
53+
{
54+
void Transform(uint32_t* s, const unsigned char* chunk, size_t blocks);
55+
}
56+
57+
namespace sha256d64_arm_shani
58+
{
59+
void Transform_2way(unsigned char* out, const unsigned char* in);
60+
}
61+
4262
// Internal implementation code.
4363
namespace
4464
{
@@ -567,15 +587,15 @@ std::string SHA256AutoDetect()
567587
bool have_xsave = false;
568588
bool have_avx = false;
569589
bool have_avx2 = false;
570-
bool have_shani = false;
590+
bool have_x86_shani = false;
571591
bool enabled_avx = false;
572592

573593
(void)AVXEnabled;
574594
(void)have_sse4;
575595
(void)have_avx;
576596
(void)have_xsave;
577597
(void)have_avx2;
578-
(void)have_shani;
598+
(void)have_x86_shani;
579599
(void)enabled_avx;
580600

581601
uint32_t eax, ebx, ecx, edx;
@@ -589,15 +609,15 @@ std::string SHA256AutoDetect()
589609
if (have_sse4) {
590610
GetCPUID(7, 0, eax, ebx, ecx, edx);
591611
have_avx2 = (ebx >> 5) & 1;
592-
have_shani = (ebx >> 29) & 1;
612+
have_x86_shani = (ebx >> 29) & 1;
593613
}
594614

595-
#if defined(ENABLE_SHANI) && !defined(BUILD_BITCOIN_INTERNAL)
596-
if (have_shani) {
597-
Transform = sha256_shani::Transform;
598-
TransformD64 = TransformD64Wrapper<sha256_shani::Transform>;
599-
TransformD64_2way = sha256d64_shani::Transform_2way;
600-
ret = "shani(1way,2way)";
615+
#if defined(ENABLE_X86_SHANI) && !defined(BUILD_BITCOIN_INTERNAL)
616+
if (have_x86_shani) {
617+
Transform = sha256_x86_shani::Transform;
618+
TransformD64 = TransformD64Wrapper<sha256_x86_shani::Transform>;
619+
TransformD64_2way = sha256d64_x86_shani::Transform_2way;
620+
ret = "x86_shani(1way,2way)";
601621
have_sse4 = false; // Disable SSE4/AVX2;
602622
have_avx2 = false;
603623
}
@@ -623,6 +643,38 @@ std::string SHA256AutoDetect()
623643
#endif
624644
#endif
625645

646+
#if defined(ENABLE_ARM_SHANI) && !defined(BUILD_BITCOIN_INTERNAL)
647+
bool have_arm_shani = false;
648+
649+
#if defined(__linux__)
650+
#if defined(__arm__) // 32-bit
651+
if (getauxval(AT_HWCAP2) & HWCAP2_SHA2) {
652+
have_arm_shani = true;
653+
}
654+
#endif
655+
#if defined(__aarch64__) // 64-bit
656+
if (getauxval(AT_HWCAP) & HWCAP_SHA2) {
657+
have_arm_shani = true;
658+
}
659+
#endif
660+
#endif
661+
662+
#if defined(MAC_OSX)
663+
int val = 0;
664+
size_t len = sizeof(val);
665+
if (sysctlbyname("hw.optional.arm.FEAT_SHA256", &val, &len, nullptr, 0) == 0) {
666+
have_arm_shani = val != 0;
667+
}
668+
#endif
669+
670+
if (have_arm_shani) {
671+
Transform = sha256_arm_shani::Transform;
672+
TransformD64 = TransformD64Wrapper<sha256_arm_shani::Transform>;
673+
TransformD64_2way = sha256d64_arm_shani::Transform_2way;
674+
ret = "arm_shani(1way,2way)";
675+
}
676+
#endif
677+
626678
assert(SelfTest());
627679
return ret;
628680
}

0 commit comments

Comments
 (0)