From 9951df36fe6fa6e6fccd7eab41c1abb0d36cbd21 Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Wed, 6 Aug 2025 10:54:06 -0700 Subject: [PATCH 1/2] aarch64: Add -msimd-memops option controlling SIMD usage in memset/memcpy This option (enabled by default) preserves existing behavior by allowing use of Advanced SIMD registers while expanding memset/memcpy/memmove operations into inline instructions. Disabling this option prevents use of these registers for environments where the FPU may be disabled to reduce the cost of saving/restoring the processor state, such as in interrupt handlers. Signed-off-by: Keith Packard (cherry picked from commit 65837c38791876a9a56170983890dfc4620e47c6) --- gcc/common/config/aarch64/aarch64-common.cc | 4 ++++ gcc/config/aarch64/aarch64.cc | 10 ++++++---- gcc/config/aarch64/aarch64.h | 7 +++++++ gcc/config/aarch64/aarch64.opt | 4 ++++ gcc/config/aarch64/aarch64.opt.urls | 2 ++ gcc/doc/invoke.texi | 10 +++++++++- 6 files changed, 32 insertions(+), 5 deletions(-) diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc index 951d041d3109b..2d28dc40792f3 100644 --- a/gcc/common/config/aarch64/aarch64-common.cc +++ b/gcc/common/config/aarch64/aarch64-common.cc @@ -133,6 +133,10 @@ aarch64_handle_option (struct gcc_options *opts, opts->x_aarch64_flag_outline_atomics = val; return true; + case OPT_msimd_memops: + opts->x_aarch64_flag_simd_memops = val; + return true; + default: return true; } diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index c6b278ec5c1c9..0be56bff72cfd 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -19371,6 +19371,8 @@ static const struct aarch64_attribute_info aarch64_attributes[] = OPT_msign_return_address_ }, { "outline-atomics", aarch64_attr_bool, true, NULL, OPT_moutline_atomics}, + { "simd-memops", aarch64_attr_bool, true, NULL, + OPT_msimd_memops}, { NULL, aarch64_attr_custom, false, NULL, OPT____ } }; @@ 
-26652,8 +26654,8 @@ aarch64_expand_cpymem (rtx *operands, bool is_memmove) return aarch64_expand_cpymem_mops (operands, is_memmove); unsigned HOST_WIDE_INT size = UINTVAL (operands[2]); - bool use_ldpq = TARGET_SIMD && !(aarch64_tune_params.extra_tuning_flags - & AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS); + bool use_ldpq = TARGET_SIMD_MEMOPS && !(aarch64_tune_params.extra_tuning_flags + & AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS); /* Set inline limits for memmove/memcpy. MOPS has a separate threshold. */ unsigned max_copy_size = use_ldpq ? 256 : 128; @@ -26673,7 +26675,7 @@ aarch64_expand_cpymem (rtx *operands, bool is_memmove) ??? Although it would be possible to use LDP/STP Qn in streaming mode (so using TARGET_BASE_SIMD instead of TARGET_SIMD), it isn't clear whether that would improve performance. */ - bool use_qregs = size > 24 && TARGET_SIMD; + bool use_qregs = size > 24 && TARGET_SIMD_MEMOPS; base = copy_to_mode_reg (Pmode, XEXP (dst, 0)); dst = adjust_automodify_address (dst, VOIDmode, base, 0); @@ -26814,7 +26816,7 @@ aarch64_expand_setmem (rtx *operands) machine_mode cur_mode = BLKmode, next_mode; /* Variable-sized or strict-align memset may use the MOPS expansion. */ - if (!CONST_INT_P (operands[1]) || !TARGET_SIMD + if (!CONST_INT_P (operands[1]) || !TARGET_SIMD_MEMOPS || (STRICT_ALIGNMENT && align < 16)) return aarch64_expand_setmem_mops (operands); diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index fe02a02a57b3d..43a9f8ac28a68 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -125,6 +125,13 @@ of LSE instructions. 
*/ #define TARGET_OUTLINE_ATOMICS (aarch64_flag_outline_atomics) +#ifndef AARCH64_SIMD_MEMOPS_DEFAULT +#define AARCH64_SIMD_MEMOPS_DEFAULT 1 +#endif + +/* Allow use of SIMD registers for memory copy and set expansions */ +#define TARGET_SIMD_MEMOPS (TARGET_SIMD && aarch64_flag_simd_memops) + /* Align definitions of arrays, unions and structures so that initializations and copies can be made more efficient. This is not ABI-changing, so it only affects places where we can see the diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt index 6356c419399bd..fe9b91d1a72cf 100644 --- a/gcc/config/aarch64/aarch64.opt +++ b/gcc/config/aarch64/aarch64.opt @@ -332,6 +332,10 @@ moutline-atomics Target Var(aarch64_flag_outline_atomics) Init(2) Save Generate local calls to out-of-line atomic operations. +msimd-memops +Target Var(aarch64_flag_simd_memops) Init(AARCH64_SIMD_MEMOPS_DEFAULT) Save +Allow use of SIMD registers in memory set/copy expansions. + -param=aarch64-vect-compare-costs= Target Joined UInteger Var(aarch64_vect_compare_costs) Init(1) IntegerRange(0, 1) Param When vectorizing, consider using multiple different approaches and use diff --git a/gcc/config/aarch64/aarch64.opt.urls b/gcc/config/aarch64/aarch64.opt.urls index 993634c52f880..788f6c75468f3 100644 --- a/gcc/config/aarch64/aarch64.opt.urls +++ b/gcc/config/aarch64/aarch64.opt.urls @@ -91,3 +91,5 @@ UrlSuffix(gcc/AArch64-Options.html#index-mstack-protector-guard-reg) mstack-protector-guard-offset= UrlSuffix(gcc/AArch64-Options.html#index-mstack-protector-guard-offset) +msimd-memops +UrlSuffix(gcc/AArch64-Options.html#index-msimd-memops) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 3f487db6cad75..327e374fddfe0 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -807,7 +807,7 @@ Objective-C and Objective-C++ Dialects}. 
-moverride=@var{string} -mverbose-cost-dump -mstack-protector-guard=@var{guard} -mstack-protector-guard-reg=@var{sysreg} -mstack-protector-guard-offset=@var{offset} -mtrack-speculation --moutline-atomics -mearly-ldp-fusion -mlate-ldp-fusion} +-moutline-atomics -mearly-ldp-fusion -mlate-ldp-fusion -msimd-memops} @emph{Adapteva Epiphany Options} @gccoptlist{-mhalf-reg-file -mprefer-short-insn-regs @@ -21357,6 +21357,14 @@ used directly. The same applies when using @option{-mcpu=} when the selected cpu supports the @samp{lse} feature. This option is on by default. +@item -msimd-memops +@itemx -mno-simd-memops +Enable or disable use of Advanced SIMD registers when expanding memory +copy and memory set operations. Use of these registers can improve +performance and reduce instruction count for these operations. This +option is ignored unless Advanced SIMD registers are available. +This option is on by default. + @opindex march @item -march=@var{name} Specify the name of the target architecture and, optionally, one or From 8dc17e5e9e3c6c078fe9fcbf26058af27bd13c7c Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Wed, 6 Aug 2025 10:57:23 -0700 Subject: [PATCH 2/2] aarch64: Make -mno-simd-memops default for aarch64*-*-elf targets These targets are regularly used for embedded applications for portions of code where the Advanced SIMD registers may not be preserved, such as interrupt handlers. Disabling the use of SIMD registers for memset/memcpy operations by default ensures that code which doesn't overtly perform FPU or SIMD operations won't end up using those registers unintentionally. 
Signed-off-by: Keith Packard (cherry picked from commit d626b83c4b9f436d0ada66b054d5bd0966899688) --- gcc/config/aarch64/aarch64-elf.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/gcc/config/aarch64/aarch64-elf.h b/gcc/config/aarch64/aarch64-elf.h index b6fb7936789fe..19426c75136c9 100644 --- a/gcc/config/aarch64/aarch64-elf.h +++ b/gcc/config/aarch64/aarch64-elf.h @@ -144,4 +144,7 @@ ASM_MABI_SPEC #undef TYPE_OPERAND_FMT #define TYPE_OPERAND_FMT "%%%s" +#undef AARCH64_SIMD_MEMOPS_DEFAULT +#define AARCH64_SIMD_MEMOPS_DEFAULT 0 + #endif /* GCC_AARCH64_ELF_H */