diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc index 951d041d3109b..2d28dc40792f3 100644 --- a/gcc/common/config/aarch64/aarch64-common.cc +++ b/gcc/common/config/aarch64/aarch64-common.cc @@ -133,6 +133,10 @@ aarch64_handle_option (struct gcc_options *opts, opts->x_aarch64_flag_outline_atomics = val; return true; + case OPT_msimd_memops: + opts->x_aarch64_flag_simd_memops = val; + return true; + default: return true; } diff --git a/gcc/config/aarch64/aarch64-elf.h b/gcc/config/aarch64/aarch64-elf.h index b6fb7936789fe..19426c75136c9 100644 --- a/gcc/config/aarch64/aarch64-elf.h +++ b/gcc/config/aarch64/aarch64-elf.h @@ -144,4 +144,7 @@ ASM_MABI_SPEC #undef TYPE_OPERAND_FMT #define TYPE_OPERAND_FMT "%%%s" +#undef AARCH64_SIMD_MEMOPS_DEFAULT +#define AARCH64_SIMD_MEMOPS_DEFAULT 0 + #endif /* GCC_AARCH64_ELF_H */ diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index c6b278ec5c1c9..0be56bff72cfd 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -19371,6 +19371,8 @@ static const struct aarch64_attribute_info aarch64_attributes[] = OPT_msign_return_address_ }, { "outline-atomics", aarch64_attr_bool, true, NULL, OPT_moutline_atomics}, + { "simd-memops", aarch64_attr_bool, true, NULL, + OPT_msimd_memops}, { NULL, aarch64_attr_custom, false, NULL, OPT____ } }; @@ -26652,8 +26654,8 @@ aarch64_expand_cpymem (rtx *operands, bool is_memmove) return aarch64_expand_cpymem_mops (operands, is_memmove); unsigned HOST_WIDE_INT size = UINTVAL (operands[2]); - bool use_ldpq = TARGET_SIMD && !(aarch64_tune_params.extra_tuning_flags - & AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS); + bool use_ldpq = TARGET_SIMD_MEMOPS && !(aarch64_tune_params.extra_tuning_flags + & AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS); /* Set inline limits for memmove/memcpy. MOPS has a separate threshold. */ unsigned max_copy_size = use_ldpq ? 
256 : 128; @@ -26673,7 +26675,7 @@ aarch64_expand_cpymem (rtx *operands, bool is_memmove) ??? Although it would be possible to use LDP/STP Qn in streaming mode (so using TARGET_BASE_SIMD instead of TARGET_SIMD), it isn't clear whether that would improve performance. */ - bool use_qregs = size > 24 && TARGET_SIMD; + bool use_qregs = size > 24 && TARGET_SIMD_MEMOPS; base = copy_to_mode_reg (Pmode, XEXP (dst, 0)); dst = adjust_automodify_address (dst, VOIDmode, base, 0); @@ -26814,7 +26816,7 @@ aarch64_expand_setmem (rtx *operands) machine_mode cur_mode = BLKmode, next_mode; /* Variable-sized or strict-align memset may use the MOPS expansion. */ - if (!CONST_INT_P (operands[1]) || !TARGET_SIMD + if (!CONST_INT_P (operands[1]) || !TARGET_SIMD_MEMOPS || (STRICT_ALIGNMENT && align < 16)) return aarch64_expand_setmem_mops (operands); diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index fe02a02a57b3d..43a9f8ac28a68 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -125,6 +125,13 @@ of LSE instructions. */ #define TARGET_OUTLINE_ATOMICS (aarch64_flag_outline_atomics) +#ifndef AARCH64_SIMD_MEMOPS_DEFAULT +#define AARCH64_SIMD_MEMOPS_DEFAULT 1 +#endif + +/* Allow use of SIMD registers for memory copy and set expansions.  */ +#define TARGET_SIMD_MEMOPS (TARGET_SIMD && aarch64_flag_simd_memops) + /* Align definitions of arrays, unions and structures so that initializations and copies can be made more efficient. This is not ABI-changing, so it only affects places where we can see the diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt index 6356c419399bd..fe9b91d1a72cf 100644 --- a/gcc/config/aarch64/aarch64.opt +++ b/gcc/config/aarch64/aarch64.opt @@ -332,6 +332,10 @@ moutline-atomics Target Var(aarch64_flag_outline_atomics) Init(2) Save Generate local calls to out-of-line atomic operations. 
+msimd-memops +Target Var(aarch64_flag_simd_memops) Init(AARCH64_SIMD_MEMOPS_DEFAULT) Save +Allow use of SIMD registers in memory set/copy expansions. + -param=aarch64-vect-compare-costs= Target Joined UInteger Var(aarch64_vect_compare_costs) Init(1) IntegerRange(0, 1) Param When vectorizing, consider using multiple different approaches and use diff --git a/gcc/config/aarch64/aarch64.opt.urls b/gcc/config/aarch64/aarch64.opt.urls index 993634c52f880..788f6c75468f3 100644 --- a/gcc/config/aarch64/aarch64.opt.urls +++ b/gcc/config/aarch64/aarch64.opt.urls @@ -91,3 +91,5 @@ UrlSuffix(gcc/AArch64-Options.html#index-mstack-protector-guard-reg) mstack-protector-guard-offset= UrlSuffix(gcc/AArch64-Options.html#index-mstack-protector-guard-offset) +msimd-memops +UrlSuffix(gcc/AArch64-Options.html#index-msimd-memops) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 3f487db6cad75..327e374fddfe0 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -807,7 +807,7 @@ Objective-C and Objective-C++ Dialects}. -moverride=@var{string} -mverbose-cost-dump -mstack-protector-guard=@var{guard} -mstack-protector-guard-reg=@var{sysreg} -mstack-protector-guard-offset=@var{offset} -mtrack-speculation --moutline-atomics -mearly-ldp-fusion -mlate-ldp-fusion} +-moutline-atomics -mearly-ldp-fusion -mlate-ldp-fusion -msimd-memops} @emph{Adapteva Epiphany Options} @gccoptlist{-mhalf-reg-file -mprefer-short-insn-regs @@ -21357,6 +21357,15 @@ used directly. The same applies when using @option{-mcpu=} when the selected cpu supports the @samp{lse} feature. This option is on by default. +@opindex msimd-memops +@item -msimd-memops +@itemx -mno-simd-memops +Enable or disable use of Advanced SIMD registers when expanding memory +copy and memory set operations. Use of these registers can improve +performance and reduce instruction count for these operations. This +option is ignored unless Advanced SIMD registers are available. +This option is on by default. 
+ @opindex march @item -march=@var{name} Specify the name of the target architecture and, optionally, one or