Skip to content

aarch64: Add -msimd-memops option controlling SIMD usage #60

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions gcc/common/config/aarch64/aarch64-common.cc
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,10 @@ aarch64_handle_option (struct gcc_options *opts,
opts->x_aarch64_flag_outline_atomics = val;
return true;

case OPT_msimd_memops:
opts->x_aarch64_flag_simd_memops = val;
return true;

default:
return true;
}
Expand Down
3 changes: 3 additions & 0 deletions gcc/config/aarch64/aarch64-elf.h
Original file line number Diff line number Diff line change
Expand Up @@ -144,4 +144,7 @@ ASM_MABI_SPEC
#undef TYPE_OPERAND_FMT
#define TYPE_OPERAND_FMT "%%%s"

/* Override the initial value of -msimd-memops so that it is off by
   default for configurations that use this header.  */
#undef AARCH64_SIMD_MEMOPS_DEFAULT
#define AARCH64_SIMD_MEMOPS_DEFAULT 0

#endif /* GCC_AARCH64_ELF_H */
10 changes: 6 additions & 4 deletions gcc/config/aarch64/aarch64.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19371,6 +19371,8 @@ static const struct aarch64_attribute_info aarch64_attributes[] =
OPT_msign_return_address_ },
{ "outline-atomics", aarch64_attr_bool, true, NULL,
OPT_moutline_atomics},
{ "simd-memops", aarch64_attr_bool, true, NULL,
OPT_msimd_memops},
{ NULL, aarch64_attr_custom, false, NULL, OPT____ }
};

Expand Down Expand Up @@ -26652,8 +26654,8 @@ aarch64_expand_cpymem (rtx *operands, bool is_memmove)
return aarch64_expand_cpymem_mops (operands, is_memmove);

unsigned HOST_WIDE_INT size = UINTVAL (operands[2]);
bool use_ldpq = TARGET_SIMD && !(aarch64_tune_params.extra_tuning_flags
& AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS);
bool use_ldpq = TARGET_SIMD_MEMOPS && !(aarch64_tune_params.extra_tuning_flags
& AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS);

/* Set inline limits for memmove/memcpy. MOPS has a separate threshold. */
unsigned max_copy_size = use_ldpq ? 256 : 128;
Expand All @@ -26673,7 +26675,7 @@ aarch64_expand_cpymem (rtx *operands, bool is_memmove)
??? Although it would be possible to use LDP/STP Qn in streaming mode
(so using TARGET_BASE_SIMD instead of TARGET_SIMD), it isn't clear
whether that would improve performance. */
bool use_qregs = size > 24 && TARGET_SIMD;
bool use_qregs = size > 24 && TARGET_SIMD_MEMOPS;

base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
dst = adjust_automodify_address (dst, VOIDmode, base, 0);
Expand Down Expand Up @@ -26814,7 +26816,7 @@ aarch64_expand_setmem (rtx *operands)
machine_mode cur_mode = BLKmode, next_mode;

/* Variable-sized or strict-align memset may use the MOPS expansion. */
if (!CONST_INT_P (operands[1]) || !TARGET_SIMD
if (!CONST_INT_P (operands[1]) || !TARGET_SIMD_MEMOPS
|| (STRICT_ALIGNMENT && align < 16))
return aarch64_expand_setmem_mops (operands);

Expand Down
7 changes: 7 additions & 0 deletions gcc/config/aarch64/aarch64.h
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,13 @@
of LSE instructions. */
#define TARGET_OUTLINE_ATOMICS (aarch64_flag_outline_atomics)

/* Initial value of the -msimd-memops option.  Enabled unless a
   definition of this macro has already been provided.  */
#ifndef AARCH64_SIMD_MEMOPS_DEFAULT
#define AARCH64_SIMD_MEMOPS_DEFAULT 1
#endif

/* Allow use of SIMD registers for memory copy and set expansions.
   This requires both TARGET_SIMD and -msimd-memops to be in effect.  */
#define TARGET_SIMD_MEMOPS (TARGET_SIMD && aarch64_flag_simd_memops)

/* Align definitions of arrays, unions and structures so that
initializations and copies can be made more efficient. This is not
ABI-changing, so it only affects places where we can see the
Expand Down
4 changes: 4 additions & 0 deletions gcc/config/aarch64/aarch64.opt
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,10 @@ moutline-atomics
Target Var(aarch64_flag_outline_atomics) Init(2) Save
Generate local calls to out-of-line atomic operations.

msimd-memops
Target Var(aarch64_flag_simd_memops) Init(AARCH64_SIMD_MEMOPS_DEFAULT) Save
Allow use of SIMD registers in memory set/copy expansions.

-param=aarch64-vect-compare-costs=
Target Joined UInteger Var(aarch64_vect_compare_costs) Init(1) IntegerRange(0, 1) Param
When vectorizing, consider using multiple different approaches and use
Expand Down
2 changes: 2 additions & 0 deletions gcc/config/aarch64/aarch64.opt.urls
Original file line number Diff line number Diff line change
Expand Up @@ -91,3 +91,5 @@ UrlSuffix(gcc/AArch64-Options.html#index-mstack-protector-guard-reg)
mstack-protector-guard-offset=
UrlSuffix(gcc/AArch64-Options.html#index-mstack-protector-guard-offset)

msimd-memops
UrlSuffix(gcc/AArch64-Options.html#index-msimd-memops)
10 changes: 9 additions & 1 deletion gcc/doc/invoke.texi
Original file line number Diff line number Diff line change
Expand Up @@ -807,7 +807,7 @@ Objective-C and Objective-C++ Dialects}.
-moverride=@var{string} -mverbose-cost-dump
-mstack-protector-guard=@var{guard} -mstack-protector-guard-reg=@var{sysreg}
-mstack-protector-guard-offset=@var{offset} -mtrack-speculation
-moutline-atomics -mearly-ldp-fusion -mlate-ldp-fusion}
-moutline-atomics -mearly-ldp-fusion -mlate-ldp-fusion -msimd-memops}

@emph{Adapteva Epiphany Options}
@gccoptlist{-mhalf-reg-file -mprefer-short-insn-regs
Expand Down Expand Up @@ -21357,6 +21357,14 @@ used directly. The same applies when using @option{-mcpu=} when the
selected cpu supports the @samp{lse} feature.
This option is on by default.

@opindex msimd-memops
@opindex mno-simd-memops
@item -msimd-memops
@itemx -mno-simd-memops
Enable or disable use of Advanced SIMD registers when expanding memory
copy and memory set operations. Use of these registers can improve
performance and reduce instruction count for these operations. This
option is ignored unless Advanced SIMD registers are available.
This option is on by default, except on target configurations that
change the default to off.

@opindex march
@item -march=@var{name}
Specify the name of the target architecture and, optionally, one or
Expand Down