Skip to content

Commit dd01da6

Browse files
keith-packard authored and stephanosio committed
aarch64: Add -msimd-memops option controlling SIMD usage in memset/memcpy
This option (enabled by default) preserves existing behavior by allowing use of Advanced SIMD registers while expanding memset/memcpy/memmove operations into inline instructions. Disabling this option prevents use of these registers for environments where the FPU may be disabled to reduce the cost of saving/restoring the processor state, such as in interrupt handlers.

Signed-off-by: Keith Packard <[email protected]>
(cherry picked from commit 65837c3)
1 parent 986b697 commit dd01da6

File tree

6 files changed

+32
-5
lines changed

6 files changed

+32
-5
lines changed

gcc/common/config/aarch64/aarch64-common.cc

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -133,6 +133,10 @@ aarch64_handle_option (struct gcc_options *opts,
133133
opts->x_aarch64_flag_outline_atomics = val;
134134
return true;
135135

136+
case OPT_msimd_memops:
137+
opts->x_aarch64_flag_simd_memops = val;
138+
return true;
139+
136140
default:
137141
return true;
138142
}

gcc/config/aarch64/aarch64.cc

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -19371,6 +19371,8 @@ static const struct aarch64_attribute_info aarch64_attributes[] =
1937119371
OPT_msign_return_address_ },
1937219372
{ "outline-atomics", aarch64_attr_bool, true, NULL,
1937319373
OPT_moutline_atomics},
19374+
{ "simd-memops", aarch64_attr_bool, true, NULL,
19375+
OPT_msimd_memops},
1937419376
{ NULL, aarch64_attr_custom, false, NULL, OPT____ }
1937519377
};
1937619378

@@ -26652,8 +26654,8 @@ aarch64_expand_cpymem (rtx *operands, bool is_memmove)
2665226654
return aarch64_expand_cpymem_mops (operands, is_memmove);
2665326655

2665426656
unsigned HOST_WIDE_INT size = UINTVAL (operands[2]);
26655-
bool use_ldpq = TARGET_SIMD && !(aarch64_tune_params.extra_tuning_flags
26656-
& AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS);
26657+
bool use_ldpq = TARGET_SIMD_MEMOPS && !(aarch64_tune_params.extra_tuning_flags
26658+
& AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS);
2665726659

2665826660
/* Set inline limits for memmove/memcpy. MOPS has a separate threshold. */
2665926661
unsigned max_copy_size = use_ldpq ? 256 : 128;
@@ -26673,7 +26675,7 @@ aarch64_expand_cpymem (rtx *operands, bool is_memmove)
2667326675
??? Although it would be possible to use LDP/STP Qn in streaming mode
2667426676
(so using TARGET_BASE_SIMD instead of TARGET_SIMD), it isn't clear
2667526677
whether that would improve performance. */
26676-
bool use_qregs = size > 24 && TARGET_SIMD;
26678+
bool use_qregs = size > 24 && TARGET_SIMD_MEMOPS;
2667726679

2667826680
base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
2667926681
dst = adjust_automodify_address (dst, VOIDmode, base, 0);
@@ -26814,7 +26816,7 @@ aarch64_expand_setmem (rtx *operands)
2681426816
machine_mode cur_mode = BLKmode, next_mode;
2681526817

2681626818
/* Variable-sized or strict-align memset may use the MOPS expansion. */
26817-
if (!CONST_INT_P (operands[1]) || !TARGET_SIMD
26819+
if (!CONST_INT_P (operands[1]) || !TARGET_SIMD_MEMOPS
2681826820
|| (STRICT_ALIGNMENT && align < 16))
2681926821
return aarch64_expand_setmem_mops (operands);
2682026822

gcc/config/aarch64/aarch64.h

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -125,6 +125,13 @@
125125
of LSE instructions. */
126126
#define TARGET_OUTLINE_ATOMICS (aarch64_flag_outline_atomics)
127127

128+
#ifndef AARCH64_SIMD_MEMOPS_DEFAULT
129+
#define AARCH64_SIMD_MEMOPS_DEFAULT 1
130+
#endif
131+
132+
/* Allow use of SIMD registers for memory copy and set expansions */
133+
#define TARGET_SIMD_MEMOPS (TARGET_SIMD && aarch64_flag_simd_memops)
134+
128135
/* Align definitions of arrays, unions and structures so that
129136
initializations and copies can be made more efficient. This is not
130137
ABI-changing, so it only affects places where we can see the

gcc/config/aarch64/aarch64.opt

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -332,6 +332,10 @@ moutline-atomics
332332
Target Var(aarch64_flag_outline_atomics) Init(2) Save
333333
Generate local calls to out-of-line atomic operations.
334334

335+
msimd-memops
336+
Target Var(aarch64_flag_simd_memops) Init(AARCH64_SIMD_MEMOPS_DEFAULT) Save
337+
Allow use of SIMD registers in memory set/copy expansions.
338+
335339
-param=aarch64-vect-compare-costs=
336340
Target Joined UInteger Var(aarch64_vect_compare_costs) Init(1) IntegerRange(0, 1) Param
337341
When vectorizing, consider using multiple different approaches and use

gcc/config/aarch64/aarch64.opt.urls

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -91,3 +91,5 @@ UrlSuffix(gcc/AArch64-Options.html#index-mstack-protector-guard-reg)
9191
mstack-protector-guard-offset=
9292
UrlSuffix(gcc/AArch64-Options.html#index-mstack-protector-guard-offset)
9393

94+
msimd-memops
95+
UrlSuffix(gcc/AArch64-Options.html#index-msimd-memops)

gcc/doc/invoke.texi

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -807,7 +807,7 @@ Objective-C and Objective-C++ Dialects}.
807807
-moverride=@var{string} -mverbose-cost-dump
808808
-mstack-protector-guard=@var{guard} -mstack-protector-guard-reg=@var{sysreg}
809809
-mstack-protector-guard-offset=@var{offset} -mtrack-speculation
810-
-moutline-atomics -mearly-ldp-fusion -mlate-ldp-fusion}
810+
-moutline-atomics -mearly-ldp-fusion -mlate-ldp-fusion -msimd-memops}
811811

812812
@emph{Adapteva Epiphany Options}
813813
@gccoptlist{-mhalf-reg-file -mprefer-short-insn-regs
@@ -21357,6 +21357,14 @@ used directly. The same applies when using @option{-mcpu=} when the
2135721357
selected cpu supports the @samp{lse} feature.
2135821358
This option is on by default.
2135921359

21360+
@item -msimd-memops
21361+
@itemx -mno-simd-memops
21362+
Enable or disable use of Advanced SIMD registers when expanding memory
21363+
copy and memory set operations. Use of these registers can improve
21364+
performance and reduce instruction count for these operations. This
21365+
option is ignored unless Advanced SIMD registers are available.
21366+
This option is on by default.
21367+
2136021368
@opindex march
2136121369
@item -march=@var{name}
2136221370
Specify the name of the target architecture and, optionally, one or

0 commit comments

Comments
 (0)