Commit ffb52e9
Optimize stackalloc zeroing on arm64 via STORE_BLK (#121986)
Enable on arm64 the optimization already used on x64, where LCLHEAP
(stackalloc) memory is zeroed via a STORE_BLK inserted in Lower.
```cs
static void Test128() => Consume(stackalloc char[128]);
```
was:
```asm
stp xzr, xzr, [sp, #-0x10]!
stp xzr, xzr, [sp, #-0xF0]!
stp xzr, xzr, [sp, #0x10]
stp xzr, xzr, [sp, #0x20]
stp xzr, xzr, [sp, #0x30]
stp xzr, xzr, [sp, #0x40]
stp xzr, xzr, [sp, #0x50]
stp xzr, xzr, [sp, #0x60]
stp xzr, xzr, [sp, #0x70]
stp xzr, xzr, [sp, #0x80]
stp xzr, xzr, [sp, #0x90]
stp xzr, xzr, [sp, #0xA0]
stp xzr, xzr, [sp, #0xB0]
stp xzr, xzr, [sp, #0xC0]
stp xzr, xzr, [sp, #0xD0]
stp xzr, xzr, [sp, #0xE0]
```
now:
```asm
movi v16.16b, #0
stp q16, q16, [x0]
stp q16, q16, [x0, #0x20]
stp q16, q16, [x0, #0x40]
stp q16, q16, [x0, #0x60]
stp q16, q16, [x0, #0x80]
stp q16, q16, [x0, #0xA0]
stp q16, q16, [x0, #0xC0]
stp q16, q16, [x0, #0xE0]
```
Also, for larger sizes (e.g. 1024 bytes) the previous logic emitted a
slow loop:
```asm
mov w0, #0x400
G_M30953_IG03:
stp xzr, xzr, [sp, #-0x10]!
subs x0, x0, #16
bne G_M30953_IG03
```
Now it emits a call to `CORINFO_HELP_MEMZERO`.
Benchmarks (EgorBot/runtime-utils#553):
```cs
using System.Runtime.CompilerServices;
using BenchmarkDotNet.Attributes;
public class Benchmarks
{
[Benchmark] public void Stackalloc64() => Consume(stackalloc byte[64]);
[Benchmark] public void Stackalloc128() => Consume(stackalloc byte[128]);
[Benchmark] public void Stackalloc256() => Consume(stackalloc byte[256]);
[Benchmark] public void Stackalloc512() => Consume(stackalloc byte[512]);
[Benchmark] public void Stackalloc1024() => Consume(stackalloc byte[1024]);
[Benchmark] public void Stackalloc16384() => Consume(stackalloc byte[16384]);
[MethodImpl(MethodImplOptions.NoInlining)]
static void Consume(Span<byte> x){}
}
```
| Method | Toolchain | Mean | Error | Ratio |
|---------------- |---------- |-----------:|----------:|------:|
| Stackalloc64 | Main | 3.425 ns | 0.0004 ns | 1.00 |
| Stackalloc64 | PR | 2.559 ns | 0.0008 ns | 0.75 |
| | | | | |
| Stackalloc128 | Main | 3.999 ns | 0.0002 ns | 1.00 |
| Stackalloc128 | PR | 2.404 ns | 0.0003 ns | 0.60 |
| | | | | |
| Stackalloc256 | Main | 5.431 ns | 0.0005 ns | 1.00 |
| Stackalloc256 | PR | 2.754 ns | 0.0003 ns | 0.51 |
| | | | | |
| Stackalloc512 | Main | 12.661 ns | 0.2744 ns | 1.00 |
| Stackalloc512 | PR | 7.423 ns | 0.0008 ns | 0.59 |
| | | | | |
| Stackalloc1024 | Main | 24.958 ns | 0.5326 ns | 1.00 |
| Stackalloc1024 | PR | 14.031 ns | 0.0040 ns | 0.56 |
| | | | | |
| Stackalloc16384 | Main | 374.899 ns | 0.0130 ns | 1.00 |
| Stackalloc16384 | PR | 111.029 ns | 1.2123 ns | 0.30 |
---------
Co-authored-by: Jakob Botsch Nielsen <[email protected]>
1 parent 80d3434 commit ffb52e9
3 files changed
+21
-82
lines changed
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
3018 | 3018 | | |
3019 | 3019 | | |
3020 | 3020 | | |
3021 | | - | |
3022 | 3021 | | |
3023 | 3022 | | |
3024 | 3023 | | |
| |||
3027 | 3026 | | |
3028 | 3027 | | |
3029 | 3028 | | |
| 3029 | + | |
| 3030 | + | |
3030 | 3031 | | |
3031 | 3032 | | |
3032 | | - | |
| 3033 | + | |
3033 | 3034 | | |
3034 | | - | |
3035 | | - | |
| 3035 | + | |
| 3036 | + | |
| 3037 | + | |
3036 | 3038 | | |
3037 | 3039 | | |
3038 | 3040 | | |
| |||
3056 | 3058 | | |
3057 | 3059 | | |
3058 | 3060 | | |
3059 | | - | |
| 3061 | + | |
3060 | 3062 | | |
3061 | 3063 | | |
3062 | 3064 | | |
| |||
3093 | 3095 | | |
3094 | 3096 | | |
3095 | 3097 | | |
3096 | | - | |
| 3098 | + | |
3097 | 3099 | | |
3098 | 3100 | | |
3099 | 3101 | | |
| |||
3104 | 3106 | | |
3105 | 3107 | | |
3106 | 3108 | | |
3107 | | - | |
3108 | | - | |
3109 | | - | |
3110 | | - | |
3111 | | - | |
3112 | | - | |
3113 | | - | |
3114 | | - | |
3115 | | - | |
3116 | | - | |
3117 | | - | |
3118 | | - | |
3119 | | - | |
3120 | | - | |
3121 | | - | |
3122 | | - | |
3123 | | - | |
3124 | | - | |
3125 | | - | |
3126 | | - | |
3127 | | - | |
3128 | | - | |
3129 | | - | |
3130 | | - | |
3131 | | - | |
3132 | | - | |
3133 | | - | |
3134 | | - | |
3135 | | - | |
3136 | | - | |
3137 | | - | |
3138 | | - | |
3139 | | - | |
| 3109 | + | |
3140 | 3110 | | |
3141 | 3111 | | |
3142 | 3112 | | |
| |||
3178 | 3148 | | |
3179 | 3149 | | |
3180 | 3150 | | |
3181 | | - | |
| 3151 | + | |
3182 | 3152 | | |
3183 | 3153 | | |
3184 | 3154 | | |
| |||
3190 | 3160 | | |
3191 | 3161 | | |
3192 | 3162 | | |
3193 | | - | |
| 3163 | + | |
3194 | 3164 | | |
3195 | 3165 | | |
3196 | 3166 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
11418 | 11418 | | |
11419 | 11419 | | |
11420 | 11420 | | |
11421 | | - | |
| 11421 | + | |
11422 | 11422 | | |
11423 | 11423 | | |
11424 | 11424 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
1129 | 1129 | | |
1130 | 1130 | | |
1131 | 1131 | | |
1132 | | - | |
1133 | | - | |
1134 | | - | |
1135 | | - | |
1136 | | - | |
1137 | | - | |
1138 | | - | |
1139 | | - | |
1140 | | - | |
1141 | | - | |
1142 | | - | |
1143 | | - | |
1144 | 1132 | | |
1145 | | - | |
| 1133 | + | |
1146 | 1134 | | |
1147 | | - | |
1148 | 1135 | | |
1149 | 1136 | | |
| 1137 | + | |
| 1138 | + | |
| 1139 | + | |
| 1140 | + | |
1150 | 1141 | | |
1151 | | - | |
1152 | | - | |
| 1142 | + | |
1153 | 1143 | | |
1154 | | - | |
1155 | | - | |
1156 | | - | |
1157 | | - | |
1158 | | - | |
1159 | | - | |
1160 | | - | |
1161 | | - | |
1162 | | - | |
1163 | | - | |
1164 | | - | |
1165 | | - | |
1166 | | - | |
1167 | | - | |
1168 | | - | |
1169 | | - | |
1170 | | - | |
1171 | | - | |
1172 | | - | |
1173 | | - | |
1174 | | - | |
1175 | | - | |
1176 | | - | |
1177 | | - | |
| 1144 | + | |
| 1145 | + | |
| 1146 | + | |
1178 | 1147 | | |
1179 | 1148 | | |
1180 | 1149 | | |
| |||
0 commit comments