@@ -42,6 +42,36 @@ public static ref T DangerousGetReference<T>(this ReadOnlySpan<T> span)
4242 [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
4343 public static unsafe ref T DangerousGetReferenceAt < T > ( this ReadOnlySpan < T > span , int i )
4444 {
45+ // Here we assume the input index will never be negative, so we do an unsafe cast to
46+ // force the JIT to skip the sign extension when going from int to native int.
47+ // On .NET Core 3.1, if we only use Unsafe.Add(ref r0, i), we get the following:
48+ // =============================
49+ // L0000: mov rax, [rcx]
50+ // L0003: movsxd rdx, edx
51+ // L0006: lea rax, [rax+rdx*4]
52+ // L000a: ret
53+ // =============================
54+ // Note the movsxd (move with sign extension) to expand the index passed in edx to
55+ // the whole rdx register. This is unnecessary and more expensive than just a mov,
56+ // which when done to a large register size automatically zeroes the upper bits.
57+ // With the (IntPtr)(void*)(uint) cast, we get the following codegen instead:
58+ // =============================
59+ // L0000: mov rax, [rcx]
60+ // L0003: mov edx, edx
61+ // L0005: lea rax, [rax+rdx*4]
62+ // L0009: ret
63+ // =============================
64+ // Here we can see how the index is extended to a native integer with just a mov,
65+ // which effectively only zeroes the upper bits of the same register used as source.
66+ // These three casts are a bit verbose, but they do the trick on both 32 bit and 64
67+ // bit architectures, producing optimal code in both cases (they are either completely
68+ // elided on 32 bit systems, or result in the correct register expansion when on 64 bit).
69+ // We first do an unchecked conversion to uint (which is just a reinterpret-cast). We
70+ // then cast to void*, which lets the following IntPtr cast avoid the range check on 32 bit
71+ // (since uint could be out of range there if the original index was negative). The final
72+ // result is a clean mov as shown above. This will eventually be natively supported by the
73+ // JIT compiler (see https://github.com/dotnet/runtime/issues/38794), but doing this here
74+ // still ensures the optimal codegen even on existing runtimes (e.g. .NET Core 2.1 and 3.1).
4575 ref T r0 = ref MemoryMarshal . GetReference ( span ) ;
4676 ref T ri = ref Unsafe . Add ( ref r0 , ( IntPtr ) ( void * ) ( uint ) i ) ;
4777
0 commit comments