Skip to content

Commit ae7306b

Browse files
committed
Change arguments of AccumulateSSE16Neon to pointers for better code generation
1 parent b0bfb0a commit ae7306b

File tree

1 file changed

+21 additions, -19 deletions (lines changed)

1 file changed

+21 additions, -19 deletions (lines changed)

src/ImageSharp/Formats/Webp/Lossy/LossyUtils.cs

Lines changed: 21 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -216,17 +216,18 @@ private static int Vp8_Sse16xN_Avx2(Span<byte> a, Span<byte> b, int numPairs)
216216
}
217217

218218
[MethodImpl(InliningOptions.ShortMethod)]
219-
private static int Vp8_Sse16x16_Neon(Span<byte> a, Span<byte> b)
219+
private static unsafe int Vp8_Sse16x16_Neon(Span<byte> a, Span<byte> b)
220220
{
221221
Vector128<uint> sum = Vector128<uint>.Zero;
222-
ref byte aRef = ref MemoryMarshal.GetReference(a);
223-
ref byte bRef = ref MemoryMarshal.GetReference(b);
224-
for (int y = 0; y < 16; y++)
222+
fixed (byte* aRef = &MemoryMarshal.GetReference(a))
225223
{
226-
sum = AccumulateSSE16Neon(
227-
ref Unsafe.Add(ref aRef, y * WebpConstants.Bps),
228-
ref Unsafe.Add(ref bRef, y * WebpConstants.Bps),
229-
sum);
224+
fixed (byte* bRef = &MemoryMarshal.GetReference(b))
225+
{
226+
for (int y = 0; y < 16; y++)
227+
{
228+
sum = AccumulateSSE16Neon(aRef + (y * WebpConstants.Bps), bRef + (y * WebpConstants.Bps), sum);
229+
}
230+
}
230231
}
231232

232233
#if NET7_0_OR_GREATER
@@ -237,17 +238,18 @@ ref Unsafe.Add(ref bRef, y * WebpConstants.Bps),
237238
}
238239

239240
[MethodImpl(InliningOptions.ShortMethod)]
240-
private static int Vp8_Sse16x8_Neon(Span<byte> a, Span<byte> b)
241+
private static unsafe int Vp8_Sse16x8_Neon(Span<byte> a, Span<byte> b)
241242
{
242243
Vector128<uint> sum = Vector128<uint>.Zero;
243-
ref byte aRef = ref MemoryMarshal.GetReference(a);
244-
ref byte bRef = ref MemoryMarshal.GetReference(b);
245-
for (int y = 0; y < 8; y++)
244+
fixed (byte* aRef = &MemoryMarshal.GetReference(a))
246245
{
247-
sum = AccumulateSSE16Neon(
248-
ref Unsafe.Add(ref aRef, y * WebpConstants.Bps),
249-
ref Unsafe.Add(ref bRef, y * WebpConstants.Bps),
250-
sum);
246+
fixed (byte* bRef = &MemoryMarshal.GetReference(b))
247+
{
248+
for (int y = 0; y < 8; y++)
249+
{
250+
sum = AccumulateSSE16Neon(aRef + (y * WebpConstants.Bps), bRef + (y * WebpConstants.Bps), sum);
251+
}
252+
}
251253
}
252254

253255
#if NET7_0_OR_GREATER
@@ -296,10 +298,10 @@ private static unsafe Vector128<uint> Load4x4Neon(Span<byte> src)
296298
}
297299

298300
[MethodImpl(InliningOptions.ShortMethod)]
299-
private static Vector128<uint> AccumulateSSE16Neon(ref byte aRef, ref byte bRef, Vector128<uint> sum)
301+
private static unsafe Vector128<uint> AccumulateSSE16Neon(byte* a, byte* b, Vector128<uint> sum)
300302
{
301-
Vector128<byte> a0 = Unsafe.As<byte, Vector128<byte>>(ref aRef);
302-
Vector128<byte> b0 = Unsafe.As<byte, Vector128<byte>>(ref bRef);
303+
Vector128<byte> a0 = AdvSimd.LoadVector128(a);
304+
Vector128<byte> b0 = AdvSimd.LoadVector128(b);
303305

304306
Vector128<byte> absDiff = AdvSimd.AbsoluteDifference(a0, b0);
305307
Vector64<byte> absDiffLower = absDiff.GetLower();

0 commit comments

Comments
 (0)