Skip to content

Commit 77ffeea

Browse files
committed
Create vector constants inline instead of loading them via ReadOnlySpan<byte>
1 parent 9756ae9 commit 77ffeea

File tree

2 files changed

+56
-60
lines changed

2 files changed

+56
-60
lines changed

src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs

Lines changed: 55 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -13,33 +13,38 @@ internal static partial class SimdUtils
1313
{
1414
public static class HwIntrinsics
1515
{
16-
public static ReadOnlySpan<byte> PermuteMaskDeinterleave8x32 => new byte[] { 0, 0, 0, 0, 4, 0, 0, 0, 1, 0, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0 };
16+
[MethodImpl(MethodImplOptions.AggressiveInlining)] // too much IL for JIT to inline, so give a hint
17+
// Returns the eight 32-bit lanes 0, 4, 1, 5, 2, 6, 3, 7.
// The lanes are stated directly rather than spelled as 32 little-endian bytes and reinterpreted,
// so the value no longer depends on byte order or on which 32-argument Create overload is chosen.
// NOTE(review): presumably used as the index operand of a cross-lane permute (going by the name) - confirm at call sites.
public static Vector256<int> PermuteMaskDeinterleave8x32() => Vector256.Create(0, 4, 1, 5, 2, 6, 3, 7);
1718

18-
public static ReadOnlySpan<byte> PermuteMaskEvenOdd8x32 => new byte[] { 0, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 6, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 5, 0, 0, 0, 7, 0, 0, 0 };
19+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
20+
// Returns the eight 32-bit lanes 0, 2, 4, 6, 1, 3, 5, 7 (even indices first, then odd).
// The lanes are stated directly as uint rather than spelled as 32 little-endian bytes and reinterpreted.
public static Vector256<uint> PermuteMaskEvenOdd8x32() => Vector256.Create(0u, 2u, 4u, 6u, 1u, 3u, 5u, 7u);
1921

20-
public static ReadOnlySpan<byte> PermuteMaskSwitchInnerDWords8x32 => new byte[] { 0, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0 };
22+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
23+
// Returns the eight 32-bit lanes 0, 1, 4, 5, 2, 3, 6, 7 (the two middle lane pairs swapped).
// The lanes are stated directly as uint rather than spelled as 32 little-endian bytes and reinterpreted.
public static Vector256<uint> PermuteMaskSwitchInnerDWords8x32() => Vector256.Create(0u, 1u, 4u, 5u, 2u, 3u, 6u, 7u);
2124

22-
private static ReadOnlySpan<byte> MoveFirst24BytesToSeparateLanes => new byte[] { 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 7, 0, 0, 0 };
25+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
26+
// Returns the eight 32-bit lanes 0, 1, 2, 6, 3, 4, 5, 7.
// The lanes are stated directly as uint rather than spelled as 32 little-endian bytes and reinterpreted.
// NOTE(review): going by the name, this moves the first 24 bytes so that 12 land in each 128-bit half - confirm at the call site.
private static Vector256<uint> MoveFirst24BytesToSeparateLanes() => Vector256.Create(0u, 1u, 2u, 6u, 3u, 4u, 5u, 7u);
2327

24-
internal static ReadOnlySpan<byte> ExtractRgb => new byte[] { 0, 3, 6, 9, 1, 4, 7, 10, 2, 5, 8, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0, 3, 6, 9, 1, 4, 7, 10, 2, 5, 8, 11, 0xFF, 0xFF, 0xFF, 0xFF };
28+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
29+
// Returns a 32-byte mask whose two 16-byte halves are identical:
// indices 0, 3, 6, 9, then 1, 4, 7, 10, then 2, 5, 8, 11, followed by four 0xFF bytes.
internal static Vector256<byte> ExtractRgb()
{
    const byte Filler = 0xFF;
    return Vector256.Create(0, 3, 6, 9, 1, 4, 7, 10, 2, 5, 8, 11, Filler, Filler, Filler, Filler, 0, 3, 6, 9, 1, 4, 7, 10, 2, 5, 8, 11, Filler, Filler, Filler, Filler);
}
2530

26-
private static ReadOnlySpan<byte> ShuffleMaskPad4Nx16 => new byte[] { 0, 1, 2, 0x80, 3, 4, 5, 0x80, 6, 7, 8, 0x80, 9, 10, 11, 0x80 };
31+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
32+
// Returns a 16-byte mask: source indices 0..11 in groups of three, each group followed by one 0x80 byte.
private static Vector128<byte> ShuffleMaskPad4Nx16()
{
    const byte Pad = 0x80;
    return Vector128.Create(0, 1, 2, Pad, 3, 4, 5, Pad, 6, 7, 8, Pad, 9, 10, 11, Pad);
}
2733

28-
private static ReadOnlySpan<byte> ShuffleMaskSlice4Nx16 => new byte[] { 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 0x80, 0x80, 0x80, 0x80 };
34+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
35+
// Returns a 16-byte mask: the first three indices of each group of four (0..14, skipping 3, 7, 11),
// followed by four 0x80 bytes.
private static Vector128<byte> ShuffleMaskSlice4Nx16()
{
    const byte Tail = 0x80;
    return Vector128.Create(0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, Tail, Tail, Tail, Tail);
}
2936

30-
private static ReadOnlySpan<byte> ShuffleMaskShiftAlpha =>
31-
new byte[]
32-
{
33-
0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15,
34-
0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15
35-
};
37+
#pragma warning disable SA1003, SA1116, SA1117 // Parameters should be on same line or separate lines
38+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
39+
// Returns a 32-byte mask whose two 16-byte halves are identical:
// indices 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14 followed by 3, 7, 11, 15
// (every fourth byte of each half is moved to the end of that half).
// The leading (byte) cast types the first argument as byte, so the call binds to the
// byte overload of Vector256.Create and matches the declared return type.
private static Vector256<byte> ShuffleMaskShiftAlpha() => Vector256.Create((byte)
40+
0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15,
41+
0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15);
3642

37-
public static ReadOnlySpan<byte> PermuteMaskShiftAlpha8x32 =>
38-
new byte[]
39-
{
40-
0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0,
41-
5, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0
42-
};
43+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
44+
// Returns the eight 32-bit lanes 0, 1, 2, 4, 5, 6, 3, 7.
// The lanes are stated directly as uint rather than spelled as 32 little-endian bytes and reinterpreted.
public static Vector256<uint> PermuteMaskShiftAlpha8x32() => Vector256.Create(0u, 1u, 2u, 4u, 5u, 6u, 3u, 7u);
47+
#pragma warning restore SA1003, SA1116, SA1117 // Parameters should be on same line or separate lines
4348

4449
/// <summary>
4550
/// Shuffle single-precision (32-bit) floating-point elements in <paramref name="source"/>
@@ -189,7 +194,7 @@ public static void Shuffle4Slice3Reduce(
189194
{
190195
if (Ssse3.IsSupported)
191196
{
192-
int remainder = source.Length % (Vector128<byte>.Count * 4);
197+
int remainder = source.Length & (Vector128<byte>.Count * 4 - 1); // bit-hack for modulo
193198

194199
int sourceCount = source.Length - remainder;
195200
int destCount = (int)((uint)sourceCount * 3 / 4);
@@ -221,7 +226,7 @@ private static void Shuffle4(
221226
ref Vector256<float> destBase =
222227
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(dest));
223228

224-
nint n = (nint)(uint)(dest.Length / Vector256<float>.Count);
229+
nint n = (nint)((uint)dest.Length / (uint)Vector256<float>.Count);
225230
nint m = Numerics.Modulo4(n);
226231
nint u = n - m;
227232

@@ -253,7 +258,7 @@ private static void Shuffle4(
253258
ref Vector128<float> destBase =
254259
ref Unsafe.As<float, Vector128<float>>(ref MemoryMarshal.GetReference(dest));
255260

256-
nint n = (nint)(uint)dest.Length / Vector128<float>.Count;
261+
nint n = (nint)((uint)dest.Length / (uint)Vector128<float>.Count);
257262
nint m = Numerics.Modulo4(n);
258263
nint u = n - m;
259264

@@ -306,7 +311,7 @@ private static void Shuffle4(
306311
ref Vector256<byte> destBase =
307312
ref Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(dest));
308313

309-
nint n = (nint)(uint)dest.Length / Vector256<byte>.Count;
314+
nint n = (nint)((uint)dest.Length / (uint)Vector256<byte>.Count);
310315
nint m = Numerics.Modulo4(n);
311316
nint u = n - m;
312317

@@ -342,7 +347,7 @@ private static void Shuffle4(
342347
ref Vector128<byte> destBase =
343348
ref Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(dest));
344349

345-
nint n = (nint)(uint)dest.Length / Vector128<byte>.Count;
350+
nint n = (nint)((uint)dest.Length / (uint)Vector128<byte>.Count);
346351
nint m = Numerics.Modulo4(n);
347352
nint u = n - m;
348353

@@ -375,10 +380,8 @@ private static void Shuffle3(
375380
{
376381
if (Ssse3.IsSupported)
377382
{
378-
ref byte vmaskBase = ref MemoryMarshal.GetReference(ShuffleMaskPad4Nx16);
379-
Vector128<byte> vmask = Unsafe.As<byte, Vector128<byte>>(ref vmaskBase);
380-
ref byte vmaskoBase = ref MemoryMarshal.GetReference(ShuffleMaskSlice4Nx16);
381-
Vector128<byte> vmasko = Unsafe.As<byte, Vector128<byte>>(ref vmaskoBase);
383+
Vector128<byte> vmask = ShuffleMaskPad4Nx16();
384+
Vector128<byte> vmasko = ShuffleMaskSlice4Nx16();
382385
Vector128<byte> vmaske = Ssse3.AlignRight(vmasko, vmasko, 12);
383386

384387
Span<byte> bytes = stackalloc byte[Vector128<byte>.Count];
@@ -440,8 +443,7 @@ private static void Pad3Shuffle4(
440443
{
441444
if (Ssse3.IsSupported)
442445
{
443-
ref byte vmaskBase = ref MemoryMarshal.GetReference(ShuffleMaskPad4Nx16);
444-
Vector128<byte> vmask = Unsafe.As<byte, Vector128<byte>>(ref vmaskBase);
446+
Vector128<byte> vmask = ShuffleMaskPad4Nx16();
445447
Vector128<byte> vfill = Vector128.Create(0xff000000ff000000ul).AsByte();
446448

447449
Span<byte> bytes = stackalloc byte[Vector128<byte>.Count];
@@ -484,8 +486,7 @@ private static void Shuffle4Slice3(
484486
{
485487
if (Ssse3.IsSupported)
486488
{
487-
ref byte vmaskoBase = ref MemoryMarshal.GetReference(ShuffleMaskSlice4Nx16);
488-
Vector128<byte> vmasko = Unsafe.As<byte, Vector128<byte>>(ref vmaskoBase);
489+
Vector128<byte> vmasko = ShuffleMaskSlice4Nx16();
489490
Vector128<byte> vmaske = Ssse3.AlignRight(vmasko, vmasko, 12);
490491

491492
Span<byte> bytes = stackalloc byte[Vector128<byte>.Count];
@@ -542,9 +543,9 @@ private static void Shuffle4Slice3(
542543
/// <returns>The <see cref="Vector256{T}"/>.</returns>
543544
[MethodImpl(InliningOptions.AlwaysInline)]
544545
public static Vector256<float> MultiplyAdd(
545-
in Vector256<float> va,
546-
in Vector256<float> vm0,
547-
in Vector256<float> vm1)
546+
Vector256<float> va,
547+
Vector256<float> vm0,
548+
Vector256<float> vm1)
548549
{
549550
if (Fma.IsSupported)
550551
{
@@ -565,9 +566,9 @@ public static Vector256<float> MultiplyAdd(
565566
/// <returns>The <see cref="Vector256{T}"/>.</returns>
566567
[MethodImpl(InliningOptions.ShortMethod)]
567568
public static Vector256<float> MultiplySubtract(
568-
in Vector256<float> vs,
569-
in Vector256<float> vm0,
570-
in Vector256<float> vm1)
569+
Vector256<float> vs,
570+
Vector256<float> vm0,
571+
Vector256<float> vm1)
571572
{
572573
if (Fma.IsSupported)
573574
{
@@ -587,9 +588,9 @@ public static Vector256<float> MultiplySubtract(
587588
/// <returns>The <see cref="Vector256{T}"/>.</returns>
588589
[MethodImpl(InliningOptions.ShortMethod)]
589590
public static Vector256<float> MultiplyAddNegated(
590-
in Vector256<float> a,
591-
in Vector256<float> b,
592-
in Vector256<float> c)
591+
Vector256<float> a,
592+
Vector256<float> b,
593+
Vector256<float> c)
593594
{
594595
if (Fma.IsSupported)
595596
{
@@ -655,7 +656,7 @@ internal static unsafe void ByteToNormalizedFloat(
655656
ref Vector256<float> destBase =
656657
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(dest));
657658

658-
var scale = Vector256.Create(1 / (float)byte.MaxValue);
659+
Vector256<float> scale = Vector256.Create(1 / (float)byte.MaxValue);
659660

660661
for (nuint i = 0; i < n; i++)
661662
{
@@ -688,7 +689,7 @@ internal static unsafe void ByteToNormalizedFloat(
688689
ref Vector128<float> destBase =
689690
ref Unsafe.As<float, Vector128<float>>(ref MemoryMarshal.GetReference(dest));
690691

691-
var scale = Vector128.Create(1 / (float)byte.MaxValue);
692+
Vector128<float> scale = Vector128.Create(1 / (float)byte.MaxValue);
692693
Vector128<byte> zero = Vector128<byte>.Zero;
693694

694695
for (nuint i = 0; i < n; i++)
@@ -790,9 +791,8 @@ internal static void NormalizedFloatToByteSaturate(
790791
ref Vector256<byte> destBase =
791792
ref Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(dest));
792793

793-
var scale = Vector256.Create((float)byte.MaxValue);
794-
ref byte maskBase = ref MemoryMarshal.GetReference(PermuteMaskDeinterleave8x32);
795-
Vector256<int> mask = Unsafe.As<byte, Vector256<int>>(ref maskBase);
794+
Vector256<float> scale = Vector256.Create((float)byte.MaxValue);
795+
Vector256<int> mask = PermuteMaskDeinterleave8x32();
796796

797797
for (nuint i = 0; i < n; i++)
798798
{
@@ -829,7 +829,7 @@ internal static void NormalizedFloatToByteSaturate(
829829
ref Vector128<byte> destBase =
830830
ref Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(dest));
831831

832-
var scale = Vector128.Create((float)byte.MaxValue);
832+
Vector128<float> scale = Vector128.Create((float)byte.MaxValue);
833833

834834
for (nuint i = 0; i < n; i++)
835835
{
@@ -866,14 +866,12 @@ internal static void PackFromRgbPlanesAvx2Reduce(
866866

867867
nuint count = (uint)redChannel.Length / (uint)Vector256<byte>.Count;
868868

869-
ref byte control1Bytes = ref MemoryMarshal.GetReference(PermuteMaskEvenOdd8x32);
870-
Vector256<uint> control1 = Unsafe.As<byte, Vector256<uint>>(ref control1Bytes);
869+
Vector256<uint> control1 = PermuteMaskEvenOdd8x32();
871870

872-
ref byte control2Bytes = ref MemoryMarshal.GetReference(PermuteMaskShiftAlpha8x32);
873-
Vector256<uint> control2 = Unsafe.As<byte, Vector256<uint>>(ref control2Bytes);
874-
var a = Vector256.Create((byte)255);
871+
Vector256<uint> control2 = PermuteMaskShiftAlpha8x32();
872+
Vector256<byte> a = Vector256.Create((byte)255);
875873

876-
Vector256<byte> shuffleAlpha = Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(ShuffleMaskShiftAlpha));
874+
Vector256<byte> shuffleAlpha = ShuffleMaskShiftAlpha();
877875

878876
for (nuint i = 0; i < count; i++)
879877
{
@@ -937,9 +935,8 @@ internal static void PackFromRgbPlanesAvx2Reduce(
937935
ref Vector256<byte> dBase = ref Unsafe.As<Rgba32, Vector256<byte>>(ref MemoryMarshal.GetReference(destination));
938936

939937
nuint count = (uint)redChannel.Length / (uint)Vector256<byte>.Count;
940-
ref byte control1Bytes = ref MemoryMarshal.GetReference(PermuteMaskEvenOdd8x32);
941-
Vector256<uint> control1 = Unsafe.As<byte, Vector256<uint>>(ref control1Bytes);
942-
var a = Vector256.Create((byte)255);
938+
Vector256<uint> control1 = PermuteMaskEvenOdd8x32();
939+
Vector256<byte> a = Vector256.Create((byte)255);
943940

944941
for (nuint i = 0; i < count; i++)
945942
{
@@ -988,8 +985,8 @@ internal static void UnpackToRgbPlanesAvx2Reduce(
988985
ref Vector256<float> destGRef = ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(greenChannel));
989986
ref Vector256<float> destBRef = ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(blueChannel));
990987

991-
Vector256<uint> extractToLanesMask = Unsafe.As<byte, Vector256<uint>>(ref MemoryMarshal.GetReference(MoveFirst24BytesToSeparateLanes));
992-
Vector256<byte> extractRgbMask = Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(ExtractRgb));
988+
Vector256<uint> extractToLanesMask = MoveFirst24BytesToSeparateLanes();
989+
Vector256<byte> extractRgbMask = ExtractRgb();
993990
Vector256<byte> rgb, rg, bx;
994991
Vector256<float> r, g, b;
995992

tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_PackFromRgbPlanes.cs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -207,8 +207,7 @@ public void Rgba32_Avx2_Float()
207207

208208
nuint count = (uint)this.Count / (uint)Vector256<float>.Count;
209209

210-
ref byte control = ref MemoryMarshal.GetReference(SimdUtils.HwIntrinsics.PermuteMaskEvenOdd8x32);
211-
Vector256<int> vcontrol = Unsafe.As<byte, Vector256<int>>(ref control);
210+
Vector256<int> vcontrol = SimdUtils.HwIntrinsics.PermuteMaskEvenOdd8x32().AsInt32();
212211

213212
var va = Vector256.Create(1F);
214213

0 commit comments

Comments
 (0)