Skip to content

Commit 23a6193

Browse files
Merge pull request #2419 from gfoidl/vector-constants
Create vector constants inline and not via ROS<byte>
2 parents b6736b0 + d617907 commit 23a6193

File tree

2 files changed

+55
-59
lines changed

2 files changed

+55
-59
lines changed

src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs

Lines changed: 54 additions & 57 deletions
Original file line number | Diff line number | Diff line change
@@ -14,33 +14,38 @@ internal static partial class SimdUtils
1414
{
1515
public static class HwIntrinsics
1616
{
17-
public static ReadOnlySpan<byte> PermuteMaskDeinterleave8x32 => new byte[] { 0, 0, 0, 0, 4, 0, 0, 0, 1, 0, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0 };
17+
/// <summary>
/// Creates the permute control vector as an inline constant.
/// The 32 single-byte literals are reinterpreted as eight 32-bit integers; read as
/// little-endian elements they are 0, 4, 1, 5, 2, 6, 3, 7.
/// </summary>
/// <returns>The control vector as a <see cref="Vector256{T}"/> of <see cref="int"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)] // too much IL for JIT to inline, so give a hint
18+
public static Vector256<int> PermuteMaskDeinterleave8x32() => Vector256.Create(0, 0, 0, 0, 4, 0, 0, 0, 1, 0, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0).AsInt32();
1819

19-
public static ReadOnlySpan<byte> PermuteMaskEvenOdd8x32 => new byte[] { 0, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 6, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 5, 0, 0, 0, 7, 0, 0, 0 };
20+
/// <summary>
/// Creates the permute control vector as an inline constant.
/// The 32 single-byte literals are reinterpreted as eight 32-bit unsigned integers; read as
/// little-endian elements they are 0, 2, 4, 6, 1, 3, 5, 7 (even indices first, then odd).
/// </summary>
/// <returns>The control vector as a <see cref="Vector256{T}"/> of <see cref="uint"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
21+
public static Vector256<uint> PermuteMaskEvenOdd8x32() => Vector256.Create(0, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 6, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 5, 0, 0, 0, 7, 0, 0, 0).AsUInt32();
2022

21-
public static ReadOnlySpan<byte> PermuteMaskSwitchInnerDWords8x32 => new byte[] { 0, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0 };
23+
/// <summary>
/// Creates the permute control vector as an inline constant.
/// The 32 single-byte literals are reinterpreted as eight 32-bit unsigned integers; read as
/// little-endian elements they are 0, 1, 4, 5, 2, 3, 6, 7.
/// </summary>
/// <returns>The control vector as a <see cref="Vector256{T}"/> of <see cref="uint"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
24+
public static Vector256<uint> PermuteMaskSwitchInnerDWords8x32() => Vector256.Create(0, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0).AsUInt32();
2225

23-
private static ReadOnlySpan<byte> MoveFirst24BytesToSeparateLanes => new byte[] { 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 7, 0, 0, 0 };
26+
/// <summary>
/// Creates the permute control vector as an inline constant.
/// The 32 single-byte literals are reinterpreted as eight 32-bit unsigned integers; read as
/// little-endian elements they are 0, 1, 2, 6, 3, 4, 5, 7.
/// </summary>
/// <remarks>
/// NOTE(review): going by the name, elements 0-2 and 3-5 (24 bytes in total) are presumably routed
/// into the first three slots of each 128-bit half - confirm against the permute at the call site.
/// </remarks>
/// <returns>The control vector as a <see cref="Vector256{T}"/> of <see cref="uint"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
27+
private static Vector256<uint> MoveFirst24BytesToSeparateLanes() => Vector256.Create(0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 7, 0, 0, 0).AsUInt32();
2428

25-
internal static ReadOnlySpan<byte> ExtractRgb => new byte[] { 0, 3, 6, 9, 1, 4, 7, 10, 2, 5, 8, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0, 3, 6, 9, 1, 4, 7, 10, 2, 5, 8, 11, 0xFF, 0xFF, 0xFF, 0xFF };
29+
/// <summary>
/// Creates the byte mask as an inline constant. Both 16-byte halves hold the same pattern:
/// indices 0, 3, 6, 9, then 1, 4, 7, 10, then 2, 5, 8, 11, followed by four 0xFF bytes.
/// </summary>
/// <remarks>
/// NOTE(review): presumably a byte-shuffle mask that groups every third byte of 12 packed bytes
/// (the three channels of four pixels), with the 0xFF entries producing zero - confirm at the call site.
/// </remarks>
/// <returns>The mask as a <see cref="Vector256{T}"/> of <see cref="byte"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
30+
internal static Vector256<byte> ExtractRgb() => Vector256.Create(0, 3, 6, 9, 1, 4, 7, 10, 2, 5, 8, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0, 3, 6, 9, 1, 4, 7, 10, 2, 5, 8, 11, 0xFF, 0xFF, 0xFF, 0xFF);
2631

27-
private static ReadOnlySpan<byte> ShuffleMaskPad4Nx16 => new byte[] { 0, 1, 2, 0x80, 3, 4, 5, 0x80, 6, 7, 8, 0x80, 9, 10, 11, 0x80 };
32+
/// <summary>
/// Creates the 16-byte mask as an inline constant: the source indices 0-11 in order,
/// with a 0x80 entry inserted after every group of three indices.
/// </summary>
/// <remarks>
/// NOTE(review): presumably used with a byte shuffle, where an entry with the high bit set
/// yields a zero byte, so each 3-byte group is padded to 4 bytes - confirm at the call sites.
/// </remarks>
/// <returns>The mask as a <see cref="Vector128{T}"/> of <see cref="byte"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
33+
private static Vector128<byte> ShuffleMaskPad4Nx16() => Vector128.Create(0, 1, 2, 0x80, 3, 4, 5, 0x80, 6, 7, 8, 0x80, 9, 10, 11, 0x80);
2834

29-
private static ReadOnlySpan<byte> ShuffleMaskSlice4Nx16 => new byte[] { 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 0x80, 0x80, 0x80, 0x80 };
35+
/// <summary>
/// Creates the 16-byte mask as an inline constant: the source indices 0-14 with every
/// fourth index (3, 7, 11) left out, followed by four 0x80 entries.
/// </summary>
/// <remarks>
/// NOTE(review): presumably used with a byte shuffle to drop the fourth byte of each 4-byte
/// group and zero the trailing four bytes - confirm at the call sites.
/// </remarks>
/// <returns>The mask as a <see cref="Vector128{T}"/> of <see cref="byte"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
36+
private static Vector128<byte> ShuffleMaskSlice4Nx16() => Vector128.Create(0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 0x80, 0x80, 0x80, 0x80);
3037

31-
private static ReadOnlySpan<byte> ShuffleMaskShiftAlpha =>
32-
new byte[]
33-
{
34-
0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15,
35-
0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15
36-
};
38+
#pragma warning disable SA1003, SA1116, SA1117 // Parameters should be on same line or separate lines
39+
/// <summary>
/// Creates the byte mask as an inline constant. Both 16-byte halves hold the same pattern:
/// indices 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14 followed by 3, 7, 11, 15, i.e. every
/// fourth byte index is moved to the end of its half.
/// </summary>
/// <remarks>
/// The <c>(byte)</c> cast on the first argument makes the call bind to the all-<see cref="byte"/>
/// overload; every literal here is small enough that it would otherwise also fit <see cref="sbyte"/>.
/// </remarks>
/// <returns>The mask as a <see cref="Vector256{T}"/> of <see cref="byte"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
40+
private static Vector256<byte> ShuffleMaskShiftAlpha() => Vector256.Create((byte)
41+
0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15,
42+
0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15);
3743

38-
public static ReadOnlySpan<byte> PermuteMaskShiftAlpha8x32 =>
39-
new byte[]
40-
{
41-
0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0,
42-
5, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0
43-
};
44+
/// <summary>
/// Creates the permute control vector as an inline constant.
/// The 32 single-byte literals are reinterpreted as eight 32-bit unsigned integers; read as
/// little-endian elements they are 0, 1, 2, 4, 5, 6, 3, 7.
/// </summary>
/// <returns>The control vector as a <see cref="Vector256{T}"/> of <see cref="uint"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
45+
public static Vector256<uint> PermuteMaskShiftAlpha8x32() => Vector256.Create(
46+
0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0,
47+
5, 0, 0, 0, 6, 0, 0, 0, 3, 0, 0, 0, 7, 0, 0, 0).AsUInt32();
48+
#pragma warning restore SA1003, SA1116, SA1117 // Parameters should be on same line or separate lines
4449

4550
/// <summary>
4651
/// Shuffle single-precision (32-bit) floating-point elements in <paramref name="source"/>
@@ -190,7 +195,7 @@ public static void Shuffle4Slice3Reduce(
190195
{
191196
if (Ssse3.IsSupported)
192197
{
193-
int remainder = source.Length % (Vector128<byte>.Count * 4);
198+
int remainder = source.Length & ((Vector128<byte>.Count * 4) - 1); // bit-hack for modulo
194199

195200
int sourceCount = source.Length - remainder;
196201
int destCount = (int)((uint)sourceCount * 3 / 4);
@@ -254,7 +259,7 @@ private static void Shuffle4(
254259
ref Vector128<float> destBase =
255260
ref Unsafe.As<float, Vector128<float>>(ref MemoryMarshal.GetReference(dest));
256261

257-
nint n = (nint)(uint)dest.Length / Vector128<float>.Count;
262+
nint n = (nint)((uint)dest.Length / (uint)Vector128<float>.Count);
258263
nint m = Numerics.Modulo4(n);
259264
nint u = n - m;
260265

@@ -307,7 +312,7 @@ private static void Shuffle4(
307312
ref Vector256<byte> destBase =
308313
ref Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(dest));
309314

310-
nint n = (nint)(uint)dest.Length / Vector256<byte>.Count;
315+
nint n = (nint)((uint)dest.Length / (uint)Vector256<byte>.Count);
311316
nint m = Numerics.Modulo4(n);
312317
nint u = n - m;
313318

@@ -343,7 +348,7 @@ private static void Shuffle4(
343348
ref Vector128<byte> destBase =
344349
ref Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(dest));
345350

346-
nint n = (nint)(uint)dest.Length / Vector128<byte>.Count;
351+
nint n = (nint)((uint)dest.Length / (uint)Vector128<byte>.Count);
347352
nint m = Numerics.Modulo4(n);
348353
nint u = n - m;
349354

@@ -376,10 +381,8 @@ private static void Shuffle3(
376381
{
377382
if (Ssse3.IsSupported)
378383
{
379-
ref byte vmaskBase = ref MemoryMarshal.GetReference(ShuffleMaskPad4Nx16);
380-
Vector128<byte> vmask = Unsafe.As<byte, Vector128<byte>>(ref vmaskBase);
381-
ref byte vmaskoBase = ref MemoryMarshal.GetReference(ShuffleMaskSlice4Nx16);
382-
Vector128<byte> vmasko = Unsafe.As<byte, Vector128<byte>>(ref vmaskoBase);
384+
Vector128<byte> vmask = ShuffleMaskPad4Nx16();
385+
Vector128<byte> vmasko = ShuffleMaskSlice4Nx16();
383386
Vector128<byte> vmaske = Ssse3.AlignRight(vmasko, vmasko, 12);
384387

385388
Span<byte> bytes = stackalloc byte[Vector128<byte>.Count];
@@ -441,8 +444,7 @@ private static void Pad3Shuffle4(
441444
{
442445
if (Ssse3.IsSupported)
443446
{
444-
ref byte vmaskBase = ref MemoryMarshal.GetReference(ShuffleMaskPad4Nx16);
445-
Vector128<byte> vmask = Unsafe.As<byte, Vector128<byte>>(ref vmaskBase);
447+
Vector128<byte> vmask = ShuffleMaskPad4Nx16();
446448
Vector128<byte> vfill = Vector128.Create(0xff000000ff000000ul).AsByte();
447449

448450
Span<byte> bytes = stackalloc byte[Vector128<byte>.Count];
@@ -485,8 +487,7 @@ private static void Shuffle4Slice3(
485487
{
486488
if (Ssse3.IsSupported)
487489
{
488-
ref byte vmaskoBase = ref MemoryMarshal.GetReference(ShuffleMaskSlice4Nx16);
489-
Vector128<byte> vmasko = Unsafe.As<byte, Vector128<byte>>(ref vmaskoBase);
490+
Vector128<byte> vmasko = ShuffleMaskSlice4Nx16();
490491
Vector128<byte> vmaske = Ssse3.AlignRight(vmasko, vmasko, 12);
491492

492493
Span<byte> bytes = stackalloc byte[Vector128<byte>.Count];
@@ -543,9 +544,9 @@ private static void Shuffle4Slice3(
543544
/// <returns>The <see cref="Vector256{T}"/>.</returns>
544545
[MethodImpl(InliningOptions.AlwaysInline)]
545546
public static Vector256<float> MultiplyAdd(
546-
in Vector256<float> va,
547-
in Vector256<float> vm0,
548-
in Vector256<float> vm1)
547+
Vector256<float> va,
548+
Vector256<float> vm0,
549+
Vector256<float> vm1)
549550
{
550551
if (Fma.IsSupported)
551552
{
@@ -594,9 +595,9 @@ public static Vector128<float> MultiplyAdd(
594595
/// <returns>The <see cref="Vector256{T}"/>.</returns>
595596
[MethodImpl(InliningOptions.ShortMethod)]
596597
public static Vector256<float> MultiplySubtract(
597-
in Vector256<float> vs,
598-
in Vector256<float> vm0,
599-
in Vector256<float> vm1)
598+
Vector256<float> vs,
599+
Vector256<float> vm0,
600+
Vector256<float> vm1)
600601
{
601602
if (Fma.IsSupported)
602603
{
@@ -616,9 +617,9 @@ public static Vector256<float> MultiplySubtract(
616617
/// <returns>The <see cref="Vector256{T}"/>.</returns>
617618
[MethodImpl(InliningOptions.ShortMethod)]
618619
public static Vector256<float> MultiplyAddNegated(
619-
in Vector256<float> a,
620-
in Vector256<float> b,
621-
in Vector256<float> c)
620+
Vector256<float> a,
621+
Vector256<float> b,
622+
Vector256<float> c)
622623
{
623624
if (Fma.IsSupported)
624625
{
@@ -684,7 +685,7 @@ internal static unsafe void ByteToNormalizedFloat(
684685
ref Vector256<float> destBase =
685686
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(dest));
686687

687-
var scale = Vector256.Create(1 / (float)byte.MaxValue);
688+
Vector256<float> scale = Vector256.Create(1 / (float)byte.MaxValue);
688689

689690
for (nuint i = 0; i < n; i++)
690691
{
@@ -717,7 +718,7 @@ internal static unsafe void ByteToNormalizedFloat(
717718
ref Vector128<float> destBase =
718719
ref Unsafe.As<float, Vector128<float>>(ref MemoryMarshal.GetReference(dest));
719720

720-
var scale = Vector128.Create(1 / (float)byte.MaxValue);
721+
Vector128<float> scale = Vector128.Create(1 / (float)byte.MaxValue);
721722
Vector128<byte> zero = Vector128<byte>.Zero;
722723

723724
for (nuint i = 0; i < n; i++)
@@ -819,9 +820,8 @@ internal static void NormalizedFloatToByteSaturate(
819820
ref Vector256<byte> destBase =
820821
ref Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(dest));
821822

822-
var scale = Vector256.Create((float)byte.MaxValue);
823-
ref byte maskBase = ref MemoryMarshal.GetReference(PermuteMaskDeinterleave8x32);
824-
Vector256<int> mask = Unsafe.As<byte, Vector256<int>>(ref maskBase);
823+
Vector256<float> scale = Vector256.Create((float)byte.MaxValue);
824+
Vector256<int> mask = PermuteMaskDeinterleave8x32();
825825

826826
for (nuint i = 0; i < n; i++)
827827
{
@@ -858,7 +858,7 @@ internal static void NormalizedFloatToByteSaturate(
858858
ref Vector128<byte> destBase =
859859
ref Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(dest));
860860

861-
var scale = Vector128.Create((float)byte.MaxValue);
861+
Vector128<float> scale = Vector128.Create((float)byte.MaxValue);
862862

863863
for (nuint i = 0; i < n; i++)
864864
{
@@ -895,14 +895,12 @@ internal static void PackFromRgbPlanesAvx2Reduce(
895895

896896
nuint count = redChannel.Vector256Count<byte>();
897897

898-
ref byte control1Bytes = ref MemoryMarshal.GetReference(PermuteMaskEvenOdd8x32);
899-
Vector256<uint> control1 = Unsafe.As<byte, Vector256<uint>>(ref control1Bytes);
898+
Vector256<uint> control1 = PermuteMaskEvenOdd8x32();
900899

901-
ref byte control2Bytes = ref MemoryMarshal.GetReference(PermuteMaskShiftAlpha8x32);
902-
Vector256<uint> control2 = Unsafe.As<byte, Vector256<uint>>(ref control2Bytes);
903-
var a = Vector256.Create((byte)255);
900+
Vector256<uint> control2 = PermuteMaskShiftAlpha8x32();
901+
Vector256<byte> a = Vector256.Create((byte)255);
904902

905-
Vector256<byte> shuffleAlpha = Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(ShuffleMaskShiftAlpha));
903+
Vector256<byte> shuffleAlpha = ShuffleMaskShiftAlpha();
906904

907905
for (nuint i = 0; i < count; i++)
908906
{
@@ -966,9 +964,8 @@ internal static void PackFromRgbPlanesAvx2Reduce(
966964
ref Vector256<byte> dBase = ref Unsafe.As<Rgba32, Vector256<byte>>(ref MemoryMarshal.GetReference(destination));
967965

968966
nuint count = redChannel.Vector256Count<byte>();
969-
ref byte control1Bytes = ref MemoryMarshal.GetReference(PermuteMaskEvenOdd8x32);
970-
Vector256<uint> control1 = Unsafe.As<byte, Vector256<uint>>(ref control1Bytes);
971-
var a = Vector256.Create((byte)255);
967+
Vector256<uint> control1 = PermuteMaskEvenOdd8x32();
968+
Vector256<byte> a = Vector256.Create((byte)255);
972969

973970
for (nuint i = 0; i < count; i++)
974971
{
@@ -1017,8 +1014,8 @@ internal static void UnpackToRgbPlanesAvx2Reduce(
10171014
ref Vector256<float> destGRef = ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(greenChannel));
10181015
ref Vector256<float> destBRef = ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(blueChannel));
10191016

1020-
Vector256<uint> extractToLanesMask = Unsafe.As<byte, Vector256<uint>>(ref MemoryMarshal.GetReference(MoveFirst24BytesToSeparateLanes));
1021-
Vector256<byte> extractRgbMask = Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(ExtractRgb));
1017+
Vector256<uint> extractToLanesMask = MoveFirst24BytesToSeparateLanes();
1018+
Vector256<byte> extractRgbMask = ExtractRgb();
10221019
Vector256<byte> rgb, rg, bx;
10231020
Vector256<float> r, g, b;
10241021

tests/ImageSharp.Benchmarks/General/PixelConversion/PixelConversion_PackFromRgbPlanes.cs

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -207,8 +207,7 @@ public void Rgba32_Avx2_Float()
207207

208208
nuint count = (uint)this.Count / (uint)Vector256<float>.Count;
209209

210-
ref byte control = ref MemoryMarshal.GetReference(SimdUtils.HwIntrinsics.PermuteMaskEvenOdd8x32);
211-
Vector256<int> vcontrol = Unsafe.As<byte, Vector256<int>>(ref control);
210+
Vector256<int> vcontrol = SimdUtils.HwIntrinsics.PermuteMaskEvenOdd8x32().AsInt32();
212211

213212
var va = Vector256.Create(1F);
214213

0 commit comments

Comments
 (0)