Skip to content

Commit 4931372

Browse files
Port first Shuffle3 method
1 parent 3969525 commit 4931372

File tree

2 files changed

+142
-45
lines changed

2 files changed

+142
-45
lines changed

src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs

Lines changed: 43 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
using System.Runtime.Intrinsics;
99
using System.Runtime.Intrinsics.Arm;
1010
using System.Runtime.Intrinsics.X86;
11+
using SixLabors.ImageSharp.Common.Helpers;
1112
using SixLabors.ImageSharp.PixelFormats;
1213

1314
namespace SixLabors.ImageSharp;
@@ -95,15 +96,15 @@ public static void Shuffle4Reduce(
9596

9697
/// <summary>
9798
/// Shuffle 8-bit integers <paramref name="source"/>
98-
/// using the control and store the results in <paramref name="dest"/>.
99+
/// using the control and store the results in <paramref name="destination"/>.
99100
/// </summary>
100101
/// <param name="source">The source span of bytes.</param>
101-
/// <param name="dest">The destination span of bytes.</param>
102+
/// <param name="destination">The destination span of bytes.</param>
102103
/// <param name="control">The byte control.</param>
103104
[MethodImpl(InliningOptions.ShortMethod)]
104105
public static void Shuffle4Reduce(
105106
ref ReadOnlySpan<byte> source,
106-
ref Span<byte> dest,
107+
ref Span<byte> destination,
107108
byte control)
108109
{
109110
if (Vector512.IsHardwareAccelerated || Vector256.IsHardwareAccelerated || Vector128.IsHardwareAccelerated)
@@ -128,29 +129,29 @@ public static void Shuffle4Reduce(
128129
{
129130
Shuffle4(
130131
source[..adjustedCount],
131-
dest[..adjustedCount],
132+
destination[..adjustedCount],
132133
control);
133134

134135
source = source[adjustedCount..];
135-
dest = dest[adjustedCount..];
136+
destination = destination[adjustedCount..];
136137
}
137138
}
138139
}
139140

140141
/// <summary>
141142
/// Shuffles 8-bit integer triplets within 128-bit lanes in <paramref name="source"/>
142-
/// using the control and store the results in <paramref name="dest"/>.
143+
/// using the control and store the results in <paramref name="destination"/>.
143144
/// </summary>
144145
/// <param name="source">The source span of bytes.</param>
145-
/// <param name="dest">The destination span of bytes.</param>
146+
/// <param name="destination">The destination span of bytes.</param>
146147
/// <param name="control">The byte control.</param>
147148
[MethodImpl(InliningOptions.ShortMethod)]
148149
public static void Shuffle3Reduce(
149150
ref ReadOnlySpan<byte> source,
150-
ref Span<byte> dest,
151+
ref Span<byte> destination,
151152
byte control)
152153
{
153-
if (Ssse3.IsSupported)
154+
if (Vector128.IsHardwareAccelerated && Vector128Utilities.SupportsRightShift)
154155
{
155156
int remainder = source.Length % (Vector128<byte>.Count * 3);
156157

@@ -160,11 +161,11 @@ public static void Shuffle3Reduce(
160161
{
161162
Shuffle3(
162163
source[..adjustedCount],
163-
dest[..adjustedCount],
164+
destination[..adjustedCount],
164165
control);
165166

166167
source = source[adjustedCount..];
167-
dest = dest[adjustedCount..];
168+
destination = destination[adjustedCount..];
168169
}
169170
}
170171
}
@@ -446,24 +447,21 @@ private static void Shuffle4(
446447
[MethodImpl(InliningOptions.ShortMethod)]
447448
private static void Shuffle3(
448449
ReadOnlySpan<byte> source,
449-
Span<byte> dest,
450+
Span<byte> destination,
450451
byte control)
451452
{
452-
if (Ssse3.IsSupported)
453+
if (Vector128.IsHardwareAccelerated && Vector128Utilities.SupportsRightShift)
453454
{
454-
Vector128<byte> vmask = ShuffleMaskPad4Nx16();
455-
Vector128<byte> vmasko = ShuffleMaskSlice4Nx16();
456-
Vector128<byte> vmaske = Ssse3.AlignRight(vmasko, vmasko, 12);
455+
Vector128<byte> maskPad4Nx16 = ShuffleMaskPad4Nx16();
456+
Vector128<byte> maskSlice4Nx16 = ShuffleMaskSlice4Nx16();
457+
Vector128<byte> maskE = Vector128Utilities.AlignRight(maskSlice4Nx16, maskSlice4Nx16, 12);
457458

458459
Span<byte> bytes = stackalloc byte[Vector128<byte>.Count];
459460
Shuffle.MMShuffleSpan(ref bytes, control);
460-
Vector128<byte> vshuffle = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(bytes));
461+
Vector128<byte> mask = Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(bytes));
461462

462-
ref Vector128<byte> sourceBase =
463-
ref Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(source));
464-
465-
ref Vector128<byte> destBase =
466-
ref Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(dest));
463+
ref Vector128<byte> sourceBase = ref Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(source));
464+
ref Vector128<byte> destinationBase = ref Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(destination));
467465

468466
nuint n = source.Vector128Count<byte>();
469467

@@ -472,44 +470,44 @@ private static void Shuffle3(
472470
ref Vector128<byte> vs = ref Unsafe.Add(ref sourceBase, i);
473471

474472
Vector128<byte> v0 = vs;
475-
Vector128<byte> v1 = Unsafe.Add(ref vs, 1);
476-
Vector128<byte> v2 = Unsafe.Add(ref vs, 2);
477-
Vector128<byte> v3 = Sse2.ShiftRightLogical128BitLane(v2, 4);
473+
Vector128<byte> v1 = Unsafe.Add(ref vs, (nuint)1);
474+
Vector128<byte> v2 = Unsafe.Add(ref vs, (nuint)2);
475+
Vector128<byte> v3 = Vector128Utilities.ShiftRightBytesInVector(v2, 4);
478476

479-
v2 = Ssse3.AlignRight(v2, v1, 8);
480-
v1 = Ssse3.AlignRight(v1, v0, 12);
477+
v2 = Vector128Utilities.AlignRight(v2, v1, 8);
478+
v1 = Vector128Utilities.AlignRight(v1, v0, 12);
481479

482-
v0 = Ssse3.Shuffle(Ssse3.Shuffle(v0, vmask), vshuffle);
483-
v1 = Ssse3.Shuffle(Ssse3.Shuffle(v1, vmask), vshuffle);
484-
v2 = Ssse3.Shuffle(Ssse3.Shuffle(v2, vmask), vshuffle);
485-
v3 = Ssse3.Shuffle(Ssse3.Shuffle(v3, vmask), vshuffle);
480+
v0 = Vector128.Shuffle(Vector128.Shuffle(v0, maskPad4Nx16), mask);
481+
v1 = Vector128.Shuffle(Vector128.Shuffle(v1, maskPad4Nx16), mask);
482+
v2 = Vector128.Shuffle(Vector128.Shuffle(v2, maskPad4Nx16), mask);
483+
v3 = Vector128.Shuffle(Vector128.Shuffle(v3, maskPad4Nx16), mask);
486484

487-
v0 = Ssse3.Shuffle(v0, vmaske);
488-
v1 = Ssse3.Shuffle(v1, vmasko);
489-
v2 = Ssse3.Shuffle(v2, vmaske);
490-
v3 = Ssse3.Shuffle(v3, vmasko);
485+
v0 = Vector128.Shuffle(v0, maskE);
486+
v1 = Vector128.Shuffle(v1, maskSlice4Nx16);
487+
v2 = Vector128.Shuffle(v2, maskE);
488+
v3 = Vector128.Shuffle(v3, maskSlice4Nx16);
491489

492-
v0 = Ssse3.AlignRight(v1, v0, 4);
493-
v3 = Ssse3.AlignRight(v3, v2, 12);
490+
v0 = Vector128Utilities.AlignRight(v1, v0, 4);
491+
v3 = Vector128Utilities.AlignRight(v3, v2, 12);
494492

495-
v1 = Sse2.ShiftLeftLogical128BitLane(v1, 4);
496-
v2 = Sse2.ShiftRightLogical128BitLane(v2, 4);
493+
v1 = Vector128Utilities.ShiftLeftBytesInVector(v1, 4);
494+
v2 = Vector128Utilities.ShiftRightBytesInVector(v2, 4);
497495

498-
v1 = Ssse3.AlignRight(v2, v1, 8);
496+
v1 = Vector128Utilities.AlignRight(v2, v1, 8);
499497

500-
ref Vector128<byte> vd = ref Unsafe.Add(ref destBase, i);
498+
ref Vector128<byte> vd = ref Unsafe.Add(ref destinationBase, i);
501499

502500
vd = v0;
503-
Unsafe.Add(ref vd, 1) = v1;
504-
Unsafe.Add(ref vd, 2) = v3;
501+
Unsafe.Add(ref vd, (nuint)1) = v1;
502+
Unsafe.Add(ref vd, (nuint)2) = v3;
505503
}
506504
}
507505
}
508506

509507
[MethodImpl(InliningOptions.ShortMethod)]
510508
private static void Pad3Shuffle4(
511509
ReadOnlySpan<byte> source,
512-
Span<byte> dest,
510+
Span<byte> destination,
513511
byte control)
514512
{
515513
if (Ssse3.IsSupported)
@@ -525,7 +523,7 @@ private static void Pad3Shuffle4(
525523
ref Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(source));
526524

527525
ref Vector128<byte> destBase =
528-
ref Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(dest));
526+
ref Unsafe.As<byte, Vector128<byte>>(ref MemoryMarshal.GetReference(destination));
529527

530528
nuint n = source.Vector128Count<byte>();
531529

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
// Copyright (c) Six Labors.
2+
// Licensed under the Six Labors Split License.
3+
4+
using System.Diagnostics;
5+
using System.Diagnostics.CodeAnalysis;
6+
using System.Runtime.CompilerServices;
7+
using System.Runtime.Intrinsics;
8+
using System.Runtime.Intrinsics.Arm;
9+
using System.Runtime.Intrinsics.X86;
10+
11+
namespace SixLabors.ImageSharp.Common.Helpers;
12+
13+
/// <summary>
/// Defines utility methods for <see cref="Vector128{T}"/> that have not yet been normalized in the runtime.
/// Should only be used if the intrinsics are available.
/// </summary>
/// <remarks>
/// Each helper dispatches to an x86 intrinsic first and an Arm AdvSimd intrinsic second, and throws
/// <see cref="UnreachableException"/> when neither instruction set is available. Callers are expected
/// to guard usage with <see cref="SupportsRightShift"/>.
/// </remarks>
internal static class Vector128Utilities
{
    /// <summary>
    /// Gets a value indicating whether right shift operations are supported.
    /// </summary>
    /// <remarks>
    /// True when either SSSE3 (x86) or AdvSimd (Arm) is available.
    /// </remarks>
    public static bool SupportsRightShift
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get => Ssse3.IsSupported || AdvSimd.IsSupported;
    }

    /// <summary>
    /// Shifts a 128-bit value right by a specified number of bytes while shifting in zeros.
    /// </summary>
    /// <param name="value">The value to shift.</param>
    /// <param name="numBytes">The number of bytes to shift by.</param>
    /// <returns>The <see cref="Vector128{Byte}"/>.</returns>
    /// <exception cref="UnreachableException">Neither SSE2 nor AdvSimd is supported.</exception>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static Vector128<byte> ShiftRightBytesInVector(Vector128<byte> value, [ConstantExpected(Max = (byte)15)] byte numBytes)
    {
        if (Sse2.IsSupported)
        {
            return Sse2.ShiftRightLogical128BitLane(value, numBytes);
        }

        if (AdvSimd.IsSupported)
        {
            // EXT yields value[numBytes..15] followed by numBytes zero bytes.
            return AdvSimd.ExtractVector128(value, Vector128<byte>.Zero, numBytes);
        }

        ThrowUnreachableException();
        return default;
    }

    /// <summary>
    /// Shifts a 128-bit value left by a specified number of bytes while shifting in zeros.
    /// </summary>
    /// <param name="value">The value to shift.</param>
    /// <param name="numBytes">The number of bytes to shift by.</param>
    /// <returns>The <see cref="Vector128{Byte}"/>.</returns>
    /// <exception cref="UnreachableException">Neither SSE2 nor AdvSimd is supported.</exception>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static Vector128<byte> ShiftLeftBytesInVector(Vector128<byte> value, [ConstantExpected(Max = (byte)15)] byte numBytes)
    {
        if (Sse2.IsSupported)
        {
            return Sse2.ShiftLeftLogical128BitLane(value, numBytes);
        }

        if (AdvSimd.IsSupported)
        {
            // A shift of zero is the identity; the complementary EXT index (16) would not be encodable.
            if (numBytes == 0)
            {
                return value;
            }

            // EXT(Zero, value, k) yields (16 - k) zero bytes followed by value[0..k-1],
            // so a left shift by numBytes requires k = 16 - numBytes rather than numBytes itself.
#pragma warning disable CA1857 // A constant is expected for the parameter
            return AdvSimd.ExtractVector128(Vector128<byte>.Zero, value, (byte)(Vector128<byte>.Count - numBytes));
#pragma warning restore CA1857 // A constant is expected for the parameter
        }

        ThrowUnreachableException();
        return default;
    }

    /// <summary>
    /// Concatenates two 128-bit values (<paramref name="left"/> as the upper half,
    /// <paramref name="right"/> as the lower half), shifts the 256-bit intermediate right by
    /// the given number of bytes and returns the low 128 bits.
    /// </summary>
    /// <param name="left">The left hand source vector.</param>
    /// <param name="right">The right hand source vector.</param>
    /// <param name="mask">The number of bytes to shift the concatenated value right by.</param>
    /// <returns>The <see cref="Vector128{Byte}"/>.</returns>
    /// <exception cref="UnreachableException">Neither SSSE3 nor AdvSimd is supported.</exception>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static Vector128<byte> AlignRight(Vector128<byte> left, Vector128<byte> right, [ConstantExpected(Max = (byte)15)] byte mask)
    {
        // PALIGNR belongs to SSSE3; checking SSE3 alone does not guarantee it is available.
        if (Ssse3.IsSupported)
        {
            return Ssse3.AlignRight(left, right, mask);
        }

        if (AdvSimd.IsSupported)
        {
            // EXT yields right[mask..15] followed by left[0..mask-1].
            return AdvSimd.ExtractVector128(right, left, mask);
        }

        ThrowUnreachableException();
        return default;
    }

    [DoesNotReturn]
    private static void ThrowUnreachableException() => throw new UnreachableException();
}

0 commit comments

Comments
 (0)