Skip to content

Commit 29a5635

Browse files
Clean up and prep for Vector512 multiply
1 parent 69caa49 commit 29a5635

File tree

11 files changed

+109
-75
lines changed

11 files changed

+109
-75
lines changed

src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1012,9 +1012,9 @@ internal static void NormalizedFloatToByteSaturate(
10121012
Unsafe.Add(ref destinationBase, i) = b;
10131013
}
10141014
}
1015-
else if (Sse2.IsSupported || AdvSimd.IsSupported)
1015+
else if (Vector128.IsHardwareAccelerated)
10161016
{
1017-
// Sse, AdvSimd
1017+
// Sse, AdvSimd, etc.
10181018
DebugVerifySpanInput(source, destination, Vector128<byte>.Count);
10191019

10201020
nuint n = destination.Vector128Count<byte>();

src/ImageSharp/Common/Helpers/Vector128Utilities.cs

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
using System.Runtime.CompilerServices;
77
using System.Runtime.Intrinsics;
88
using System.Runtime.Intrinsics.Arm;
9+
using System.Runtime.Intrinsics.Wasm;
910
using System.Runtime.Intrinsics.X86;
1011

1112
namespace SixLabors.ImageSharp.Common.Helpers;
@@ -270,8 +271,16 @@ public static Vector128<byte> PackUnsignedSaturate(Vector128<short> left, Vector
270271
return AdvSimd.ExtractNarrowingSaturateUnsignedUpper(AdvSimd.ExtractNarrowingSaturateUnsignedLower(left), right);
271272
}
272273

273-
ThrowUnreachableException();
274-
return default;
274+
if (PackedSimd.IsSupported)
275+
{
276+
return PackedSimd.ConvertNarrowingSaturateUnsigned(left, right);
277+
}
278+
279+
Vector128<short> min = Vector128.Create((short)byte.MinValue);
280+
Vector128<short> max = Vector128.Create((short)byte.MaxValue);
281+
Vector128<ushort> lefClamped = Clamp(left, min, max).AsUInt16();
282+
Vector128<ushort> rightClamped = Clamp(right, min, max).AsUInt16();
283+
return Vector128.Narrow(lefClamped, rightClamped);
275284
}
276285

277286
/// <summary>
@@ -293,10 +302,30 @@ public static Vector128<short> PackSignedSaturate(Vector128<int> left, Vector128
293302
return AdvSimd.ExtractNarrowingSaturateUpper(AdvSimd.ExtractNarrowingSaturateLower(left), right);
294303
}
295304

296-
ThrowUnreachableException();
297-
return default;
305+
if (PackedSimd.IsSupported)
306+
{
307+
return PackedSimd.ConvertNarrowingSaturateSigned(left, right);
308+
}
309+
310+
Vector128<int> min = Vector128.Create((int)short.MinValue);
311+
Vector128<int> max = Vector128.Create((int)short.MaxValue);
312+
Vector128<int> lefClamped = Clamp(left, min, max);
313+
Vector128<int> rightClamped = Clamp(right, min, max);
314+
return Vector128.Narrow(lefClamped, rightClamped);
298315
}
299316

317+
/// <summary>
318+
/// Restricts a vector between a minimum and a maximum value.
319+
/// </summary>
320+
/// <typeparam name="T">The type of the elements in the vector.</typeparam>
321+
/// <param name="value">The vector to restrict.</param>
322+
/// <param name="min">The minimum value.</param>
323+
/// <param name="max">The maximum value.</param>
324+
/// <returns>The restricted <see cref="Vector128{T}"/>.</returns>
325+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
326+
public static Vector128<T> Clamp<T>(Vector128<T> value, Vector128<T> min, Vector128<T> max)
327+
=> Vector128.Min(Vector128.Max(value, min), max);
328+
300329
[DoesNotReturn]
301330
private static void ThrowUnreachableException() => throw new UnreachableException();
302331
}

src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Intrinsic.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ private static void MultiplyIntoInt16_Sse2(ref Block8x8F a, ref Block8x8F b, ref
6464

6565
ref Vector128<short> destBase = ref Unsafe.As<Block8x8, Vector128<short>>(ref dest);
6666

67+
// TODO: We can use the v128 utilities for this.
6768
for (nuint i = 0; i < 16; i += 2)
6869
{
6970
Vector128<int> left = Sse2.ConvertToVector128Int32(Sse.Multiply(Unsafe.Add(ref aBase, i + 0), Unsafe.Add(ref bBase, i + 0)));

src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Generated.cs renamed to src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Round.cs

Lines changed: 23 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -5,36 +5,36 @@
55
using System.Runtime.CompilerServices;
66
using System.Runtime.Intrinsics;
77

8-
// <auto-generated />
98
namespace SixLabors.ImageSharp.Formats.Jpeg.Components;
109

1110
internal partial struct Block8x8F
1211
{
1312
/// <summary>
1413
/// Level shift by +maximum/2, clip to [0, maximum]
1514
/// </summary>
15+
/// <param name="maximum">The maximum value to normalize to.</param>
1616
public void NormalizeColorsInPlace(float maximum)
1717
{
18-
var CMin4 = new Vector4(0F);
19-
var CMax4 = new Vector4(maximum);
20-
var COff4 = new Vector4(MathF.Ceiling(maximum * 0.5F));
21-
22-
this.V0L = Numerics.Clamp(this.V0L + COff4, CMin4, CMax4);
23-
this.V0R = Numerics.Clamp(this.V0R + COff4, CMin4, CMax4);
24-
this.V1L = Numerics.Clamp(this.V1L + COff4, CMin4, CMax4);
25-
this.V1R = Numerics.Clamp(this.V1R + COff4, CMin4, CMax4);
26-
this.V2L = Numerics.Clamp(this.V2L + COff4, CMin4, CMax4);
27-
this.V2R = Numerics.Clamp(this.V2R + COff4, CMin4, CMax4);
28-
this.V3L = Numerics.Clamp(this.V3L + COff4, CMin4, CMax4);
29-
this.V3R = Numerics.Clamp(this.V3R + COff4, CMin4, CMax4);
30-
this.V4L = Numerics.Clamp(this.V4L + COff4, CMin4, CMax4);
31-
this.V4R = Numerics.Clamp(this.V4R + COff4, CMin4, CMax4);
32-
this.V5L = Numerics.Clamp(this.V5L + COff4, CMin4, CMax4);
33-
this.V5R = Numerics.Clamp(this.V5R + COff4, CMin4, CMax4);
34-
this.V6L = Numerics.Clamp(this.V6L + COff4, CMin4, CMax4);
35-
this.V6R = Numerics.Clamp(this.V6R + COff4, CMin4, CMax4);
36-
this.V7L = Numerics.Clamp(this.V7L + COff4, CMin4, CMax4);
37-
this.V7R = Numerics.Clamp(this.V7R + COff4, CMin4, CMax4);
18+
Vector4 min = Vector4.Zero;
19+
Vector4 max = new(maximum);
20+
Vector4 off = new(MathF.Ceiling(maximum * 0.5F));
21+
22+
this.V0L = Vector4.Clamp(this.V0L + off, min, max);
23+
this.V0R = Vector4.Clamp(this.V0R + off, min, max);
24+
this.V1L = Vector4.Clamp(this.V1L + off, min, max);
25+
this.V1R = Vector4.Clamp(this.V1R + off, min, max);
26+
this.V2L = Vector4.Clamp(this.V2L + off, min, max);
27+
this.V2R = Vector4.Clamp(this.V2R + off, min, max);
28+
this.V3L = Vector4.Clamp(this.V3L + off, min, max);
29+
this.V3R = Vector4.Clamp(this.V3R + off, min, max);
30+
this.V4L = Vector4.Clamp(this.V4L + off, min, max);
31+
this.V4R = Vector4.Clamp(this.V4R + off, min, max);
32+
this.V5L = Vector4.Clamp(this.V5L + off, min, max);
33+
this.V5R = Vector4.Clamp(this.V5R + off, min, max);
34+
this.V6L = Vector4.Clamp(this.V6L + off, min, max);
35+
this.V6R = Vector4.Clamp(this.V6R + off, min, max);
36+
this.V7L = Vector4.Clamp(this.V7L + off, min, max);
37+
this.V7R = Vector4.Clamp(this.V7R + off, min, max);
3838
}
3939

4040
/// <summary>
@@ -44,7 +44,7 @@ public void NormalizeColorsInPlace(float maximum)
4444
[MethodImpl(InliningOptions.ShortMethod)]
4545
public void NormalizeColorsAndRoundInPlaceVector256(float maximum)
4646
{
47-
Vector256<float> off = Vector256.Create(MathF.Ceiling(maximum * 0.5F));
47+
Vector256<float> off = Vector256.Create(MathF.Ceiling(maximum * 0.5F));
4848
Vector256<float> max = Vector256.Create(maximum);
4949

5050
ref Vector256<float> row0 = ref Unsafe.As<Vector4, Vector256<float>>(ref this.V0L);
@@ -103,6 +103,7 @@ public void NormalizeColorsAndRoundInPlaceVector128(float maximum)
103103
/// <summary>
104104
/// Fill the block from 'source' doing short -> float conversion.
105105
/// </summary>
106+
/// <param name="source">The source block.</param>
106107
public void LoadFromInt16Scalar(ref Block8x8 source)
107108
{
108109
ref short selfRef = ref Unsafe.As<Block8x8, short>(ref source);

src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs

Lines changed: 35 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -159,17 +159,18 @@ public float[] ToArray()
159159
[MethodImpl(InliningOptions.ShortMethod)]
160160
public void MultiplyInPlace(float value)
161161
{
162-
if (Avx.IsSupported)
162+
// TODO: Vector512
163+
if (Vector256.IsHardwareAccelerated)
163164
{
164165
Vector256<float> valueVec = Vector256.Create(value);
165-
this.V0 = Avx.Multiply(this.V0, valueVec);
166-
this.V1 = Avx.Multiply(this.V1, valueVec);
167-
this.V2 = Avx.Multiply(this.V2, valueVec);
168-
this.V3 = Avx.Multiply(this.V3, valueVec);
169-
this.V4 = Avx.Multiply(this.V4, valueVec);
170-
this.V5 = Avx.Multiply(this.V5, valueVec);
171-
this.V6 = Avx.Multiply(this.V6, valueVec);
172-
this.V7 = Avx.Multiply(this.V7, valueVec);
166+
this.V0 *= valueVec;
167+
this.V1 *= valueVec;
168+
this.V2 *= valueVec;
169+
this.V3 *= valueVec;
170+
this.V4 *= valueVec;
171+
this.V5 *= valueVec;
172+
this.V6 *= valueVec;
173+
this.V7 *= valueVec;
173174
}
174175
else
175176
{
@@ -200,16 +201,17 @@ public void MultiplyInPlace(float value)
200201
[MethodImpl(InliningOptions.ShortMethod)]
201202
public unsafe void MultiplyInPlace(ref Block8x8F other)
202203
{
203-
if (Avx.IsSupported)
204+
// TODO: Vector512
205+
if (Vector256.IsHardwareAccelerated)
204206
{
205-
this.V0 = Avx.Multiply(this.V0, other.V0);
206-
this.V1 = Avx.Multiply(this.V1, other.V1);
207-
this.V2 = Avx.Multiply(this.V2, other.V2);
208-
this.V3 = Avx.Multiply(this.V3, other.V3);
209-
this.V4 = Avx.Multiply(this.V4, other.V4);
210-
this.V5 = Avx.Multiply(this.V5, other.V5);
211-
this.V6 = Avx.Multiply(this.V6, other.V6);
212-
this.V7 = Avx.Multiply(this.V7, other.V7);
207+
this.V0 *= other.V0;
208+
this.V1 *= other.V1;
209+
this.V2 *= other.V2;
210+
this.V3 *= other.V3;
211+
this.V4 *= other.V4;
212+
this.V5 *= other.V5;
213+
this.V6 *= other.V6;
214+
this.V7 *= other.V7;
213215
}
214216
else
215217
{
@@ -239,17 +241,18 @@ public unsafe void MultiplyInPlace(ref Block8x8F other)
239241
[MethodImpl(InliningOptions.ShortMethod)]
240242
public void AddInPlace(float value)
241243
{
242-
if (Avx.IsSupported)
244+
// TODO: Vector512
245+
if (Vector256.IsHardwareAccelerated)
243246
{
244247
Vector256<float> valueVec = Vector256.Create(value);
245-
this.V0 = Avx.Add(this.V0, valueVec);
246-
this.V1 = Avx.Add(this.V1, valueVec);
247-
this.V2 = Avx.Add(this.V2, valueVec);
248-
this.V3 = Avx.Add(this.V3, valueVec);
249-
this.V4 = Avx.Add(this.V4, valueVec);
250-
this.V5 = Avx.Add(this.V5, valueVec);
251-
this.V6 = Avx.Add(this.V6, valueVec);
252-
this.V7 = Avx.Add(this.V7, valueVec);
248+
this.V0 += valueVec;
249+
this.V1 += valueVec;
250+
this.V2 += valueVec;
251+
this.V3 += valueVec;
252+
this.V4 += valueVec;
253+
this.V5 += valueVec;
254+
this.V6 += valueVec;
255+
this.V7 += valueVec;
253256
}
254257
else
255258
{
@@ -509,26 +512,26 @@ public override string ToString()
509512
}
510513

511514
/// <summary>
512-
/// Transpose the block inplace.
515+
/// Transpose the block in-place.
513516
/// </summary>
514517
[MethodImpl(InliningOptions.ShortMethod)]
515-
public void TransposeInplace()
518+
public void TransposeInPlace()
516519
{
517520
if (Avx.IsSupported)
518521
{
519522
this.TransposeInplace_Avx();
520523
}
521524
else
522525
{
523-
this.TransposeInplace_Scalar();
526+
this.TransposeInPlace_Scalar();
524527
}
525528
}
526529

527530
/// <summary>
528-
/// Scalar inplace transpose implementation for <see cref="TransposeInplace"/>
531+
/// Scalar in-place transpose implementation for <see cref="TransposeInPlace"/>
529532
/// </summary>
530533
[MethodImpl(InliningOptions.ShortMethod)]
531-
private void TransposeInplace_Scalar()
534+
private void TransposeInPlace_Scalar()
532535
{
533536
ref float elemRef = ref Unsafe.As<Block8x8F, float>(ref this);
534537

src/ImageSharp/Formats/Jpeg/Components/FloatingPointDCT.Intrinsic.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ private static void FDCT8x8_Avx(ref Block8x8F block)
2020
FDCT8x8_1D_Avx(ref block);
2121

2222
// Second pass - process rows
23-
block.TransposeInplace();
23+
block.TransposeInPlace();
2424
FDCT8x8_1D_Avx(ref block);
2525

2626
// Applies 1D floating point FDCT inplace
@@ -81,7 +81,7 @@ private static void IDCT8x8_Avx(ref Block8x8F transposedBlock)
8181
IDCT8x8_1D_Avx(ref transposedBlock);
8282

8383
// Second pass - process rows
84-
transposedBlock.TransposeInplace();
84+
transposedBlock.TransposeInPlace();
8585
IDCT8x8_1D_Avx(ref transposedBlock);
8686

8787
// Applies 1D floating point FDCT inplace

src/ImageSharp/Formats/Jpeg/Components/FloatingPointDCT.cs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ public static void AdjustToIDCT(ref Block8x8F quantTable)
7777

7878
// Spectral macroblocks are transposed before quantization
7979
// so we must transpose quantization table
80-
quantTable.TransposeInplace();
80+
quantTable.TransposeInPlace();
8181
}
8282

8383
/// <summary>
@@ -97,7 +97,7 @@ public static void AdjustToFDCT(ref Block8x8F quantTable)
9797
// Spectral macroblocks are not transposed before quantization
9898
// Transpose is done after quantization at zig-zag stage
9999
// so we must transpose quantization table
100-
quantTable.TransposeInplace();
100+
quantTable.TransposeInPlace();
101101
}
102102

103103
/// <summary>
@@ -155,7 +155,7 @@ private static void IDCT_Vector4(ref Block8x8F transposedBlock)
155155
IDCT8x4_Vector4(ref transposedBlock.V0R);
156156

157157
// Second pass - process rows
158-
transposedBlock.TransposeInplace();
158+
transposedBlock.TransposeInPlace();
159159
IDCT8x4_Vector4(ref transposedBlock.V0L);
160160
IDCT8x4_Vector4(ref transposedBlock.V0R);
161161

@@ -225,7 +225,7 @@ private static void FDCT_Vector4(ref Block8x8F block)
225225
FDCT8x4_Vector4(ref block.V0R);
226226

227227
// Second pass - process rows
228-
block.TransposeInplace();
228+
block.TransposeInPlace();
229229
FDCT8x4_Vector4(ref block.V0L);
230230
FDCT8x4_Vector4(ref block.V0R);
231231

src/ImageSharp/Formats/Jpeg/Components/ScaledFloatingPointDCT.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ public static void AdjustToIDCT(ref Block8x8F quantTable)
4848

4949
// Spectral macroblocks are transposed before quantization
5050
// so we must transpose quantization table
51-
quantTable.TransposeInplace();
51+
quantTable.TransposeInPlace();
5252
}
5353

5454
/// <summary>

tests/ImageSharp.Benchmarks/Codecs/Jpeg/BlockOperations/Block8x8F_Transpose.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ public class Block8x8F_Transpose
1414
[Benchmark]
1515
public float TransposeInplace()
1616
{
17-
this.source.TransposeInplace();
17+
this.source.TransposeInPlace();
1818
return this.source[0];
1919
}
2020

tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ static void RunTest()
130130

131131
Block8x8F block8x8 = Block8x8F.Load(Create8x8FloatData());
132132

133-
block8x8.TransposeInplace();
133+
block8x8.TransposeInPlace();
134134

135135
float[] actual = new float[64];
136136
block8x8.ScaledCopyTo(actual);

0 commit comments

Comments
 (0)