Skip to content

Commit 041e59d

Browse files
All Vector128 Load
1 parent 30bdc29 commit 041e59d

File tree

3 files changed

+63
-7
lines changed

3 files changed

+63
-7
lines changed

src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Vector128.cs

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,36 @@ public void NormalizeColorsAndRoundInPlaceVector128(float maximum)
4040
this.V7R = NormalizeAndRoundVector128(this.V7R.AsVector128(), off, max).AsVector4();
4141
}
4242

43+
/// <summary>
44+
/// Loads values from <paramref name="source"/> using <see cref="Vector128"/> intrinsics,
/// widening each 16-bit value to a 32-bit integer and converting it to a single-precision float.
45+
/// </summary>
46+
/// <param name="source">The source <see cref="Block8x8"/></param>
47+
public void LoadFromInt16ExtendedVector128(ref Block8x8 source)
48+
{
49+
DebugGuard.IsTrue(Vector128.IsHardwareAccelerated, "Vector128 support is required to run this operation!");
50+
51+
// Reinterpret both blocks as sequences of 128-bit vectors so they can be indexed with Unsafe.Add.
ref Vector128<short> srcBase = ref Unsafe.As<Block8x8, Vector128<short>>(ref source);
52+
ref Vector128<float> destBase = ref Unsafe.As<Block8x8F, Vector128<float>>(ref this);
53+
54+
// Only 8 iterations, one per 128-bit vector of 8 shorts
55+
for (nuint i = 0; i < 8; i++)
56+
{
57+
Vector128<short> src = Unsafe.Add(ref srcBase, i);
58+
59+
// Step 1: Widen short -> int
60+
Vector128<int> lower = Vector128.WidenLower(src); // lower 4 shorts -> 4 ints
61+
Vector128<int> upper = Vector128.WidenUpper(src); // upper 4 shorts -> 4 ints
62+
63+
// Step 2: Convert int -> float
64+
Vector128<float> lowerF = Vector128.ConvertToSingle(lower);
65+
Vector128<float> upperF = Vector128.ConvertToSingle(upper);
66+
67+
// Step 3: Store to destination (8 source lanes -> two Vector128<float> blocks of 4 floats each)
68+
Unsafe.Add(ref destBase, (i * 2) + 0) = lowerF;
69+
Unsafe.Add(ref destBase, (i * 2) + 1) = upperF;
70+
}
71+
}
72+
4373
/// <summary>
/// Adds <paramref name="off"/> to <paramref name="value"/>, clamps the sum between zero and
/// <paramref name="max"/>, and passes the result through <c>Vector128_.RoundToNearestInteger</c>.
/// </summary>
/// <param name="value">The vector to normalize.</param>
/// <param name="off">The offset added to every lane before clamping.</param>
/// <param name="max">The upper clamp bound for every lane.</param>
/// <returns>The offset, clamped and rounded vector.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
4474
private static Vector128<float> NormalizeAndRoundVector128(Vector128<float> value, Vector128<float> off, Vector128<float> max)
4575
=> Vector128_.RoundToNearestInteger(Vector128_.Clamp(value + off, Vector128<float>.Zero, max));

src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -392,6 +392,11 @@ public void LoadFrom(ref Block8x8 source)
392392
this.LoadFromInt16ExtendedAvx2(ref source);
393393
return;
394394
}
395+
else if (Vector128.IsHardwareAccelerated)
396+
{
397+
this.LoadFromInt16ExtendedVector128(ref source);
398+
return;
399+
}
395400

396401
this.LoadFromInt16Scalar(ref source);
397402
}

tests/ImageSharp.Tests/Formats/Jpg/Block8x8FTests.cs

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ public void Indexer()
5555
Times,
5656
() =>
5757
{
58-
var block = default(Block8x8F);
58+
Block8x8F block = default;
5959

6060
for (int i = 0; i < Block8x8F.Size; i++)
6161
{
@@ -68,7 +68,7 @@ public void Indexer()
6868
sum += block[i];
6969
}
7070
});
71-
Assert.Equal(sum, 64f * 63f * 0.5f);
71+
Assert.Equal(64f * 63f * 0.5f, sum);
7272
}
7373

7474
[Fact]
@@ -93,7 +93,7 @@ public void Indexer_ReferenceBenchmarkWithArray()
9393
sum += block[i];
9494
}
9595
});
96-
Assert.Equal(sum, 64f * 63f * 0.5f);
96+
Assert.Equal(64f * 63f * 0.5f, sum);
9797
}
9898

9999
[Fact]
@@ -121,7 +121,7 @@ public void Load_Store_FloatArray()
121121
}
122122

123123
[Fact]
124-
public void TransposeInplace()
124+
public void TransposeInPlace()
125125
{
126126
static void RunTest()
127127
{
@@ -276,7 +276,7 @@ public void RoundInto()
276276
float[] data = Create8x8RandomFloatData(-1000, 1000);
277277

278278
Block8x8F source = Block8x8F.Load(data);
279-
var dest = default(Block8x8);
279+
Block8x8 dest = default;
280280

281281
source.RoundInto(ref dest);
282282

@@ -388,7 +388,7 @@ public void LoadFromUInt16Scalar()
388388

389389
short[] data = Create8x8ShortData();
390390

391-
var source = Block8x8.Load(data);
391+
Block8x8 source = Block8x8.Load(data);
392392

393393
Block8x8F dest = default;
394394
dest.LoadFromInt16Scalar(ref source);
@@ -399,6 +399,27 @@ public void LoadFromUInt16Scalar()
399399
}
400400
}
401401

402+
// Checks that LoadFromInt16ExtendedVector128 copies every source value into the float block unchanged.
// NOTE(review): the test name says UInt16 while the method under test loads Int16 values;
// this matches the naming of the sibling Scalar/Avx2 tests - confirm whether that is intended.
[Fact]
403+
public void LoadFromUInt16ExtendedVector128()
404+
{
405+
// Return early when the helper reports that this runner should be skipped.
if (this.SkipOnNonVector128Runner())
406+
{
407+
return;
408+
}
409+
410+
short[] data = Create8x8ShortData();
411+
412+
Block8x8 source = Block8x8.Load(data);
413+
414+
Block8x8F dest = default;
415+
dest.LoadFromInt16ExtendedVector128(ref source);
416+
417+
// Each float read back through the indexer must equal the short at the same index.
for (int i = 0; i < Block8x8F.Size; i++)
418+
{
419+
Assert.Equal(data[i], dest[i]);
420+
}
421+
}
422+
402423
[Fact]
403424
public void LoadFromUInt16ExtendedAvx2()
404425
{
@@ -409,7 +430,7 @@ public void LoadFromUInt16ExtendedAvx2()
409430

410431
short[] data = Create8x8ShortData();
411432

412-
var source = Block8x8.Load(data);
433+
Block8x8 source = Block8x8.Load(data);
413434

414435
Block8x8F dest = default;
415436
dest.LoadFromInt16ExtendedAvx2(ref source);

0 commit comments

Comments
 (0)