Skip to content

Commit 5416edb

Browse files
committed
Vectorize TrimTransparentPixels in GifEncoderCore
1 parent: 4b15595 · commit: 5416edb

File tree

1 file changed

+126
-7
lines changed

1 file changed

+126
-7
lines changed

src/ImageSharp/Formats/Gif/GifEncoderCore.cs

Lines changed: 126 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -412,23 +412,142 @@ private static Buffer2DRegion<byte> TrimTransparentPixels(Buffer2D<byte> buffer,
412412
int bottom = int.MaxValue;
413413
int left = int.MaxValue;
414414
int right = int.MinValue;
415-
416-
// Run through th buffer in a single pass. Use variables to track the min/max values.
417415
int minY = -1;
418416
bool isTransparentRow = true;
417+
418+
// Run through the buffer in a single pass. Use variables to track the min/max values.
419419
for (int y = 0; y < buffer.Height; y++)
420420
{
421421
isTransparentRow = true;
422422
Span<byte> rowSpan = buffer.DangerousGetRowSpan(y);
423+
ref byte rowPtr = ref MemoryMarshal.GetReference(rowSpan);
424+
nint rowLength = (nint)(uint)rowSpan.Length;
425+
nint x = 0;
426+
427+
#if NET7_0_OR_GREATER
428+
if (Vector128.IsHardwareAccelerated && rowLength >= Vector128<byte>.Count)
429+
{
430+
Vector256<byte> trimmableVec256 = Vector256.Create(trimmableIndex);
431+
432+
if (Vector256.IsHardwareAccelerated && rowLength >= Vector256<byte>.Count)
433+
{
434+
do
435+
{
436+
Vector256<byte> vec = Vector256.LoadUnsafe(ref rowPtr, (nuint)x);
437+
Vector256<byte> notEquals = ~Vector256.Equals(vec, trimmableVec256);
438+
439+
if (notEquals != Vector256<byte>.Zero)
440+
{
441+
isTransparentRow = false;
442+
uint mask = notEquals.ExtractMostSignificantBits();
443+
nint start = x + (nint)uint.TrailingZeroCount(mask);
444+
nint end = (nint)uint.LeadingZeroCount(mask);
445+
446+
// end is from the end, but we need the index from the beginning
447+
end = x + Vector256<byte>.Count - 1 - end;
448+
449+
left = Math.Min(left, (int)start);
450+
right = Math.Max(right, (int)end);
451+
}
452+
453+
x += Vector256<byte>.Count;
454+
}
455+
while (x <= rowLength - Vector256<byte>.Count);
456+
}
457+
458+
Vector128<byte> trimmableVec = Vector256.IsHardwareAccelerated
459+
? trimmableVec256.GetLower()
460+
: Vector128.Create(trimmableIndex);
461+
462+
while (x <= rowLength - Vector128<byte>.Count)
463+
{
464+
Vector128<byte> vec = Vector128.LoadUnsafe(ref rowPtr, (nuint)x);
465+
Vector128<byte> notEquals = ~Vector128.Equals(vec, trimmableVec);
466+
467+
if (notEquals != Vector128<byte>.Zero)
468+
{
469+
isTransparentRow = false;
470+
uint mask = notEquals.ExtractMostSignificantBits();
471+
nint start = x + (nint)uint.TrailingZeroCount(mask);
472+
nint end = (nint)uint.LeadingZeroCount(mask) - Vector128<byte>.Count;
473+
474+
// end is from the end, but we need the index from the beginning
475+
end = x + Vector128<byte>.Count - 1 - end;
476+
477+
left = Math.Min(left, (int)start);
478+
right = Math.Max(right, (int)end);
479+
}
480+
481+
x += Vector128<byte>.Count;
482+
}
483+
}
484+
#else
485+
if (Sse41.IsSupported && rowLength >= Vector128<byte>.Count)
486+
{
487+
Vector256<byte> trimmableVec256 = Vector256.Create(trimmableIndex);
488+
489+
if (Avx2.IsSupported && rowLength >= Vector256<byte>.Count)
490+
{
491+
do
492+
{
493+
Vector256<byte> vec = Unsafe.ReadUnaligned<Vector256<byte>>(ref Unsafe.Add(ref rowPtr, x));
494+
Vector256<byte> notEquals = Avx2.CompareEqual(vec, trimmableVec256);
495+
notEquals = Avx2.Xor(notEquals, Vector256<byte>.AllBitsSet);
496+
497+
if (!Avx.TestZ(notEquals, notEquals))
498+
{
499+
isTransparentRow = false;
500+
int mask = Avx2.MoveMask(notEquals);
501+
nint start = x + (nint)(uint)BitOperations.TrailingZeroCount(mask);
502+
nint end = (nint)(uint)BitOperations.LeadingZeroCount((uint)mask);
503+
504+
// end is from the end, but we need the index from the beginning
505+
end = x + Vector256<byte>.Count - 1 - end;
506+
507+
left = Math.Min(left, (int)start);
508+
right = Math.Max(right, (int)end);
509+
}
510+
511+
x += Vector256<byte>.Count;
512+
}
513+
while (x <= rowLength - Vector256<byte>.Count);
514+
}
515+
516+
Vector128<byte> trimmableVec = Sse41.IsSupported
517+
? trimmableVec256.GetLower()
518+
: Vector128.Create(trimmableIndex);
519+
520+
while (x <= rowLength - Vector128<byte>.Count)
521+
{
522+
Vector128<byte> vec = Unsafe.ReadUnaligned<Vector128<byte>>(ref Unsafe.Add(ref rowPtr, x));
523+
Vector128<byte> notEquals = Sse2.CompareEqual(vec, trimmableVec);
524+
notEquals = Sse2.Xor(notEquals, Vector128<byte>.AllBitsSet);
525+
526+
if (!Sse41.TestZ(notEquals, notEquals))
527+
{
528+
isTransparentRow = false;
529+
int mask = Sse2.MoveMask(notEquals);
530+
nint start = x + (nint)(uint)BitOperations.TrailingZeroCount(mask);
531+
nint end = (nint)(uint)BitOperations.LeadingZeroCount((uint)mask) - Vector128<byte>.Count;
423532

424-
// TODO: It may be possible to optimize this inner loop using SIMD.
425-
for (int x = 0; x < rowSpan.Length; x++)
533+
// end is from the end, but we need the index from the beginning
534+
end = x + Vector128<byte>.Count - 1 - end;
535+
536+
left = Math.Min(left, (int)start);
537+
right = Math.Max(right, (int)end);
538+
}
539+
540+
x += Vector128<byte>.Count;
541+
}
542+
}
543+
#endif
544+
for (; x < rowLength; ++x)
426545
{
427-
if (rowSpan[x] != trimmableIndex)
546+
if (Unsafe.Add(ref rowPtr, x) != trimmableIndex)
428547
{
429548
isTransparentRow = false;
430-
left = Math.Min(left, x);
431-
right = Math.Max(right, x);
549+
left = Math.Min(left, (int)x);
550+
right = Math.Max(right, (int)x);
432551
}
433552
}
434553

0 commit comments

Comments
 (0)