@@ -71,10 +71,10 @@ public static (bool Difference, Rectangle Bounds) DeDuplicatePixels<TPixel>(
71
71
PixelOperations < TPixel > . Instance . ToRgba32 ( configuration , nextFrame . DangerousGetPixelRowMemory ( y ) . Span , next ) ;
72
72
}
73
73
74
- ref Vector256 < byte > previousBase = ref Unsafe . As < Rgba32 , Vector256 < byte > > ( ref MemoryMarshal . GetReference ( previous ) ) ;
75
- ref Vector256 < byte > currentBase = ref Unsafe . As < Rgba32 , Vector256 < byte > > ( ref MemoryMarshal . GetReference ( current ) ) ;
76
- ref Vector256 < byte > nextBase = ref Unsafe . As < Rgba32 , Vector256 < byte > > ( ref MemoryMarshal . GetReference ( next ) ) ;
77
- ref Vector256 < byte > resultBase = ref Unsafe . As < Rgba32 , Vector256 < byte > > ( ref MemoryMarshal . GetReference ( result ) ) ;
74
+ ref Vector256 < byte > previousBase256 = ref Unsafe . As < Rgba32 , Vector256 < byte > > ( ref MemoryMarshal . GetReference ( previous ) ) ;
75
+ ref Vector256 < byte > currentBase256 = ref Unsafe . As < Rgba32 , Vector256 < byte > > ( ref MemoryMarshal . GetReference ( current ) ) ;
76
+ ref Vector256 < byte > nextBase256 = ref Unsafe . As < Rgba32 , Vector256 < byte > > ( ref MemoryMarshal . GetReference ( next ) ) ;
77
+ ref Vector256 < byte > resultBase256 = ref Unsafe . As < Rgba32 , Vector256 < byte > > ( ref MemoryMarshal . GetReference ( result ) ) ;
78
78
79
79
int i = 0 ;
80
80
uint x = 0 ;
@@ -93,19 +93,19 @@ public static (bool Difference, Rectangle Bounds) DeDuplicatePixels<TPixel>(
93
93
94
94
while ( remaining >= 8 )
95
95
{
96
- Vector256 < uint > p = Unsafe . Add ( ref previousBase , x ) . AsUInt32 ( ) ;
97
- Vector256 < uint > c = Unsafe . Add ( ref currentBase , x ) . AsUInt32 ( ) ;
96
+ Vector256 < uint > p = Unsafe . Add ( ref previousBase256 , x ) . AsUInt32 ( ) ;
97
+ Vector256 < uint > c = Unsafe . Add ( ref currentBase256 , x ) . AsUInt32 ( ) ;
98
98
99
99
Vector256 < uint > eq = Avx2 . CompareEqual ( p , c ) ;
100
100
Vector256 < uint > r = Avx2 . BlendVariable ( c , r256 , Avx2 . And ( eq , vmb256 ) ) ;
101
101
102
102
if ( nextFrame != null )
103
103
{
104
- Vector256 < int > n = Avx2 . ShiftRightLogical ( Unsafe . Add ( ref nextBase , x ) . AsUInt32 ( ) , 24 ) . AsInt32 ( ) ;
104
+ Vector256 < int > n = Avx2 . ShiftRightLogical ( Unsafe . Add ( ref nextBase256 , x ) . AsUInt32 ( ) , 24 ) . AsInt32 ( ) ;
105
105
eq = Avx2 . AndNot ( Avx2 . CompareGreaterThan ( Avx2 . ShiftRightLogical ( c , 24 ) . AsInt32 ( ) , n ) . AsUInt32 ( ) , eq ) ;
106
106
}
107
107
108
- Unsafe . Add ( ref resultBase , x ) = r. AsByte ( ) ;
108
+ Unsafe . Add ( ref resultBase256 , x ) = r. AsByte ( ) ;
109
109
110
110
uint msk = ( uint ) Avx2 . MoveMask ( eq . AsByte ( ) ) ;
111
111
msk = ~ msk ;
@@ -128,9 +128,10 @@ public static (bool Difference, Rectangle Bounds) DeDuplicatePixels<TPixel>(
128
128
}
129
129
}
130
130
131
- // TODO: There's a bug here. See WebpEncoderTests.Encode_AnimatedLossless
132
- if ( Sse2 . IsSupported && remaining >= 4 && false )
131
+ if ( Sse2 . IsSupported && remaining >= 4 )
133
132
{
133
+ // Rescale the offset from Vector256 units to Vector128 units: x may already have been advanced by the 8-pixel loop above, and each Vector256 step spans two Vector128 elements.
134
+ x *= 2 ;
134
135
Vector128 < uint > r128 = previousFrame != null ? Vector128 . Create ( bg . PackedValue ) : Vector128 < uint > . Zero ;
135
136
Vector128 < uint > vmb128 = Vector128 < uint > . Zero ;
136
137
if ( blend )
@@ -140,19 +141,19 @@ public static (bool Difference, Rectangle Bounds) DeDuplicatePixels<TPixel>(
140
141
141
142
while ( remaining >= 4 )
142
143
{
143
- Vector128 < uint > p = Unsafe . Add ( ref Unsafe . As < Vector256 < byte > , Vector128 < uint > > ( ref previousBase ) , x ) ;
144
- Vector128 < uint > c = Unsafe . Add ( ref Unsafe . As < Vector256 < byte > , Vector128 < uint > > ( ref currentBase ) , x ) ;
144
+ Vector128 < uint > p = Unsafe . Add ( ref Unsafe . As < Vector256 < byte > , Vector128 < uint > > ( ref previousBase256 ) , x ) ;
145
+ Vector128 < uint > c = Unsafe . Add ( ref Unsafe . As < Vector256 < byte > , Vector128 < uint > > ( ref currentBase256 ) , x ) ;
145
146
146
147
Vector128 < uint > eq = Sse2 . CompareEqual ( p , c ) ;
147
148
Vector128 < uint > r = SimdUtils . HwIntrinsics . BlendVariable ( c , r128 , Sse2 . And ( eq , vmb128 ) ) ;
148
149
149
150
if ( nextFrame != null )
150
151
{
151
- Vector128 < int > n = Sse2 . ShiftRightLogical ( Unsafe . Add ( ref Unsafe . As < Vector256 < byte > , Vector128 < uint > > ( ref nextBase ) , x ) , 24 ) . AsInt32 ( ) ;
152
+ Vector128 < int > n = Sse2 . ShiftRightLogical ( Unsafe . Add ( ref Unsafe . As < Vector256 < byte > , Vector128 < uint > > ( ref nextBase256 ) , x ) , 24 ) . AsInt32 ( ) ;
152
153
eq = Sse2 . AndNot ( Sse2 . CompareGreaterThan ( Sse2 . ShiftRightLogical ( c , 24 ) . AsInt32 ( ) , n ) . AsUInt32 ( ) , eq ) ;
153
154
}
154
155
155
- Unsafe . Add ( ref Unsafe . As < Vector256 < byte > , Vector128 < uint > > ( ref resultBase ) , x ) = r;
156
+ Unsafe . Add ( ref Unsafe . As < Vector256 < byte > , Vector128 < uint > > ( ref resultBase256 ) , x ) = r;
156
157
157
158
ushort msk = ( ushort ) ( uint ) Sse2 . MoveMask ( eq . AsByte ( ) ) ;
158
159
msk = ( ushort ) ~ msk ;
0 commit comments