@@ -3858,127 +3858,6 @@ static etcpak_force_inline uint64_t ProcessAlpha_ETC2( const uint8_t* src )
38583858#endif
38593859}
38603860
3861- void CompressEtc1Alpha ( const uint32_t * src, uint64_t * dst, uint32_t blocks, size_t width )
3862- {
3863- int w = 0 ;
3864- uint32_t buf[4 *4 ];
3865- do
3866- {
3867- #ifdef __SSE4_1__
3868- __m128 px0 = _mm_castsi128_ps ( _mm_loadu_si128 ( (__m128i*)( src + width * 0 ) ) );
3869- __m128 px1 = _mm_castsi128_ps ( _mm_loadu_si128 ( (__m128i*)( src + width * 1 ) ) );
3870- __m128 px2 = _mm_castsi128_ps ( _mm_loadu_si128 ( (__m128i*)( src + width * 2 ) ) );
3871- __m128 px3 = _mm_castsi128_ps ( _mm_loadu_si128 ( (__m128i*)( src + width * 3 ) ) );
3872-
3873- _MM_TRANSPOSE4_PS ( px0, px1, px2, px3 );
3874-
3875- __m128i c0 = _mm_castps_si128 ( px0 );
3876- __m128i c1 = _mm_castps_si128 ( px1 );
3877- __m128i c2 = _mm_castps_si128 ( px2 );
3878- __m128i c3 = _mm_castps_si128 ( px3 );
3879-
3880- __m128i mask = _mm_setr_epi32 ( 0x03030303 , 0x07070707 , 0x0b0b0b0b , 0x0f0f0f0f );
3881- __m128i p0 = _mm_shuffle_epi8 ( c0, mask );
3882- __m128i p1 = _mm_shuffle_epi8 ( c1, mask );
3883- __m128i p2 = _mm_shuffle_epi8 ( c2, mask );
3884- __m128i p3 = _mm_shuffle_epi8 ( c3, mask );
3885-
3886- _mm_store_si128 ( (__m128i*)(buf + 0 ), p0 );
3887- _mm_store_si128 ( (__m128i*)(buf + 4 ), p1 );
3888- _mm_store_si128 ( (__m128i*)(buf + 8 ), p2 );
3889- _mm_store_si128 ( (__m128i*)(buf + 12 ), p3 );
3890-
3891- src += 4 ;
3892- #else
3893- auto ptr = buf;
3894- for ( int x=0 ; x<4 ; x++ )
3895- {
3896- unsigned int a = *src >> 24 ;
3897- *ptr++ = a | ( a << 8 ) | ( a << 16 );
3898- src += width;
3899- a = *src >> 24 ;
3900- *ptr++ = a | ( a << 8 ) | ( a << 16 );
3901- src += width;
3902- a = *src >> 24 ;
3903- *ptr++ = a | ( a << 8 ) | ( a << 16 );
3904- src += width;
3905- a = *src >> 24 ;
3906- *ptr++ = a | ( a << 8 ) | ( a << 16 );
3907- src -= width * 3 - 1 ;
3908- }
3909- #endif
3910- if ( ++w == width/4 )
3911- {
3912- src += width * 3 ;
3913- w = 0 ;
3914- }
3915- *dst++ = ProcessRGB ( (uint8_t *)buf );
3916- }
3917- while ( --blocks );
3918- }
3919-
3920- void CompressEtc2Alpha ( const uint32_t * src, uint64_t * dst, uint32_t blocks, size_t width, bool useHeuristics )
3921- {
3922- int w = 0 ;
3923- uint32_t buf[4 *4 ];
3924- do
3925- {
3926- #ifdef __SSE4_1__
3927- __m128 px0 = _mm_castsi128_ps ( _mm_loadu_si128 ( (__m128i*)( src + width * 0 ) ) );
3928- __m128 px1 = _mm_castsi128_ps ( _mm_loadu_si128 ( (__m128i*)( src + width * 1 ) ) );
3929- __m128 px2 = _mm_castsi128_ps ( _mm_loadu_si128 ( (__m128i*)( src + width * 2 ) ) );
3930- __m128 px3 = _mm_castsi128_ps ( _mm_loadu_si128 ( (__m128i*)( src + width * 3 ) ) );
3931-
3932- _MM_TRANSPOSE4_PS ( px0, px1, px2, px3 );
3933-
3934- __m128i c0 = _mm_castps_si128 ( px0 );
3935- __m128i c1 = _mm_castps_si128 ( px1 );
3936- __m128i c2 = _mm_castps_si128 ( px2 );
3937- __m128i c3 = _mm_castps_si128 ( px3 );
3938-
3939- __m128i mask = _mm_setr_epi32 ( 0x03030303 , 0x07070707 , 0x0b0b0b0b , 0x0f0f0f0f );
3940- __m128i p0 = _mm_shuffle_epi8 ( c0, mask );
3941- __m128i p1 = _mm_shuffle_epi8 ( c1, mask );
3942- __m128i p2 = _mm_shuffle_epi8 ( c2, mask );
3943- __m128i p3 = _mm_shuffle_epi8 ( c3, mask );
3944-
3945- _mm_store_si128 ( (__m128i*)(buf + 0 ), p0 );
3946- _mm_store_si128 ( (__m128i*)(buf + 4 ), p1 );
3947- _mm_store_si128 ( (__m128i*)(buf + 8 ), p2 );
3948- _mm_store_si128 ( (__m128i*)(buf + 12 ), p3 );
3949-
3950- src += 4 ;
3951- #else
3952- auto ptr = buf;
3953- for ( int x=0 ; x<4 ; x++ )
3954- {
3955- unsigned int a = *src >> 24 ;
3956- *ptr++ = a | ( a << 8 ) | ( a << 16 );
3957- src += width;
3958- a = *src >> 24 ;
3959- *ptr++ = a | ( a << 8 ) | ( a << 16 );
3960- src += width;
3961- a = *src >> 24 ;
3962- *ptr++ = a | ( a << 8 ) | ( a << 16 );
3963- src += width;
3964- a = *src >> 24 ;
3965- *ptr++ = a | ( a << 8 ) | ( a << 16 );
3966- src -= width * 3 - 1 ;
3967- }
3968- #endif
3969- if ( ++w == width/4 )
3970- {
3971- src += width * 3 ;
3972- w = 0 ;
3973- }
3974- *dst++ = ProcessRGB_ETC2 ( (uint8_t *)buf, useHeuristics );
3975- }
3976- while ( --blocks );
3977- }
3978-
3979- #include < chrono>
3980- #include < thread>
3981-
39823861void CompressEtc1Rgb ( const uint32_t * src, uint64_t * dst, uint32_t blocks, size_t width )
39833862{
39843863 int w = 0 ;
0 commit comments