100100 sum = _mm_packs_epi32 (sum1 , sum1 );\
101101 INSTR ((__m128i * )(DST + OFST5 ), sum );
102102
103- #ifdef __linux__
103+ #ifdef __GNUC__
104104#ifndef __cplusplus
105105__attribute__((visibility ("hidden" )))
106106#endif
@@ -121,7 +121,7 @@ EB_ALIGN(16) const EB_S16 DstTransformAsmConst_SSE2[] = {
121121 //55, -29, 55, -29, 55, -29, 55, -29,
122122};
123123
124- #ifdef __linux__
124+ #ifdef __GNUC__
125125#ifndef __cplusplus
126126__attribute__((visibility ("hidden" )))
127127#endif
@@ -318,7 +318,7 @@ EB_ALIGN(16) const EB_S16 InvTransformAsmConst_SSE2[] = {
318318 54 , 90 , 54 , 90 , 54 , 90 , 54 , 90
319319};
320320
321- #ifdef __linux__
321+ #ifdef __GNUC__
322322#ifndef __cplusplus
323323__attribute__((visibility ("hidden" )))
324324#endif
@@ -365,7 +365,7 @@ EB_EXTERN const EB_S16 coeff_tbl2[48 * 8] =
365365 54 , 67 , -31 , -73 , 4 , 78 , 22 , -82 , -46 , 85 , 67 , -88 , -82 , 90 , 90 , -90
366366};
367367
368- #ifdef __linux__
368+ #ifdef __GNUC__
369369#ifndef __cplusplus
370370__attribute__((visibility ("hidden" )))
371371#endif
@@ -2535,12 +2535,12 @@ void Transform4x4_SSE2_INTRIN(
25352535 xmm3 = _mm_loadl_epi64 ((__m128i * )(residual + 3 * srcStride ));
25362536 xmm0 = _mm_unpacklo_epi16 (xmm0 , xmm1 );
25372537 xmm2 = _mm_unpacklo_epi16 (xmm2 , xmm3 );
2538-
2538+
25392539 xmm1 = _mm_unpackhi_epi32 (xmm0 , xmm2 );
25402540 xmm0 = _mm_unpacklo_epi32 (xmm0 , xmm2 );
25412541 xmm1 = _mm_unpacklo_epi64 (_mm_srli_si128 (xmm1 , 8 ), xmm1 );
25422542 xmm3 = _mm_sub_epi16 (xmm0 , xmm1 );
2543- xmm0 = _mm_add_epi16 (xmm0 , xmm1 );
2543+ xmm0 = _mm_add_epi16 (xmm0 , xmm1 );
25442544
25452545 xmm4 = xmm2 = xmm0 ;
25462546 xmm0 = _mm_srli_si128 (xmm0 , 8 );
@@ -2551,7 +2551,7 @@ void Transform4x4_SSE2_INTRIN(
25512551 xmm_shift = _mm_cvtsi32_si128 (bitIncrement + 1 );
25522552
25532553 xmm1 = _mm_unpacklo_epi16 (xmm3 , _mm_srli_si128 (xmm3 , 8 ));
2554-
2554+
25552555 xmm3 = _mm_madd_epi16 (xmm1 , _mm_load_si128 ((__m128i * )(transformIntrinConst_SSE2 + OFFSET_36_N83 )));
25562556 xmm1 = _mm_madd_epi16 (xmm1 , _mm_load_si128 ((__m128i * )(transformIntrinConst_SSE2 + OFFSET_83_36 )));
25572557 xmm1 = _mm_add_epi32 (xmm1 , xmm_offset );
@@ -2577,14 +2577,14 @@ void Transform4x4_SSE2_INTRIN(
25772577 (void )transformCoefficients ;
25782578 (void )transformInnerArrayPtr ;
25792579
2580- #undef OFFSET_128
2581- #undef OFFSET_64_64
2582- #undef OFFSET_83_36
2580+ #undef OFFSET_128
2581+ #undef OFFSET_64_64
2582+ #undef OFFSET_83_36
25832583#undef OFFSET_N36_N83
2584- #undef OFFSET_64_N64
2585- #undef OFFSET_N64_64
2586- #undef OFFSET_36_N83
2587- #undef OFFSET_83_N36
2584+ #undef OFFSET_64_N64
2585+ #undef OFFSET_N64_64
2586+ #undef OFFSET_36_N83
2587+ #undef OFFSET_83_N36
25882588}
25892589
25902590void DstTransform4x4_SSE2_INTRIN (
@@ -2619,7 +2619,7 @@ void DstTransform4x4_SSE2_INTRIN(
26192619 xmm_res2 = _mm_loadl_epi64 ((__m128i * )(residual + 2 * srcStride ));
26202620 xmm_res3 = _mm_loadl_epi64 ((__m128i * )(residual + 3 * srcStride ));
26212621 xmm_offset = _mm_srli_epi32 (_mm_slli_epi32 (_mm_load_si128 ((__m128i * )(DstTransformAsmConst_SSE2 + OFFSET_DST_1 )), shift ), 1 );
2622-
2622+
26232623 xmm_res0_1 = _mm_unpacklo_epi32 (xmm_res0 , xmm_res1 ); // |res01 |res-S1-01|res23 |res-S1-23|
26242624 xmm_res2_3 = _mm_unpacklo_epi32 (xmm_res2 , xmm_res3 ); // |res-S2-01|res-S3-01|res-S2-23|res-S3-23|
26252625 xmm_res_hi = _mm_unpackhi_epi64 (xmm_res0_1 , xmm_res2_3 ); // |res23 |res-S1-23|res-S2-23|res-S3-23|
@@ -2629,7 +2629,7 @@ void DstTransform4x4_SSE2_INTRIN(
26292629 MACRO_TRANS_2MAC_NO_SAVE (xmm_res_lo , xmm_res_hi , xmm_trans1 , xmm_temp , xmm_offset , OFFSET_DST_74_74 , OFFSET_DST_0_N74 , shift )
26302630 MACRO_TRANS_2MAC_NO_SAVE (xmm_res_lo , xmm_res_hi , xmm_trans2 , xmm_temp , xmm_offset , OFFSET_DST_84_N29 , OFFSET_DST_N74_55 , shift )
26312631 MACRO_TRANS_2MAC_NO_SAVE (xmm_res_lo , xmm_res_hi , xmm_trans3 , xmm_temp , xmm_offset , OFFSET_DST_55_N84 , OFFSET_DST_74_N29 , shift )
2632-
2632+
26332633 // Second Partial Butterfly
26342634 xmm_offset = _mm_set1_epi32 (0x00000080 ); // 128
26352635 xmm_trans0_1 = _mm_unpacklo_epi32 (xmm_trans0 , xmm_trans1 );
@@ -2641,7 +2641,7 @@ void DstTransform4x4_SSE2_INTRIN(
26412641 MACRO_TRANS_2MAC (xmm_trans_lo , xmm_trans_hi , xmm_trans1 , xmm_temp , xmm_offset , OFFSET_DST_74_74 , OFFSET_DST_0_N74 , 8 , dstStride )
26422642 MACRO_TRANS_2MAC (xmm_trans_lo , xmm_trans_hi , xmm_trans2 , xmm_temp , xmm_offset , OFFSET_DST_84_N29 , OFFSET_DST_N74_55 , 8 , (2 * dstStride ))
26432643 MACRO_TRANS_2MAC (xmm_trans_lo , xmm_trans_hi , xmm_trans3 , xmm_temp , xmm_offset , OFFSET_DST_55_N84 , OFFSET_DST_74_N29 , 8 , (3 * dstStride ))
2644-
2644+
26452645 (void )transformInnerArrayPtr ;
26462646}
26472647
@@ -2656,7 +2656,7 @@ void Transform8x8_SSE2_INTRIN(
26562656 // Transform8x8 has its own table because the larger table's offset macros exceed 256 (which is the maximum macro expansion depth).
26572657 // Use a smaller table with values just for Transform8x8.
26582658
2659- EB_ALIGN (16 ) EB_S16 transformIntrinConst_8x8 [] = {
2659+ EB_ALIGN (16 ) EB_S16 transformIntrinConst_8x8 [] = {
26602660 83 , 36 , 83 , 36 , 83 , 36 , 83 , 36 ,
26612661 36 , -83 , 36 , -83 , 36 , -83 , 36 , -83 ,
26622662 89 , 75 , 89 , 75 , 89 , 75 , 89 , 75 ,
@@ -2676,15 +2676,15 @@ void Transform8x8_SSE2_INTRIN(
26762676 36 , 83 , 36 , 83 , 36 , 83 , 36 , 83 ,
26772677 50 , 89 , 50 , 89 , 50 , 89 , 50 , 89 ,
26782678 18 , -75 , 18 , -75 , 18 , -75 , 18 , -75 ,
2679- -64 , 64 , -64 , 64 , -64 , 64 , -64 , 64 ,
2679+ -64 , 64 , -64 , 64 , -64 , 64 , -64 , 64 ,
26802680 64 , -64 , 64 , -64 , 64 , -64 , 64 , -64 ,
26812681 -75 , -18 , -75 , -18 , -75 , -18 , -75 , -18 ,
26822682 89 , -50 , 89 , -50 , 89 , -50 , 89 , -50 ,
2683- 83 , -36 , 83 , -36 , 83 , -36 , 83 , -36 ,
2684- -36 , 83 , -36 , 83 , -36 , 83 , -36 , 83 ,
2685- -83 , 36 , -83 , 36 , -83 , 36 , -83 , 36 ,
2683+ 83 , -36 , 83 , -36 , 83 , -36 , 83 , -36 ,
2684+ -36 , 83 , -36 , 83 , -36 , 83 , -36 , 83 ,
2685+ -83 , 36 , -83 , 36 , -83 , 36 , -83 , 36 ,
26862686 89 , -75 , 89 , -75 , 89 , -75 , 89 , -75 ,
2687- 50 , -18 , 50 , -18 , 50 , -18 , 50 , -18 ,
2687+ 50 , -18 , 50 , -18 , 50 , -18 , 50 , -18 ,
26882688 };
26892689 __m128i sum , sum1 , sum2 , sum3 , sum4 ;
26902690 __m128i res0 , res1 , res2 , res3 , res4 , res5 , res6 , res7 ;
@@ -2711,15 +2711,15 @@ void Transform8x8_SSE2_INTRIN(
27112711 MACRO_UNPACK (32 , res0 , res2 , res01 , res23 , res4 , res6 , res45 , res67 , res02 , res0123 , res46 , res4567 )
27122712 MACRO_UNPACK (64 , res0 , res4 , res02 , res46 , res01 , res45 , res0123 , res4567 , res04 , res0246 , res0145 , res0_to_7 )
27132713 MACRO_CALC_EVEN_ODD (res0 , res04 , res02 , res0246 , res01 , res0145 , res0123 , res0_to_7 )
2714-
2714+
27152715 evenEven0 = _mm_add_epi16 (even0 , even3 );
27162716 evenEven1 = _mm_add_epi16 (even1 , even2 );
27172717 evenOdd0 = _mm_sub_epi16 (even0 , even3 );
27182718 evenOdd1 = _mm_sub_epi16 (even1 , even2 );
27192719
27202720 shift = 4 - bitIncrement ;
27212721 trans0 = _mm_slli_epi16 (_mm_add_epi16 (evenEven0 , evenEven1 ), shift );
2722- trans4 = _mm_slli_epi16 (_mm_sub_epi16 (evenEven0 , evenEven1 ), shift );
2722+ trans4 = _mm_slli_epi16 (_mm_sub_epi16 (evenEven0 , evenEven1 ), shift );
27232723
27242724 xmm_offset = _mm_slli_epi32 (_mm_set1_epi32 (0x00000002 ), bitIncrement );
27252725 shift = bitIncrement + 2 ;
@@ -2729,13 +2729,13 @@ void Transform8x8_SSE2_INTRIN(
27292729
27302730 trans6 = _mm_packs_epi32 (_mm_srai_epi32 (_mm_add_epi32 (_mm_madd_epi16 (_mm_load_si128 ((__m128i * )(TransformIntrinConst + TRANS8x8_OFFSET_36_N83 )),_mm_unpacklo_epi16 (evenOdd0 , evenOdd1 )), xmm_offset ), shift ),
27312731 _mm_srai_epi32 (_mm_add_epi32 (_mm_madd_epi16 (_mm_load_si128 ((__m128i * )(TransformIntrinConst + TRANS8x8_OFFSET_36_N83 )),_mm_unpackhi_epi16 (evenOdd0 , evenOdd1 )), xmm_offset ), shift ));
2732-
2732+
27332733 // TransformCoefficients 1, 3, 5, 7
27342734 odd01_lo = _mm_unpacklo_epi16 (odd0 , odd1 );
27352735 odd01_hi = _mm_unpackhi_epi16 (odd0 , odd1 );
27362736 odd23_lo = _mm_unpacklo_epi16 (odd2 , odd3 );
27372737 odd23_hi = _mm_unpackhi_epi16 (odd2 , odd3 );
2738-
2738+
27392739 MACRO_TRANS_4MAC_NO_SAVE (odd01_lo , odd01_hi , odd23_lo , odd23_hi , trans1 , xmm_offset , TransformIntrinConst , TRANS8x8_OFFSET_89_75 , TRANS8x8_OFFSET_50_18 , shift )
27402740 MACRO_TRANS_4MAC_NO_SAVE (odd01_lo , odd01_hi , odd23_lo , odd23_hi , trans3 , xmm_offset , TransformIntrinConst , TRANS8x8_OFFSET_75_N18 , TRANS8x8_OFFSET_N89_N50 , shift )
27412741 MACRO_TRANS_4MAC_NO_SAVE (odd01_lo , odd01_hi , odd23_lo , odd23_hi , trans5 , xmm_offset , TransformIntrinConst , TRANS8x8_OFFSET_50_N89 , TRANS8x8_OFFSET_18_75 , shift )
@@ -2745,17 +2745,17 @@ void Transform8x8_SSE2_INTRIN(
27452745 MACRO_UNPACK (64 , trans0 , trans2 , trans01 , trans23 , trans4 , trans6 , trans45 , trans67 , trans02 , trans0123 , trans46 , trans4567 )
27462746
27472747 xmm_offset = _mm_loadu_si128 ((__m128i * )(TransformIntrinConst + TRANS8x8_OFFSET_256 ));
2748-
2748+
27492749 MACRO_TRANS_8MAC (trans0 , trans02 , trans01 , trans0123 , trans4 , trans46 , trans45 , trans4567 , xmm_offset , TransformIntrinConst , TRANS8x8_OFFSET_64_64 , TRANS8x8_OFFSET_64_64 , TRANS8x8_OFFSET_64_64 , TRANS8x8_OFFSET_64_64 , 9 , _mm_storeu_si128 , transformCoefficients , 0 )
27502750 MACRO_TRANS_8MAC (trans0 , trans02 , trans01 , trans0123 , trans4 , trans46 , trans45 , trans4567 , xmm_offset , TransformIntrinConst , TRANS8x8_OFFSET_89_75 , TRANS8x8_OFFSET_50_18 , TRANS8x8_OFFSET_N18_N50 , TRANS8x8_OFFSET_N75_N89 , 9 , _mm_storeu_si128 , transformCoefficients , (dstStride ))
27512751 MACRO_TRANS_8MAC (trans0 , trans02 , trans01 , trans0123 , trans4 , trans46 , trans45 , trans4567 , xmm_offset , TransformIntrinConst , TRANS8x8_OFFSET_83_36 , TRANS8x8_OFFSET_N36_N83 , TRANS8x8_OFFSET_N83_N36 , TRANS8x8_OFFSET_36_83 , 9 , _mm_storeu_si128 , transformCoefficients , (2 * dstStride ))
27522752 MACRO_TRANS_8MAC (trans0 , trans02 , trans01 , trans0123 , trans4 , trans46 , trans45 , trans4567 , xmm_offset , TransformIntrinConst , TRANS8x8_OFFSET_75_N18 , TRANS8x8_OFFSET_N89_N50 , TRANS8x8_OFFSET_50_89 , TRANS8x8_OFFSET_18_N75 , 9 , _mm_storeu_si128 , transformCoefficients , (3 * dstStride ))
2753- transformCoefficients += 4 * dstStride ;
2753+ transformCoefficients += 4 * dstStride ;
27542754 MACRO_TRANS_8MAC (trans0 , trans02 , trans01 , trans0123 , trans4 , trans46 , trans45 , trans4567 , xmm_offset , TransformIntrinConst , TRANS8x8_OFFSET_64_N64 , TRANS8x8_OFFSET_N64_64 , TRANS8x8_OFFSET_64_N64 , TRANS8x8_OFFSET_N64_64 , 9 , _mm_storeu_si128 , transformCoefficients , 0 )
27552755 MACRO_TRANS_8MAC (trans0 , trans02 , trans01 , trans0123 , trans4 , trans46 , trans45 , trans4567 , xmm_offset , TransformIntrinConst , TRANS8x8_OFFSET_50_N89 , TRANS8x8_OFFSET_18_75 , TRANS8x8_OFFSET_N75_N18 , TRANS8x8_OFFSET_89_N50 , 9 , _mm_storeu_si128 , transformCoefficients , (dstStride ))
27562756 MACRO_TRANS_8MAC (trans0 , trans02 , trans01 , trans0123 , trans4 , trans46 , trans45 , trans4567 , xmm_offset , TransformIntrinConst , TRANS8x8_OFFSET_36_N83 , TRANS8x8_OFFSET_83_N36 , TRANS8x8_OFFSET_N36_83 , TRANS8x8_OFFSET_N83_36 , 9 , _mm_storeu_si128 , transformCoefficients , (2 * dstStride ))
27572757 MACRO_TRANS_8MAC (trans0 , trans02 , trans01 , trans0123 , trans4 , trans46 , trans45 , trans4567 , xmm_offset , TransformIntrinConst , TRANS8x8_OFFSET_18_N50 , TRANS8x8_OFFSET_75_N89 , TRANS8x8_OFFSET_89_N75 , TRANS8x8_OFFSET_50_N18 , 9 , _mm_storeu_si128 , transformCoefficients , (3 * dstStride ))
2758-
2758+
27592759 (void )transformInnerArrayPtr ;
27602760}
27612761
@@ -2855,10 +2855,10 @@ void PfreqTransform8x8_SSE2_INTRIN(
28552855 MACRO_TRANS_4MAC_NO_SAVE (odd01_lo , odd01_hi , odd23_lo , odd23_hi , trans3 , xmm_offset , TransformIntrinConst , TRANS8x8_OFFSET_75_N18 , TRANS8x8_OFFSET_N89_N50 , shift )
28562856 //MACRO_TRANS_4MAC_NO_SAVE(odd01_lo, odd01_hi, odd23_lo, odd23_hi, trans5, xmm_offset, TransformIntrinConst, TRANS8x8_OFFSET_50_N89, TRANS8x8_OFFSET_18_75, shift)
28572857 //MACRO_TRANS_4MAC_NO_SAVE(odd01_lo, odd01_hi, odd23_lo, odd23_hi, trans7, xmm_offset, TransformIntrinConst, TRANS8x8_OFFSET_18_N50, TRANS8x8_OFFSET_75_N89, shift)
2858-
2858+
28592859 MACRO_UNPACK (32 , trans0 , trans1 , trans2 , trans3 , trans4 /*, trans5, trans6, trans7*/ , trans1 , trans1 , trans1 , trans01 , trans23 , trans45 , trans67 )
28602860 MACRO_UNPACK_V2 (64 , trans0 , trans2 , trans01 , trans23 , trans4 , trans0 , /*trans6,*/ trans45 , trans67 , trans02 , trans0123 )
2861-
2861+
28622862 xmm_offset = _mm_loadu_si128 ((__m128i * )(TransformIntrinConst + TRANS8x8_OFFSET_256 ));
28632863
28642864 MACRO_TRANS_8MAC_PF_N2 (trans0 , trans02 , trans01 , trans0123 , trans4 , trans45 , trans45 , trans45 , xmm_offset , TransformIntrinConst , TRANS8x8_OFFSET_64_64 , TRANS8x8_OFFSET_64_64 , TRANS8x8_OFFSET_64_64 , TRANS8x8_OFFSET_64_64 , 9 , _mm_storeu_si128 , transformCoefficients , 0 )
0 commit comments