@@ -37,16 +37,41 @@ SIMDE__BEGIN_DECLS
3737# define SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES
3838#endif
3939
40- #define SIMDE_SIDD_CMP_EQUAL_ANY 0
41- #define SIMDE_SIDD_CMP_RANGES 1
42- #define SIMDE_SIDD_CMP_EQUAL_EACH 2
43- #define SIMDE_SIDD_CMP_EQUAL_ORDERED 3
40+ #if defined(SIMDE_X86_SSE4_2_NATIVE )
41+ # define SIMDE_SIDD_UBYTE_OPS _SIDD_UBYTE_OPS
42+ # define SIMDE_SIDD_UWORD_OPS _SIDD_UWORD_OPS
43+ # define SIMDE_SIDD_SBYTE_OPS _SIDD_SBYTE_OPS
44+ # define SIMDE_SIDD_SWORD_OPS _SIDD_SWORD_OPS /* signed 16-bit element mode; single underscore so the name matches the fallback branch and its sibling constants */
45+ # define SIMDE_SIDD_CMP_EQUAL_ANY _SIDD_CMP_EQUAL_ANY
46+ # define SIMDE_SIDD_CMP_RANGES _SIDD_CMP_RANGES
47+ # define SIMDE_SIDD_CMP_EQUAL_EACH _SIDD_CMP_EQUAL_EACH
48+ # define SIMDE_SIDD_CMP_EQUAL_ORDERED _SIDD_CMP_EQUAL_ORDERED
49+ # define SIMDE_SIDD_POSITIVE_POLARITY _SIDD_POSITIVE_POLARITY
50+ # define SIMDE_SIDD_NEGATIVE_POLARITY _SIDD_NEGATIVE_POLARITY
51+ # define SIMDE_SIDD_MASKED_POSITIVE_POLARITY _SIDD_MASKED_POSITIVE_POLARITY
52+ # define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY _SIDD_MASKED_NEGATIVE_POLARITY
53+ # define SIMDE_SIDD_LEAST_SIGNIFICANT _SIDD_LEAST_SIGNIFICANT
54+ # define SIMDE_SIDD_MOST_SIGNIFICANT _SIDD_MOST_SIGNIFICANT
55+ # define SIMDE_SIDD_BIT_MASK _SIDD_BIT_MASK
56+ # define SIMDE_SIDD_UNIT_MASK _SIDD_UNIT_MASK
4457
45- #if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES )
46- #define _SIDD_CMP_EQUAL_ANY SIMDE_SIDD_CMP_EQUAL_ANY
47- #define _SIDD_CMP_RANGES SIMDE_SIDD_CMP_RANGES
48- #define _SIDD_CMP_EQUAL_EACH SIMDE_SIDD_CMP_EQUAL_EACH
49- #define _SIDD_CMP_EQUAL_ORDERED SIMDE_SIDD_CMP_EQUAL_ORDERED
58+ #else
59+ # define SIMDE_SIDD_UBYTE_OPS 0x00
60+ # define SIMDE_SIDD_UWORD_OPS 0x01
61+ # define SIMDE_SIDD_SBYTE_OPS 0x02
62+ # define SIMDE_SIDD_SWORD_OPS 0x03
63+ # define SIMDE_SIDD_CMP_EQUAL_ANY 0x00
64+ # define SIMDE_SIDD_CMP_RANGES 0x04
65+ # define SIMDE_SIDD_CMP_EQUAL_EACH 0x08
66+ # define SIMDE_SIDD_CMP_EQUAL_ORDERED 0x0c
67+ # define SIMDE_SIDD_POSITIVE_POLARITY 0x00
68+ # define SIMDE_SIDD_NEGATIVE_POLARITY 0x10
69+ # define SIMDE_SIDD_MASKED_POSITIVE_POLARITY 0x20
70+ # define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY 0x30
71+ # define SIMDE_SIDD_LEAST_SIGNIFICANT 0x00
72+ # define SIMDE_SIDD_MOST_SIGNIFICANT 0x40
73+ # define SIMDE_SIDD_BIT_MASK 0x00
74+ # define SIMDE_SIDD_UNIT_MASK 0x40
5075#endif
5176
5277SIMDE__FUNCTION_ATTRIBUTES
@@ -61,72 +86,79 @@ simde_mm_cmpestra_8_(simde__m128i a, int la, simde__m128i b, int lb, const int i
6186 const int upper_bound = (128 / 8 ) - 1 ;
6287 int a_invalid = 0 ;
6388 int b_invalid = 0 ;
64- for (int i = 0 ; i < ( upper_bound ) ; i ++ ) {
65- for (int j = 0 ; j < ( upper_bound ) ; j ++ ){
89+ for (int i = 0 ; i < upper_bound ; i ++ ) {
90+ for (int j = 0 ; j < upper_bound ; j ++ ){
6691 int bitvalue = ((a_ .i8 [i ] == b_ .i8 [j ]) ? 1 : 0 );
67- bool_res_ .i8 [i ] |= (( bitvalue ) << j );
6892 if (i == la )
6993 a_invalid = 1 ;
7094 if (j == lb )
7195 b_invalid = 1 ;
7296 switch (cmp_op ){
7397 case SIMDE_SIDD_CMP_EQUAL_ANY :
98+ bitvalue = 0 ;
7499 break ;
75100 case SIMDE_SIDD_CMP_RANGES :
101+ bitvalue = 0 ;
76102 break ;
77103 case SIMDE_SIDD_CMP_EQUAL_EACH :
78104 if (a_invalid && b_invalid )
79- bool_res_ .i8 [i ] |= (1 << j );
105+ bitvalue = 1 ;
106+ else
107+ bitvalue = 0 ;
80108 break ;
81109 case SIMDE_SIDD_CMP_EQUAL_ORDERED :
82110 if (a_invalid && !b_invalid )
83- bool_res_ . i8 [ i ] |= ( 1 << j ) ;
111+ bitvalue = 1 ;
84112 else if (a_invalid && b_invalid )
85- bool_res_ .i8 [i ] |= (1 << j );
113+ bitvalue = 1 ;
114+ else
115+ bitvalue = 0 ;
86116 break ;
87117 }
118+ bool_res_ .i8 [i ] |= (bitvalue << j );
88119 }
89120 }
90121 int32_t int_res_1 = 0 ;
91122 int32_t int_res_2 = 0 ;
92123 switch (cmp_op ) {
93124 case SIMDE_SIDD_CMP_EQUAL_ANY :
94- for (int i = 0 ; i < ( upper_bound ) ; i ++ ){
125+ for (int i = 0 ; i < upper_bound ; i ++ ){
95126 SIMDE__VECTORIZE_REDUCTION (|:int_res_1 )
96- for (int j = 0 ; j < ( upper_bound ) ; j ++ ){
127+ for (int j = 0 ; j < upper_bound ; j ++ ){
97128 int_res_1 |= (((bool_res_ .i8 [i ] >> j ) & 1 ) << i );
98129 }
99130 }
100131 break ;
101132 case SIMDE_SIDD_CMP_RANGES :
102- for (int i = 0 ; i < ( upper_bound ) ; i ++ ){
133+ for (int i = 0 ; i < upper_bound ; i ++ ){
103134 SIMDE__VECTORIZE_REDUCTION (|:int_res_1 )
104- for (int j = 0 ; j < ( upper_bound ) ; j ++ ){
135+ for (int j = 0 ; j < upper_bound ; j ++ ){
105136 int_res_1 |= ((((bool_res_ .i8 [i ] >> j ) & 1 ) & ((bool_res_ .i8 [i ] >> (j + 1 )) & 1 )) << i );
106137 j += 2 ;
107138 }
108139 }
109140 break ;
110141 case SIMDE_SIDD_CMP_EQUAL_EACH :
111- for (int i = 0 ; i < ( upper_bound ) ; i ++ ){
142+ for (int i = 0 ; i < upper_bound ; i ++ ){
112143 SIMDE__VECTORIZE_REDUCTION (|:int_res_1 )
113- for (int j = 0 ; j < ( upper_bound ) ; j ++ ){
144+ for (int j = 0 ; j < upper_bound ; j ++ ){
114145 int_res_1 |= (((bool_res_ .i8 [i ] >> i ) & 1 ) << i );
115146 }
116147 }
117148 break ;
118149 case SIMDE_SIDD_CMP_EQUAL_ORDERED :
119- int_res_1 = ( imm8 & 1 ) ? 0xff : 0xffff ;
120- for (int i = 0 ; i < ( upper_bound ) ; i ++ ){
150+ int_res_1 = 0xff ;
151+ for (int i = 0 ; i < upper_bound ; i ++ ){
121152 int k = i ;
122- SIMDE__VECTORIZE_REDUCTION (| :int_res_1 )
153+ SIMDE__VECTORIZE_REDUCTION (& :int_res_1 )
123154 for (int j = 0 ; j < (upper_bound - i ) ; j ++ ){
124155 int_res_1 &= (((bool_res_ .i8 [k ] >> j ) & 1 ) << i ) ;
125156 k += 1 ;
126157 }
127158 }
159+ break ;
128160 }
129- for (int i = 0 ; i < ( upper_bound ) ; i ++ ){
161+ for (int i = 0 ; i < upper_bound ; i ++ ){
130162 if (polarity & 1 ){
131163 if ((polarity >> 1 ) & 1 ) {
132164 if (i >= lb ) {
@@ -144,7 +176,7 @@ simde_mm_cmpestra_8_(simde__m128i a, int la, simde__m128i b, int lb, const int i
144176 int_res_2 |= ( ((int_res_1 >> i ) & 1 ) << i );
145177 }
146178 }
147- return ( ( int_res_2 == 0 ) & (lb > upper_bound ) );
179+ return ! int_res_2 & (lb > upper_bound );
148180}
149181
150182SIMDE__FUNCTION_ATTRIBUTES
@@ -159,73 +191,80 @@ simde_mm_cmpestra_16_(simde__m128i a, int la, simde__m128i b, int lb, const int
159191 const int upper_bound = (128 / 16 ) - 1 ;
160192 int a_invalid = 0 ;
161193 int b_invalid = 0 ;
162- for (int i = 0 ; i < ( upper_bound ) ; i ++ ) {
163- for (int j = 0 ; j < ( upper_bound ) ; j ++ )
194+ for (int i = 0 ; i < upper_bound ; i ++ ) {
195+ for (int j = 0 ; j < upper_bound ; j ++ )
164196 {
165197 int bitvalue = ((a_ .i16 [i ] == b_ .i16 [j ]) ? 1 : 0 );
166- bool_res_ .i16 [i ] |= ((bitvalue ) << j );
167198 if (i == la )
168199 a_invalid = 1 ;
169200 if (j == lb )
170201 b_invalid = 1 ;
171202 switch (cmp_op ){
172203 case SIMDE_SIDD_CMP_EQUAL_ANY :
204+ bitvalue = 0 ;
173205 break ;
174206 case SIMDE_SIDD_CMP_RANGES :
207+ bitvalue = 0 ;
175208 break ;
176209 case SIMDE_SIDD_CMP_EQUAL_EACH :
177210 if (a_invalid && b_invalid )
178- bool_res_ .i16 [i ] |= (1 << j );
211+ bitvalue = 1 ;
212+ else
213+ bitvalue = 0 ;
179214 break ;
180215 case SIMDE_SIDD_CMP_EQUAL_ORDERED :
181216 if (a_invalid && !b_invalid )
182- bool_res_ . i16 [ i ] |= ( 1 << j ) ;
217+ bitvalue = 1 ;
183218 else if (a_invalid && b_invalid )
184- bool_res_ .i16 [i ] |= (1 << j );
219+ bitvalue = 1 ;
220+ else
221+ bitvalue = 0 ;
185222 break ;
186223 }
224+ bool_res_ .i16 [i ] |= (bitvalue << j );
187225 }
188226 }
189227 int32_t int_res_1 = 0 ;
190228 int32_t int_res_2 = 0 ;
191229 switch (cmp_op ) {
192230 case SIMDE_SIDD_CMP_EQUAL_ANY :
193- for (int i = 0 ; i < ( upper_bound ) ; i ++ ){
231+ for (int i = 0 ; i < upper_bound ; i ++ ){
194232 SIMDE__VECTORIZE_REDUCTION (|:int_res_1 )
195- for (int j = 0 ; j < ( upper_bound ) ; j ++ ){
233+ for (int j = 0 ; j < upper_bound ; j ++ ){
196234 int_res_1 |= (((bool_res_ .i16 [i ] >> j ) & 1 ) << i ) ;
197235 }
198236 }
199237 break ;
200238 case SIMDE_SIDD_CMP_RANGES :
201- for (int i = 0 ; i < ( upper_bound ) ; i ++ ){
239+ for (int i = 0 ; i < upper_bound ; i ++ ){
202240 SIMDE__VECTORIZE_REDUCTION (|:int_res_1 )
203- for (int j = 0 ; j < ( upper_bound ) ; j ++ ){
241+ for (int j = 0 ; j < upper_bound ; j ++ ){
204242 int_res_1 |= ((((bool_res_ .i16 [i ] >> j ) & 1 ) & ((bool_res_ .i16 [i ] >> (j + 1 )) & 1 )) << i );
205243 j += 2 ;
206244 }
207245 }
208246 break ;
209247 case SIMDE_SIDD_CMP_EQUAL_EACH :
210- for (int i = 0 ; i < ( upper_bound ) ; i ++ ){
248+ for (int i = 0 ; i < upper_bound ; i ++ ){
211249 SIMDE__VECTORIZE_REDUCTION (|:int_res_1 )
212- for (int j = 0 ; j < ( upper_bound ) ; j ++ ){
250+ for (int j = 0 ; j < upper_bound ; j ++ ){
213251 int_res_1 |= (((bool_res_ .i16 [i ] >> i ) & 1 ) << i );
214252 }
215253 }
216254 break ;
217255 case SIMDE_SIDD_CMP_EQUAL_ORDERED :
218- int_res_1 = ( imm8 & 1 ) ? 0xff : 0xffff ;
219- for (int i = 0 ; i < ( upper_bound ) ; i ++ ){
256+ int_res_1 = 0xffff ;
257+ for (int i = 0 ; i < upper_bound ; i ++ ){
220258 int k = i ;
221- SIMDE__VECTORIZE_REDUCTION (| :int_res_1 )
259+ SIMDE__VECTORIZE_REDUCTION (& :int_res_1 )
222260 for (int j = 0 ; j < (upper_bound - i ) ; j ++ ){
223261 int_res_1 &= (((bool_res_ .i16 [k ] >> j ) & 1 ) << i ) ;
224262 k += 1 ;
225263 }
226264 }
265+ break ;
227266 }
228- for (int i = 0 ; i < ( upper_bound ) ; i ++ ){
267+ for (int i = 0 ; i < upper_bound ; i ++ ){
229268 if (polarity & 1 ){
230269 if ((polarity >> 1 ) & 1 ) {
231270 if (i >= lb ) {
@@ -243,22 +282,17 @@ simde_mm_cmpestra_16_(simde__m128i a, int la, simde__m128i b, int lb, const int
243282 int_res_2 |= (((int_res_1 >> i ) & 1 ) << i );
244283 }
245284 }
246- return (( int_res_2 == 0 ) & (lb > upper_bound ) );
285+ return ! int_res_2 & (lb > upper_bound );
247286}
248287
249- SIMDE__FUNCTION_ATTRIBUTES
250- int
251- simde_mm_cmpestra (simde__m128i a , int la , simde__m128i b , int lb , const int imm8 ){
252- const int character_type = imm8 & 0x03 ;
253- if (character_type & 1 )
254- return simde_mm_cmpestra_8_ (a , la , b , lb , imm8 );
255- else
256- return simde_mm_cmpestra_16_ (a , la , b , lb , imm8 );
257- }
258288#if defined(SIMDE_X86_SSE4_2_NATIVE ) /* native SSE4.2 build: forward simde_mm_cmpestra straight to the intrinsic */
259289# define simde_mm_cmpestra (a , la , b , lb , imm8 ) _mm_cmpestra(a, la, b, lb, imm8)
260290#endif
261291#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES ) /* NOTE(review): the portable simde_mm_cmpestra below is only defined when native aliases are enabled; it presumably belongs in an #else of the native check above so non-alias builds also get it -- confirm */
292+ # define simde_mm_cmpestra (a , la , b , lb , imm8 ) \
293+ (((imm8) & SIMDE_SIDD_UWORD_OPS) \
294+ ? simde_mm_cmpestra_16_((a), (la), (b), (lb), (imm8)) \
295+ : simde_mm_cmpestra_8_((a), (la), (b), (lb), (imm8))) /* nonzero (imm8 & SIMDE_SIDD_UWORD_OPS) picks the 16-bit helper, otherwise the 8-bit helper */
262296# define _mm_cmpestra (a , la , b , lb , imm8 ) simde_mm_cmpestra(a, la, b, lb, imm8) /* expose the Intel intrinsic name as an alias for the simde_ name */
263297#endif
264298
0 commit comments