@@ -92,6 +92,237 @@ SIMDE_BEGIN_DECLS_
9292 #define _SIDD_UNIT_MASK SIMDE_SIDD_UNIT_MASK
9393#endif
9494
95+ SIMDE_FUNCTION_ATTRIBUTES
96+ int
97+ simde_mm_cmpestra_8_ (simde__m128i a , int la , simde__m128i b , int lb , const int imm8 )
98+ SIMDE_REQUIRE_CONSTANT_RANGE (imm8 , 0 , 255 ) {
99+ const int cmp_op = imm8 & 0x0c ;
100+ const int polarity = imm8 & 0x30 ;
101+ simde__m128i_private
102+ bool_res_ = simde__m128i_to_private (simde_mm_setzero_si128 ()),
103+ a_ = simde__m128i_to_private (a ),
104+ b_ = simde__m128i_to_private (b );
105+ const int upper_bound = (128 / 8 ) - 1 ;
106+ int a_invalid = 0 ;
107+ int b_invalid = 0 ;
108+ for (int i = 0 ; i < upper_bound ; i ++ ) {
109+ for (int j = 0 ; j < upper_bound ; j ++ ){
110+ int bitvalue = ((a_ .i8 [i ] == b_ .i8 [j ]) ? 1 : 0 );
111+ if (i == la )
112+ a_invalid = 1 ;
113+ if (j == lb )
114+ b_invalid = 1 ;
115+ switch (cmp_op ){
116+ case SIMDE_SIDD_CMP_EQUAL_ANY :
117+ case SIMDE_SIDD_CMP_RANGES :
118+ bitvalue = 0 ;
119+ break ;
120+ case SIMDE_SIDD_CMP_EQUAL_EACH :
121+ if (a_invalid && b_invalid )
122+ bitvalue = 1 ;
123+ else
124+ bitvalue = 0 ;
125+ break ;
126+ case SIMDE_SIDD_CMP_EQUAL_ORDERED :
127+ if (a_invalid && !b_invalid )
128+ bitvalue = 1 ;
129+ else if (a_invalid && b_invalid )
130+ bitvalue = 1 ;
131+ else
132+ bitvalue = 0 ;
133+ break ;
134+ }
135+ bool_res_ .i8 [i ] |= (bitvalue << j );
136+ }
137+ }
138+ int32_t int_res_1 = 0 ;
139+ int32_t int_res_2 = 0 ;
140+ switch (cmp_op ) {
141+ case SIMDE_SIDD_CMP_EQUAL_ANY :
142+ for (int i = 0 ; i < upper_bound ; i ++ ){
143+ SIMDE_VECTORIZE_REDUCTION (|:int_res_1 )
144+ for (int j = 0 ; j < upper_bound ; j ++ ){
145+ int_res_1 |= (((bool_res_ .i8 [i ] >> j ) & 1 ) << i );
146+ }
147+ }
148+ break ;
149+ case SIMDE_SIDD_CMP_RANGES :
150+ for (int i = 0 ; i < upper_bound ; i ++ ){
151+ SIMDE_VECTORIZE_REDUCTION (|:int_res_1 )
152+ for (int j = 0 ; j < upper_bound ; j ++ ){
153+ int_res_1 |= ((((bool_res_ .i8 [i ] >> j ) & 1 ) & ((bool_res_ .i8 [i ] >> (j + 1 )) & 1 )) << i );
154+ j += 2 ;
155+ }
156+ }
157+ break ;
158+ case SIMDE_SIDD_CMP_EQUAL_EACH :
159+ for (int i = 0 ; i < upper_bound ; i ++ ){
160+ SIMDE_VECTORIZE_REDUCTION (|:int_res_1 )
161+ for (int j = 0 ; j < upper_bound ; j ++ ){
162+ int_res_1 |= (((bool_res_ .i8 [i ] >> i ) & 1 ) << i );
163+ }
164+ }
165+ break ;
166+ case SIMDE_SIDD_CMP_EQUAL_ORDERED :
167+ int_res_1 = 0xff ;
168+ for (int i = 0 ; i < upper_bound ; i ++ ){
169+ int k = i ;
170+ HEDLEY_DIAGNOSTIC_PUSH
171+ #if defined(SIMDE_BUG_CLANG_45959 )
172+ #pragma clang diagnostic ignored "-Wsign-conversion"
173+ #endif
174+ SIMDE_VECTORIZE_REDUCTION (& :int_res_1 )
175+ for (int j = 0 ; j < (upper_bound - i ) ; j ++ ){
176+ int_res_1 &= (((bool_res_ .i8 [k ] >> j ) & 1 ) << i ) ;
177+ k += 1 ;
178+ }
179+ HEDLEY_DIAGNOSTIC_POP
180+ }
181+ break ;
182+ }
183+ for (int i = 0 ; i < upper_bound ; i ++ ){
184+ if (polarity & SIMDE_SIDD_NEGATIVE_POLARITY ){
185+ if (polarity & SIMDE_SIDD_MASKED_POSITIVE_POLARITY ) {
186+ if (i >= lb ) {
187+ int_res_2 |= (((int_res_1 >> i ) & 1 ) << i );
188+ }
189+ else {
190+ int_res_2 |= ((((int_res_1 >> i ) & 1 ) ^ (-1 )) << i );
191+ }
192+ }
193+ else {
194+ int_res_2 |= ((((int_res_1 >> i ) & 1 ) ^ (-1 )) << i );
195+ }
196+ }
197+ else {
198+ int_res_2 |= ( ((int_res_1 >> i ) & 1 ) << i );
199+ }
200+ }
201+ return !int_res_2 & (lb > upper_bound );
202+ }
203+
204+ SIMDE_FUNCTION_ATTRIBUTES
205+ int
206+ simde_mm_cmpestra_16_ (simde__m128i a , int la , simde__m128i b , int lb , const int imm8 )
207+ SIMDE_REQUIRE_CONSTANT_RANGE (imm8 , 0 , 255 ) {
208+ const int cmp_op = imm8 & 0x0c ;
209+ const int polarity = imm8 & 0x30 ;
210+ simde__m128i_private
211+ bool_res_ = simde__m128i_to_private (simde_mm_setzero_si128 ()),
212+ a_ = simde__m128i_to_private (a ),
213+ b_ = simde__m128i_to_private (b );
214+ const int upper_bound = (128 / 16 ) - 1 ;
215+ int a_invalid = 0 ;
216+ int b_invalid = 0 ;
217+ for (int i = 0 ; i < upper_bound ; i ++ ) {
218+ for (int j = 0 ; j < upper_bound ; j ++ )
219+ {
220+ int bitvalue = ((a_ .i16 [i ] == b_ .i16 [j ]) ? 1 : 0 );
221+ if (i == la )
222+ a_invalid = 1 ;
223+ if (j == lb )
224+ b_invalid = 1 ;
225+ switch (cmp_op ){
226+ case SIMDE_SIDD_CMP_EQUAL_ANY :
227+ case SIMDE_SIDD_CMP_RANGES :
228+ bitvalue = 0 ;
229+ break ;
230+ case SIMDE_SIDD_CMP_EQUAL_EACH :
231+ if (a_invalid && b_invalid )
232+ bitvalue = 1 ;
233+ else
234+ bitvalue = 0 ;
235+ break ;
236+ case SIMDE_SIDD_CMP_EQUAL_ORDERED :
237+ if (a_invalid && !b_invalid )
238+ bitvalue = 1 ;
239+ else if (a_invalid && b_invalid )
240+ bitvalue = 1 ;
241+ else
242+ bitvalue = 0 ;
243+ break ;
244+ }
245+ bool_res_ .i16 [i ] |= (bitvalue << j );
246+ }
247+ }
248+ int32_t int_res_1 = 0 ;
249+ int32_t int_res_2 = 0 ;
250+ switch (cmp_op ) {
251+ case SIMDE_SIDD_CMP_EQUAL_ANY :
252+ for (int i = 0 ; i < upper_bound ; i ++ ){
253+ SIMDE_VECTORIZE_REDUCTION (|:int_res_1 )
254+ for (int j = 0 ; j < upper_bound ; j ++ ){
255+ int_res_1 |= (((bool_res_ .i16 [i ] >> j ) & 1 ) << i ) ;
256+ }
257+ }
258+ break ;
259+ case SIMDE_SIDD_CMP_RANGES :
260+ for (int i = 0 ; i < upper_bound ; i ++ ){
261+ SIMDE_VECTORIZE_REDUCTION (|:int_res_1 )
262+ for (int j = 0 ; j < upper_bound ; j ++ ){
263+ int_res_1 |= ((((bool_res_ .i16 [i ] >> j ) & 1 ) & ((bool_res_ .i16 [i ] >> (j + 1 )) & 1 )) << i );
264+ j += 2 ;
265+ }
266+ }
267+ break ;
268+ case SIMDE_SIDD_CMP_EQUAL_EACH :
269+ for (int i = 0 ; i < upper_bound ; i ++ ){
270+ SIMDE_VECTORIZE_REDUCTION (|:int_res_1 )
271+ for (int j = 0 ; j < upper_bound ; j ++ ){
272+ int_res_1 |= (((bool_res_ .i16 [i ] >> i ) & 1 ) << i );
273+ }
274+ }
275+ break ;
276+ case SIMDE_SIDD_CMP_EQUAL_ORDERED :
277+ int_res_1 = 0xffff ;
278+ for (int i = 0 ; i < upper_bound ; i ++ ){
279+ int k = i ;
280+ HEDLEY_DIAGNOSTIC_PUSH
281+ #if defined(SIMDE_BUG_CLANG_45959 )
282+ #pragma clang diagnostic ignored "-Wsign-conversion"
283+ #endif
284+ SIMDE_VECTORIZE_REDUCTION (& :int_res_1 )
285+ for (int j = 0 ; j < (upper_bound - i ) ; j ++ ){
286+ int_res_1 &= (((bool_res_ .i16 [k ] >> j ) & 1 ) << i ) ;
287+ k += 1 ;
288+ }
289+ HEDLEY_DIAGNOSTIC_POP
290+ }
291+ break ;
292+ }
293+ for (int i = 0 ; i < upper_bound ; i ++ ){
294+ if (polarity & SIMDE_SIDD_NEGATIVE_POLARITY ){
295+ if (polarity & SIMDE_SIDD_MASKED_POSITIVE_POLARITY ) {
296+ if (i >= lb ) {
297+ int_res_2 |= (((int_res_1 >> i ) & 1 ) << i );
298+ }
299+ else {
300+ int_res_2 |= ((((int_res_1 >> i ) & 1 ) ^ (-1 )) << i );
301+ }
302+ }
303+ else {
304+ int_res_2 |= ((((int_res_1 >> i ) & 1 ) ^ (-1 )) << i );
305+ }
306+ }
307+ else {
308+ int_res_2 |= (((int_res_1 >> i ) & 1 ) << i );
309+ }
310+ }
311+ return !int_res_2 & (lb > upper_bound );
312+ }
313+
/* simde_mm_cmpestra: forwards to the native intrinsic when SSE4.2 is
 * available; otherwise bit 0 of imm8 (SIMDE_SIDD_UWORD_OPS) selects the
 * 16-bit or the 8-bit portable implementation.  There must be no space
 * between the macro name and its parameter list, or the macro becomes
 * object-like. */
#if defined(SIMDE_X86_SSE4_2_NATIVE)
  #define simde_mm_cmpestra(a, la, b, lb, imm8) _mm_cmpestra(a, la, b, lb, imm8)
#else
  #define simde_mm_cmpestra(a, la, b, lb, imm8) \
    (((imm8) & SIMDE_SIDD_UWORD_OPS) \
      ? simde_mm_cmpestra_16_((a), (la), (b), (lb), (imm8)) \
      : simde_mm_cmpestra_8_((a), (la), (b), (lb), (imm8)))
#endif
#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES)
  /* Drop any existing macro of this name before aliasing it. */
  #undef _mm_cmpestra
  #define _mm_cmpestra(a, la, b, lb, imm8) simde_mm_cmpestra(a, la, b, lb, imm8)
#endif
325+
95326SIMDE_FUNCTION_ATTRIBUTES
96327int simde_mm_cmpestrs (simde__m128i a , int la , simde__m128i b , int lb , const int imm8 )
97328 SIMDE_REQUIRE_CONSTANT_RANGE (imm8 , 0 , 127 ) {
0 commit comments