@@ -105,8 +105,8 @@ simde_mm_cmpestra_8_(simde__m128i a, int la, simde__m128i b, int lb, const int i
105105 const int upper_bound = (128 / 8 ) - 1 ;
106106 int a_invalid = 0 ;
107107 int b_invalid = 0 ;
108- for (int i = 0 ; i < upper_bound ; i ++ ) {
109- for (int j = 0 ; j < upper_bound ; j ++ ){
108+ for (int i = 0 ; i <= upper_bound ; i ++ ) {
109+ for (int j = 0 ; j <= upper_bound ; j ++ ){
110110 int bitvalue = ((a_ .i8 [i ] == b_ .i8 [j ]) ? 1 : 0 );
111111 if (i == la )
112112 a_invalid = 1 ;
@@ -132,70 +132,93 @@ simde_mm_cmpestra_8_(simde__m128i a, int la, simde__m128i b, int lb, const int i
132132 bitvalue = 0 ;
133133 break ;
134134 }
135- bool_res_ .i8 [i ] |= (bitvalue << j );
135+ if (bitvalue )
136+ bool_res_ .i16 [i ] |= (1UL << j );
137+ else
138+ bool_res_ .i16 [i ] &= ~(1UL << j );
136139 }
137140 }
138141 int32_t int_res_1 = 0 ;
139142 int32_t int_res_2 = 0 ;
140143 switch (cmp_op ) {
141144 case SIMDE_SIDD_CMP_EQUAL_ANY :
142- for (int i = 0 ; i < upper_bound ; i ++ ){
145+ for (int i = 0 ; i <= upper_bound ; i ++ ){
143146 SIMDE_VECTORIZE_REDUCTION (|:int_res_1 )
144- for (int j = 0 ; j < upper_bound ; j ++ ){
147+ for (int j = 0 ; j <= upper_bound ; j ++ ){
145148 int_res_1 |= (((bool_res_ .i8 [i ] >> j ) & 1 ) << i );
146149 }
147150 }
148151 break ;
149152 case SIMDE_SIDD_CMP_RANGES :
150- for (int i = 0 ; i < upper_bound ; i ++ ){
153+ for (int i = 0 ; i <= upper_bound ; i ++ ){
151154 SIMDE_VECTORIZE_REDUCTION (|:int_res_1 )
152- for (int j = 0 ; j < upper_bound ; j ++ ){
155+ for (int j = 0 ; j <= upper_bound ; j ++ ){
153156 int_res_1 |= ((((bool_res_ .i8 [i ] >> j ) & 1 ) & ((bool_res_ .i8 [i ] >> (j + 1 )) & 1 )) << i );
154157 j += 2 ;
155158 }
156159 }
157160 break ;
158161 case SIMDE_SIDD_CMP_EQUAL_EACH :
159- for (int i = 0 ; i < upper_bound ; i ++ ){
160- SIMDE_VECTORIZE_REDUCTION (|:int_res_1 )
161- for (int j = 0 ; j < upper_bound ; j ++ ){
162- int_res_1 |= (((bool_res_ .i8 [i ] >> i ) & 1 ) << i );
162+ for (int i = 0 ; i <= upper_bound ; i ++ ){
163+ //SIMDE_VECTORIZE_REDUCTION(|:int_res_1)
164+ for (int j = 0 ; j <= upper_bound ; j ++ ){
165+ int bitvalue = ((bool_res_ .i8 [i ] >> i ) & 1 );
166+ if (bitvalue )
167+ int_res_1 |= (1UL << i );
168+ else
169+ int_res_1 &= ~(1UL << i );
163170 }
164171 }
165172 break ;
166173 case SIMDE_SIDD_CMP_EQUAL_ORDERED :
167174 int_res_1 = 0xff ;
168- for (int i = 0 ; i < upper_bound ; i ++ ){
175+ for (int i = 0 ; i <= upper_bound ; i ++ ){
169176 int k = i ;
170177 HEDLEY_DIAGNOSTIC_PUSH
171178 #if defined(SIMDE_BUG_CLANG_45959 )
172179 #pragma clang diagnostic ignored "-Wsign-conversion"
173180 #endif
174181 SIMDE_VECTORIZE_REDUCTION (& :int_res_1 )
175- for (int j = 0 ; j < (upper_bound - i ) ; j ++ ){
182+ for (int j = 0 ; j <= (upper_bound - i ) ; j ++ ){
176183 int_res_1 &= (((bool_res_ .i8 [k ] >> j ) & 1 ) << i ) ;
177184 k += 1 ;
178185 }
179186 HEDLEY_DIAGNOSTIC_POP
180187 }
181188 break ;
182189 }
183- for (int i = 0 ; i < upper_bound ; i ++ ){
190+ for (int i = 0 ; i <= upper_bound ; i ++ ){
184191 if (polarity & SIMDE_SIDD_NEGATIVE_POLARITY ){
185192 if (polarity & SIMDE_SIDD_MASKED_POSITIVE_POLARITY ) {
186193 if (i >= lb ) {
187- int_res_2 |= (((int_res_1 >> i ) & 1 ) << i );
194+ int bitvalue = ((int_res_1 >> i ) & 1 );
195+ if (bitvalue )
196+ int_res_2 |= (1UL << i );
197+ else
198+ int_res_2 &= ~(1UL << i );
188199 }
189200 else {
190- int_res_2 |= ((((int_res_1 >> i ) & 1 ) ^ (-1 )) << i );
201+ int bitvalue = (((int_res_1 >> i ) & 1 ) ^ (-1 ));
202+ if (bitvalue )
203+ int_res_2 |= (1UL << i );
204+ else
205+ int_res_2 &= ~(1UL << i );
191206 }
192207 }
193208 else {
194- int_res_2 |= ((((int_res_1 >> i ) & 1 ) ^ (-1 )) << i );
209+ int bitvalue = (((int_res_1 >> i ) & 1 ) ^ (-1 ));
210+ if (bitvalue )
211+ int_res_2 |= (1UL << i );
212+ else
213+ int_res_2 &= ~(1UL << i );
195214 }
196215 }
197216 else {
198- int_res_2 |= ( ((int_res_1 >> i ) & 1 ) << i );
217+ int bitvalue = ((int_res_1 >> i ) & 1 );
218+ if (bitvalue )
219+ int_res_2 |= (1UL << i );
220+ else
221+ int_res_2 &= ~(1UL << i );
199222 }
200223 }
201224 return !int_res_2 & (lb > upper_bound );
@@ -214,8 +237,8 @@ simde_mm_cmpestra_16_(simde__m128i a, int la, simde__m128i b, int lb, const int
214237 const int upper_bound = (128 / 16 ) - 1 ;
215238 int a_invalid = 0 ;
216239 int b_invalid = 0 ;
217- for (int i = 0 ; i < upper_bound ; i ++ ) {
218- for (int j = 0 ; j < upper_bound ; j ++ )
240+ for (int i = 0 ; i <= upper_bound ; i ++ ) {
241+ for (int j = 0 ; j <= upper_bound ; j ++ )
219242 {
220243 int bitvalue = ((a_ .i16 [i ] == b_ .i16 [j ]) ? 1 : 0 );
221244 if (i == la )
@@ -242,70 +265,93 @@ simde_mm_cmpestra_16_(simde__m128i a, int la, simde__m128i b, int lb, const int
242265 bitvalue = 0 ;
243266 break ;
244267 }
245- bool_res_ .i16 [i ] |= (bitvalue << j );
268+ if (bitvalue )
269+ bool_res_ .i16 [i ] |= (1UL << j );
270+ else
271+ bool_res_ .i16 [i ] &= ~(1UL << j );
246272 }
247273 }
248274 int32_t int_res_1 = 0 ;
249275 int32_t int_res_2 = 0 ;
250276 switch (cmp_op ) {
251277 case SIMDE_SIDD_CMP_EQUAL_ANY :
252- for (int i = 0 ; i < upper_bound ; i ++ ){
278+ for (int i = 0 ; i <= upper_bound ; i ++ ){
253279 SIMDE_VECTORIZE_REDUCTION (|:int_res_1 )
254- for (int j = 0 ; j < upper_bound ; j ++ ){
280+ for (int j = 0 ; j <= upper_bound ; j ++ ){
255281 int_res_1 |= (((bool_res_ .i16 [i ] >> j ) & 1 ) << i ) ;
256282 }
257283 }
258284 break ;
259285 case SIMDE_SIDD_CMP_RANGES :
260- for (int i = 0 ; i < upper_bound ; i ++ ){
286+ for (int i = 0 ; i <= upper_bound ; i ++ ){
261287 SIMDE_VECTORIZE_REDUCTION (|:int_res_1 )
262- for (int j = 0 ; j < upper_bound ; j ++ ){
288+ for (int j = 0 ; j <= upper_bound ; j ++ ){
263289 int_res_1 |= ((((bool_res_ .i16 [i ] >> j ) & 1 ) & ((bool_res_ .i16 [i ] >> (j + 1 )) & 1 )) << i );
264290 j += 2 ;
265291 }
266292 }
267293 break ;
268294 case SIMDE_SIDD_CMP_EQUAL_EACH :
269- for (int i = 0 ; i < upper_bound ; i ++ ){
295+ for (int i = 0 ; i <= upper_bound ; i ++ ){
270296 SIMDE_VECTORIZE_REDUCTION (|:int_res_1 )
271- for (int j = 0 ; j < upper_bound ; j ++ ){
272- int_res_1 |= (((bool_res_ .i16 [i ] >> i ) & 1 ) << i );
297+ for (int j = 0 ; j <= upper_bound ; j ++ ){
298+ int bitvalue = ((bool_res_ .i16 [i ] >> i ) & 1 );
299+ if (bitvalue )
300+ int_res_1 |= (1UL << i );
301+ else
302+ int_res_1 &= ~(1UL << i );
273303 }
274304 }
275305 break ;
276306 case SIMDE_SIDD_CMP_EQUAL_ORDERED :
277307 int_res_1 = 0xffff ;
278- for (int i = 0 ; i < upper_bound ; i ++ ){
308+ for (int i = 0 ; i <= upper_bound ; i ++ ){
279309 int k = i ;
280310 HEDLEY_DIAGNOSTIC_PUSH
281311 #if defined(SIMDE_BUG_CLANG_45959 )
282312 #pragma clang diagnostic ignored "-Wsign-conversion"
283313 #endif
284314 SIMDE_VECTORIZE_REDUCTION (& :int_res_1 )
285- for (int j = 0 ; j < (upper_bound - i ) ; j ++ ){
315+ for (int j = 0 ; j <= (upper_bound - i ) ; j ++ ){
286316 int_res_1 &= (((bool_res_ .i16 [k ] >> j ) & 1 ) << i ) ;
287317 k += 1 ;
288318 }
289319 HEDLEY_DIAGNOSTIC_POP
290320 }
291321 break ;
292322 }
293- for (int i = 0 ; i < upper_bound ; i ++ ){
323+ for (int i = 0 ; i <= upper_bound ; i ++ ){
294324 if (polarity & SIMDE_SIDD_NEGATIVE_POLARITY ){
295325 if (polarity & SIMDE_SIDD_MASKED_POSITIVE_POLARITY ) {
296326 if (i >= lb ) {
297- int_res_2 |= (((int_res_1 >> i ) & 1 ) << i );
327+ int bitvalue = ((int_res_1 >> i ) & 1 );
328+ if (bitvalue )
329+ int_res_2 |= (1UL << i );
330+ else
331+ int_res_2 &= ~(1UL << i );
298332 }
299333 else {
300- int_res_2 |= ((((int_res_1 >> i ) & 1 ) ^ (-1 )) << i );
334+ int bitvalue = (((int_res_1 >> i ) & 1 ) ^ (-1 ));
335+ if (bitvalue )
336+ int_res_2 |= (1UL << i );
337+ else
338+ int_res_2 &= ~(1UL << i );
301339 }
302340 }
303341 else {
304- int_res_2 |= ((((int_res_1 >> i ) & 1 ) ^ (-1 )) << i );
342+ int bitvalue = (((int_res_1 >> i ) & 1 ) ^ (-1 ));
343+ if (bitvalue )
344+ int_res_2 |= (1UL << i );
345+ else
346+ int_res_2 &= ~(1UL << i );
305347 }
306348 }
307349 else {
308- int_res_2 |= (((int_res_1 >> i ) & 1 ) << i );
350+ int bitvalue = ((int_res_1 >> i ) & 1 );
351+ if (bitvalue )
352+ int_res_2 |= (1UL << i );
353+ else
354+ int_res_2 &= ~(1UL << i );
309355 }
310356 }
311357 return !int_res_2 & (lb > upper_bound );
@@ -399,7 +445,7 @@ simde_mm_cmpistrs_8_(simde__m128i a) {
399445 const int upper_bound = (128 / 8 ) - 1 ;
400446 int a_invalid = 0 ;
401447 SIMDE_VECTORIZE
402- for (int i = 0 ; i < upper_bound ; i ++ ) {
448+ for (int i = 0 ; i <= upper_bound ; i ++ ) {
403449 if (!a_ .i8 [i ])
404450 a_invalid = 1 ;
405451 }
@@ -413,7 +459,7 @@ simde_mm_cmpistrs_16_(simde__m128i a) {
413459 const int upper_bound = (128 / 16 ) - 1 ;
414460 int a_invalid = 0 ;
415461 SIMDE_VECTORIZE
416- for (int i = 0 ; i < upper_bound ; i ++ ) {
462+ for (int i = 0 ; i <= upper_bound ; i ++ ) {
417463 if (!a_ .i16 [i ])
418464 a_invalid = 1 ;
419465 }
@@ -439,7 +485,7 @@ simde_mm_cmpistrz_8_(simde__m128i b) {
439485 const int upper_bound = (128 / 8 ) - 1 ;
440486 int b_invalid = 0 ;
441487 SIMDE_VECTORIZE
442- for (int i = 0 ; i < upper_bound ; i ++ ) {
488+ for (int i = 0 ; i <= upper_bound ; i ++ ) {
443489 if (!b_ .i8 [i ])
444490 b_invalid = 1 ;
445491 }
@@ -453,7 +499,7 @@ simde_mm_cmpistrz_16_(simde__m128i b) {
453499 const int upper_bound = (128 / 16 ) - 1 ;
454500 int b_invalid = 0 ;
455501 SIMDE_VECTORIZE
456- for (int i = 0 ; i < upper_bound ; i ++ ) {
502+ for (int i = 0 ; i <= upper_bound ; i ++ ) {
457503 if (!b_ .i16 [i ])
458504 b_invalid = 1 ;
459505 }
0 commit comments