Skip to content

Commit cfe4b7f

Browse files
sse4.2: made a few changes
1 parent fd4cd19 commit cfe4b7f

File tree

1 file changed

+86
-52
lines changed

1 file changed

+86
-52
lines changed

simde/x86/sse4.2.h

Lines changed: 86 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -37,16 +37,41 @@ SIMDE__BEGIN_DECLS
3737
# define SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES
3838
#endif
3939

40-
#define SIMDE_SIDD_CMP_EQUAL_ANY 0
41-
#define SIMDE_SIDD_CMP_RANGES 1
42-
#define SIMDE_SIDD_CMP_EQUAL_EACH 2
43-
#define SIMDE_SIDD_CMP_EQUAL_ORDERED 3
40+
#if defined(SIMDE_X86_SSE4_2_NATIVE)
41+
# define SIMDE_SIDD_UBYTE_OPS _SIDD_UBYTE_OPS
42+
# define SIMDE_SIDD_UWORD_OPS _SIDD_UWORD_OPS
43+
# define SIMDE_SIDD_SBYTE_OPS _SIDD_SBYTE_OPS
44+
/* Forward to the native constant. The portable fallback branch spells this
 * name SIMDE_SIDD_SWORD_OPS (single underscore after SIMDE), so the native
 * branch must use the same spelling or the macro is undefined here. */
# define SIMDE_SIDD_SWORD_OPS _SIDD_SWORD_OPS
45+
# define SIMDE_SIDD_CMP_EQUAL_ANY _SIDD_CMP_EQUAL_ANY
46+
# define SIMDE_SIDD_CMP_RANGES _SIDD_CMP_RANGES
47+
# define SIMDE_SIDD_CMP_EQUAL_EACH _SIDD_CMP_EQUAL_EACH
48+
# define SIMDE_SIDD_CMP_EQUAL_ORDERED _SIDD_CMP_EQUAL_ORDERED
49+
# define SIMDE_SIDD_POSITIVE_POLARITY _SIDD_POSITIVE_POLARITY
50+
# define SIMDE_SIDD_NEGATIVE_POLARITY _SIDD_NEGATIVE_POLARITY
51+
# define SIMDE_SIDD_MASKED_POSITIVE_POLARITY _SIDD_MASKED_POSITIVE_POLARITY
52+
# define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY _SIDD_MASKED_NEGATIVE_POLARITY
53+
# define SIMDE_SIDD_LEAST_SIGNIFICANT _SIDD_LEAST_SIGNIFICANT
54+
# define SIMDE_SIDD_MOST_SIGNIFICANT _SIDD_MOST_SIGNIFICANT
55+
# define SIMDE_SIDD_BIT_MASK _SIDD_BIT_MASK
56+
# define SIMDE_SIDD_UNIT_MASK _SIDD_UNIT_MASK
4457

45-
#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES)
46-
#define _SIDD_CMP_EQUAL_ANY SIMDE_SIDD_CMP_EQUAL_ANY
47-
#define _SIDD_CMP_RANGES SIMDE_SIDD_CMP_RANGES
48-
#define _SIDD_CMP_EQUAL_EACH SIMDE_SIDD_CMP_EQUAL_EACH
49-
#define _SIDD_CMP_EQUAL_ORDERED SIMDE_SIDD_CMP_EQUAL_ORDERED
58+
#else
59+
# define SIMDE_SIDD_UBYTE_OPS 0x00
60+
# define SIMDE_SIDD_UWORD_OPS 0x01
61+
# define SIMDE_SIDD_SBYTE_OPS 0x02
62+
# define SIMDE_SIDD_SWORD_OPS 0x03
63+
# define SIMDE_SIDD_CMP_EQUAL_ANY 0x00
64+
# define SIMDE_SIDD_CMP_RANGES 0x04
65+
# define SIMDE_SIDD_CMP_EQUAL_EACH 0x08
66+
# define SIMDE_SIDD_CMP_EQUAL_ORDERED 0x0c
67+
# define SIMDE_SIDD_POSITIVE_POLARITY 0x00
68+
# define SIMDE_SIDD_NEGATIVE_POLARITY 0x10
69+
# define SIMDE_SIDD_MASKED_POSITIVE_POLARITY 0x20
70+
# define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY 0x30
71+
# define SIMDE_SIDD_LEAST_SIGNIFICANT 0x00
72+
# define SIMDE_SIDD_MOST_SIGNIFICANT 0x40
73+
# define SIMDE_SIDD_BIT_MASK 0x00
74+
# define SIMDE_SIDD_UNIT_MASK 0x40
5075
#endif
5176

5277
SIMDE__FUNCTION_ATTRIBUTES
@@ -61,72 +86,79 @@ simde_mm_cmpestra_8_(simde__m128i a, int la, simde__m128i b, int lb, const int i
6186
const int upper_bound = (128 / 8) - 1;
6287
int a_invalid = 0;
6388
int b_invalid = 0;
64-
for(int i = 0 ; i < (upper_bound) ; i++) {
65-
for(int j = 0; j< (upper_bound) ; j++){
89+
for(int i = 0 ; i < upper_bound ; i++) {
90+
for(int j = 0; j< upper_bound ; j++){
6691
int bitvalue = ((a_.i8[i] == b_.i8[j]) ? 1 : 0);
67-
bool_res_.i8[i] |= (( bitvalue ) << j);
6892
if(i == la)
6993
a_invalid = 1;
7094
if(j == lb)
7195
b_invalid = 1;
7296
switch(cmp_op){
7397
case SIMDE_SIDD_CMP_EQUAL_ANY:
98+
bitvalue = 0;
7499
break;
75100
case SIMDE_SIDD_CMP_RANGES:
101+
bitvalue = 0;
76102
break;
77103
case SIMDE_SIDD_CMP_EQUAL_EACH:
78104
if(a_invalid && b_invalid)
79-
bool_res_.i8[i] |= (1 << j);
105+
bitvalue = 1;
106+
else
107+
bitvalue = 0;
80108
break;
81109
case SIMDE_SIDD_CMP_EQUAL_ORDERED:
82110
if(a_invalid && !b_invalid)
83-
bool_res_.i8[i] |= (1 << j);
111+
bitvalue = 1;
84112
else if(a_invalid && b_invalid)
85-
bool_res_.i8[i] |= (1 << j);
113+
bitvalue = 1;
114+
else
115+
bitvalue = 0;
86116
break;
87117
}
118+
bool_res_.i8[i] |= (bitvalue << j);
88119
}
89120
}
90121
int32_t int_res_1 = 0;
91122
int32_t int_res_2 = 0;
92123
switch(cmp_op) {
93124
case SIMDE_SIDD_CMP_EQUAL_ANY:
94-
for(int i = 0 ; i < (upper_bound) ; i++){
125+
for(int i = 0 ; i < upper_bound ; i++){
95126
SIMDE__VECTORIZE_REDUCTION(|:int_res_1)
96-
for(int j = 0 ; j < (upper_bound) ; j++){
127+
for(int j = 0 ; j < upper_bound ; j++){
97128
int_res_1 |= (((bool_res_.i8[i] >> j) & 1) << i);
98129
}
99130
}
100131
break;
101132
case SIMDE_SIDD_CMP_RANGES:
102-
for(int i = 0 ; i < (upper_bound) ; i++){
133+
for(int i = 0 ; i < upper_bound ; i++){
103134
SIMDE__VECTORIZE_REDUCTION(|:int_res_1)
104-
for(int j = 0 ; j < (upper_bound) ; j++){
135+
for(int j = 0 ; j < upper_bound ; j++){
105136
int_res_1 |= ((((bool_res_.i8[i] >> j) & 1) & ((bool_res_.i8[i] >> (j + 1)) & 1)) << i);
106137
j += 2;
107138
}
108139
}
109140
break;
110141
case SIMDE_SIDD_CMP_EQUAL_EACH:
111-
for(int i = 0 ; i < (upper_bound) ; i++){
142+
for(int i = 0 ; i < upper_bound ; i++){
112143
SIMDE__VECTORIZE_REDUCTION(|:int_res_1)
113-
for(int j = 0 ; j < (upper_bound) ; j++){
144+
for(int j = 0 ; j < upper_bound ; j++){
114145
int_res_1 |= (((bool_res_.i8[i] >> i) & 1) << i);
115146
}
116147
}
117148
break;
118149
case SIMDE_SIDD_CMP_EQUAL_ORDERED:
119-
int_res_1 = (imm8 & 1) ? 0xff : 0xffff;
120-
for(int i = 0 ; i < (upper_bound) ; i++){
150+
int_res_1 = 0xff;
151+
for(int i = 0 ; i < upper_bound ; i++){
121152
int k = i;
122-
SIMDE__VECTORIZE_REDUCTION(|:int_res_1)
153+
SIMDE__VECTORIZE_REDUCTION(&:int_res_1)
123154
for(int j = 0 ; j < (upper_bound-i) ; j++){
124155
int_res_1 &= (((bool_res_.i8[k] >> j) & 1 ) << i) ;
125156
k += 1;
126157
}
127158
}
159+
break;
128160
}
129-
for(int i = 0; i < (upper_bound) ; i++){
161+
for(int i = 0; i < upper_bound ; i++){
130162
if(polarity & 1){
131163
if((polarity >> 1) & 1) {
132164
if (i >= lb) {
@@ -144,7 +176,7 @@ simde_mm_cmpestra_8_(simde__m128i a, int la, simde__m128i b, int lb, const int i
144176
int_res_2 |= ( ((int_res_1 >> i) & 1) << i);
145177
}
146178
}
147-
return ( (int_res_2 == 0) & (lb > upper_bound) );
179+
return !int_res_2 & (lb > upper_bound);
148180
}
149181

150182
SIMDE__FUNCTION_ATTRIBUTES
@@ -159,73 +191,80 @@ simde_mm_cmpestra_16_(simde__m128i a, int la, simde__m128i b, int lb, const int
159191
const int upper_bound = (128 / 16) - 1;
160192
int a_invalid = 0;
161193
int b_invalid = 0;
162-
for(int i = 0 ; i < (upper_bound) ; i++) {
163-
for(int j = 0; j< (upper_bound) ; j++)
194+
for(int i = 0 ; i < upper_bound ; i++) {
195+
for(int j = 0; j< upper_bound ; j++)
164196
{
165197
int bitvalue = ((a_.i16[i] == b_.i16[j]) ? 1 : 0);
166-
bool_res_.i16[i] |= ((bitvalue) << j);
167198
if(i == la)
168199
a_invalid = 1;
169200
if(j == lb)
170201
b_invalid = 1;
171202
switch(cmp_op){
172203
case SIMDE_SIDD_CMP_EQUAL_ANY:
204+
bitvalue = 0;
173205
break;
174206
case SIMDE_SIDD_CMP_RANGES:
207+
bitvalue = 0;
175208
break;
176209
case SIMDE_SIDD_CMP_EQUAL_EACH:
177210
if(a_invalid && b_invalid)
178-
bool_res_.i16[i] |= (1 << j);
211+
bitvalue = 1;
212+
else
213+
bitvalue = 0;
179214
break;
180215
case SIMDE_SIDD_CMP_EQUAL_ORDERED:
181216
if(a_invalid && !b_invalid)
182-
bool_res_.i16[i] |= (1 << j);
217+
bitvalue = 1;
183218
else if(a_invalid && b_invalid)
184-
bool_res_.i16[i] |= (1 << j);
219+
bitvalue = 1;
220+
else
221+
bitvalue = 0;
185222
break;
186223
}
224+
bool_res_.i16[i] |= (bitvalue << j);
187225
}
188226
}
189227
int32_t int_res_1 = 0;
190228
int32_t int_res_2 = 0;
191229
switch(cmp_op) {
192230
case SIMDE_SIDD_CMP_EQUAL_ANY:
193-
for(int i = 0 ; i < (upper_bound) ; i++){
231+
for(int i = 0 ; i < upper_bound ; i++){
194232
SIMDE__VECTORIZE_REDUCTION(|:int_res_1)
195-
for (int j = 0 ; j < (upper_bound) ; j++){
233+
for (int j = 0 ; j < upper_bound ; j++){
196234
int_res_1 |= (((bool_res_.i16[i] >> j) & 1) << i) ;
197235
}
198236
}
199237
break;
200238
case SIMDE_SIDD_CMP_RANGES:
201-
for(int i = 0 ; i < (upper_bound) ; i++){
239+
for(int i = 0 ; i < upper_bound ; i++){
202240
SIMDE__VECTORIZE_REDUCTION(|:int_res_1)
203-
for(int j = 0 ; j < (upper_bound) ; j++){
241+
for(int j = 0 ; j < upper_bound ; j++){
204242
int_res_1 |= ((((bool_res_.i16[i] >> j) & 1) & ((bool_res_.i16[i] >> (j + 1)) & 1)) << i);
205243
j += 2;
206244
}
207245
}
208246
break;
209247
case SIMDE_SIDD_CMP_EQUAL_EACH:
210-
for(int i = 0 ; i < (upper_bound) ; i++){
248+
for(int i = 0 ; i < upper_bound ; i++){
211249
SIMDE__VECTORIZE_REDUCTION(|:int_res_1)
212-
for(int j = 0 ; j < (upper_bound) ; j++){
250+
for(int j = 0 ; j < upper_bound ; j++){
213251
int_res_1 |= (((bool_res_.i16[i] >> i) & 1) << i);
214252
}
215253
}
216254
break;
217255
case SIMDE_SIDD_CMP_EQUAL_ORDERED:
218-
int_res_1 = (imm8 & 1) ? 0xff : 0xffff;
219-
for(int i = 0 ; i < (upper_bound) ; i++){
256+
int_res_1 = 0xffff;
257+
for(int i = 0 ; i < upper_bound ; i++){
220258
int k = i;
221-
SIMDE__VECTORIZE_REDUCTION(|:int_res_1)
259+
SIMDE__VECTORIZE_REDUCTION(&:int_res_1)
222260
for(int j = 0 ; j < (upper_bound-i) ; j++){
223261
int_res_1 &= (((bool_res_.i16[k] >> j) & 1) << i) ;
224262
k += 1;
225263
}
226264
}
265+
break;
227266
}
228-
for(int i = 0; i < (upper_bound) ; i++){
267+
for(int i = 0; i < upper_bound ; i++){
229268
if(polarity & 1){
230269
if((polarity >> 1) & 1) {
231270
if (i >= lb) {
@@ -243,22 +282,17 @@ simde_mm_cmpestra_16_(simde__m128i a, int la, simde__m128i b, int lb, const int
243282
int_res_2 |= (((int_res_1 >> i) & 1) << i);
244283
}
245284
}
246-
return ((int_res_2 == 0) & (lb > upper_bound));
285+
return !int_res_2 & (lb > upper_bound);
247286
}
248287

249-
SIMDE__FUNCTION_ATTRIBUTES
250-
int
251-
simde_mm_cmpestra(simde__m128i a, int la, simde__m128i b, int lb, const int imm8){
252-
const int character_type = imm8 & 0x03;
253-
if(character_type & 1)
254-
return simde_mm_cmpestra_8_(a, la, b, lb, imm8);
255-
else
256-
return simde_mm_cmpestra_16_(a, la, b, lb, imm8);
257-
}
258288
#if defined(SIMDE_X86_SSE4_2_NATIVE)
259289
# define simde_mm_cmpestra(a, la, b, lb, imm8) _mm_cmpestra(a, la, b, lb, imm8)
260290
#endif
261291
#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES)
292+
# define simde_mm_cmpestra(a, la, b, lb, imm8) \
293+
(((imm8) & SIMDE_SIDD_UWORD_OPS) \
294+
? simde_mm_cmpestra_16_((a), (la), (b), (lb), (imm8)) \
295+
: simde_mm_cmpestra_8_((a), (la), (b), (lb), (imm8)))
262296
# define _mm_cmpestra(a, la, b, lb, imm8) simde_mm_cmpestra(a, la, b, lb, imm8)
263297
#endif
264298

0 commit comments

Comments
 (0)