Skip to content

Commit 3506e4e

Browse files
committed
x86 avx512 cmpneq: finish the implementations; synch with cmpeq
1 parent 3a2a97d commit 3506e4e

File tree

7 files changed

+4246
-1022
lines changed

7 files changed

+4246
-1022
lines changed

simde/x86/avx2.h

Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1158,6 +1158,27 @@ simde_mm256_cmpeq_epi8 (simde__m256i a, simde__m256i b) {
11581158
#define _mm256_cmpeq_epi8(a, b) simde_mm256_cmpeq_epi8(a, b)
11591159
#endif
11601160

1161+
SIMDE_FUNCTION_ATTRIBUTES
1162+
simde__m256i
1163+
simde_x_mm256_cmpneq_epi8 (simde__m256i a, simde__m256i b) {
1164+
simde__m256i_private
1165+
r_,
1166+
a_ = simde__m256i_to_private(a),
1167+
b_ = simde__m256i_to_private(b);
1168+
1169+
#if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
1170+
r_.m128i[0] = simde_x_mm_cmpneq_epi8(a_.m128i[0], b_.m128i[0]);
1171+
r_.m128i[1] = simde_x_mm_cmpneq_epi8(a_.m128i[1], b_.m128i[1]);
1172+
#else
1173+
SIMDE_VECTORIZE
1174+
for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
1175+
r_.i8[i] = (a_.i8[i] != b_.i8[i]) ? ~INT8_C(0) : INT8_C(0);
1176+
}
1177+
#endif
1178+
1179+
return simde__m256i_from_private(r_);
1180+
}
1181+
11611182
SIMDE_FUNCTION_ATTRIBUTES
11621183
simde__m256i
11631184
simde_x_mm256_cmpeq_epu8 (simde__m256i a, simde__m256i b) {
@@ -1179,6 +1200,26 @@ simde_x_mm256_cmpeq_epu8 (simde__m256i a, simde__m256i b) {
11791200
return simde__m256i_from_private(r_);
11801201
}
11811202

1203+
SIMDE_FUNCTION_ATTRIBUTES
1204+
simde__m256i
1205+
simde_x_mm256_cmpneq_epu8 (simde__m256i a, simde__m256i b) {
1206+
simde__m256i_private
1207+
r_,
1208+
a_ = simde__m256i_to_private(a),
1209+
b_ = simde__m256i_to_private(b);
1210+
1211+
#if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
1212+
r_.m128i[0] = simde_x_mm_cmpneq_epu8(a_.m128i[0], b_.m128i[0]);
1213+
r_.m128i[1] = simde_x_mm_cmpneq_epu8(a_.m128i[1], b_.m128i[1]);
1214+
#else
1215+
SIMDE_VECTORIZE
1216+
for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
1217+
r_.u8[i] = (a_.u8[i] != b_.u8[i]) ? UINT8_MAX : UINT8_C(0);
1218+
}
1219+
#endif
1220+
1221+
return simde__m256i_from_private(r_);
1222+
}
11821223
SIMDE_FUNCTION_ATTRIBUTES
11831224
simde__m256i
11841225
simde_mm256_cmpeq_epi16 (simde__m256i a, simde__m256i b) {
@@ -1210,6 +1251,27 @@ simde_mm256_cmpeq_epi16 (simde__m256i a, simde__m256i b) {
12101251
#define _mm256_cmpeq_epi16(a, b) simde_mm256_cmpeq_epi16(a, b)
12111252
#endif
12121253

1254+
SIMDE_FUNCTION_ATTRIBUTES
1255+
simde__m256i
1256+
simde_x_mm256_cmpneq_epi16 (simde__m256i a, simde__m256i b) {
1257+
simde__m256i_private
1258+
r_,
1259+
a_ = simde__m256i_to_private(a),
1260+
b_ = simde__m256i_to_private(b);
1261+
1262+
#if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
1263+
r_.m128i[0] = simde_x_mm_cmpneq_epi16(a_.m128i[0], b_.m128i[0]);
1264+
r_.m128i[1] = simde_x_mm_cmpneq_epi16(a_.m128i[1], b_.m128i[1]);
1265+
#else
1266+
SIMDE_VECTORIZE
1267+
for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
1268+
r_.i16[i] = (a_.i16[i] != b_.i16[i]) ? ~INT16_C(0) : INT16_C(0);
1269+
}
1270+
#endif
1271+
1272+
return simde__m256i_from_private(r_);
1273+
}
1274+
12131275
SIMDE_FUNCTION_ATTRIBUTES
12141276
simde__m256i
12151277
simde_x_mm256_cmpeq_epu16 (simde__m256i a, simde__m256i b) {
@@ -1231,6 +1293,27 @@ simde_x_mm256_cmpeq_epu16 (simde__m256i a, simde__m256i b) {
12311293
return simde__m256i_from_private(r_);
12321294
}
12331295

1296+
SIMDE_FUNCTION_ATTRIBUTES
1297+
simde__m256i
1298+
simde_x_mm256_cmpneq_epu16 (simde__m256i a, simde__m256i b) {
1299+
simde__m256i_private
1300+
r_,
1301+
a_ = simde__m256i_to_private(a),
1302+
b_ = simde__m256i_to_private(b);
1303+
1304+
#if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
1305+
r_.m128i[0] = simde_x_mm_cmpneq_epu16(a_.m128i[0], b_.m128i[0]);
1306+
r_.m128i[1] = simde_x_mm_cmpneq_epu16(a_.m128i[1], b_.m128i[1]);
1307+
#else
1308+
SIMDE_VECTORIZE
1309+
for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
1310+
r_.u16[i] = (a_.u16[i] != b_.u16[i]) ? UINT16_MAX : UINT16_C(0);
1311+
}
1312+
#endif
1313+
1314+
return simde__m256i_from_private(r_);
1315+
}
1316+
12341317
SIMDE_FUNCTION_ATTRIBUTES
12351318
simde__m256i
12361319
simde_mm256_cmpeq_epi32 (simde__m256i a, simde__m256i b) {
@@ -1262,6 +1345,27 @@ simde_mm256_cmpeq_epi32 (simde__m256i a, simde__m256i b) {
12621345
#define _mm256_cmpeq_epi32(a, b) simde_mm256_cmpeq_epi32(a, b)
12631346
#endif
12641347

1348+
SIMDE_FUNCTION_ATTRIBUTES
1349+
simde__m256i
1350+
simde_x_mm256_cmpneq_epi32 (simde__m256i a, simde__m256i b) {
1351+
simde__m256i_private
1352+
r_,
1353+
a_ = simde__m256i_to_private(a),
1354+
b_ = simde__m256i_to_private(b);
1355+
1356+
#if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
1357+
r_.m128i[0] = simde_mm_x_cmpneq_epi32(a_.m128i[0], b_.m128i[0]);
1358+
r_.m128i[1] = simde_mm_x_cmpneq_epi32(a_.m128i[1], b_.m128i[1]);
1359+
#else
1360+
SIMDE_VECTORIZE
1361+
for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
1362+
r_.i32[i] = (a_.i32[i] != b_.i32[i]) ? ~INT32_C(0) : INT32_C(0);
1363+
}
1364+
#endif
1365+
1366+
return simde__m256i_from_private(r_);
1367+
}
1368+
12651369
SIMDE_FUNCTION_ATTRIBUTES
12661370
simde__m256i
12671371
simde_x_mm256_cmpeq_epu32 (simde__m256i a, simde__m256i b) {
@@ -1283,6 +1387,28 @@ simde_x_mm256_cmpeq_epu32 (simde__m256i a, simde__m256i b) {
12831387
return simde__m256i_from_private(r_);
12841388
}
12851389

1390+
SIMDE_FUNCTION_ATTRIBUTES
1391+
simde__m256i
1392+
simde_x_mm256_cmpneq_epu32 (simde__m256i a, simde__m256i b) {
1393+
simde__m256i_private
1394+
r_,
1395+
a_ = simde__m256i_to_private(a),
1396+
b_ = simde__m256i_to_private(b);
1397+
1398+
#if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
1399+
r_.m128i[0] = simde_x_mm_cmpneq_epu32(a_.m128i[0], b_.m128i[0]);
1400+
r_.m128i[1] = simde_x_mm_cmpneq_epu32(a_.m128i[1], b_.m128i[1]);
1401+
#else
1402+
SIMDE_VECTORIZE
1403+
for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
1404+
r_.u32[i] = (a_.u32[i] != b_.u32[i]) ? UINT32_MAX : UINT32_C(0);
1405+
}
1406+
#endif
1407+
1408+
return simde__m256i_from_private(r_);
1409+
}
1410+
1411+
12861412
SIMDE_FUNCTION_ATTRIBUTES
12871413
simde__m256i
12881414
simde_mm256_cmpeq_epi64 (simde__m256i a, simde__m256i b) {
@@ -1314,6 +1440,27 @@ simde_mm256_cmpeq_epi64 (simde__m256i a, simde__m256i b) {
13141440
#define _mm256_cmpeq_epi64(a, b) simde_mm256_cmpeq_epi64(a, b)
13151441
#endif
13161442

1443+
SIMDE_FUNCTION_ATTRIBUTES
1444+
simde__m256i
1445+
simde_x_mm256_cmpneq_epi64 (simde__m256i a, simde__m256i b) {
1446+
simde__m256i_private
1447+
r_,
1448+
a_ = simde__m256i_to_private(a),
1449+
b_ = simde__m256i_to_private(b);
1450+
1451+
#if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
1452+
r_.m128i[0] = simde_x_mm_cmpneq_epi64(a_.m128i[0], b_.m128i[0]);
1453+
r_.m128i[1] = simde_x_mm_cmpneq_epi64(a_.m128i[1], b_.m128i[1]);
1454+
#else
1455+
SIMDE_VECTORIZE
1456+
for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
1457+
r_.i64[i] = (a_.i64[i] != b_.i64[i]) ? ~INT64_C(0) : INT64_C(0);
1458+
}
1459+
#endif
1460+
1461+
return simde__m256i_from_private(r_);
1462+
}
1463+
13171464
SIMDE_FUNCTION_ATTRIBUTES
13181465
simde__m256i
13191466
simde_x_mm256_cmpeq_epu64 (simde__m256i a, simde__m256i b) {
@@ -1335,6 +1482,27 @@ simde_x_mm256_cmpeq_epu64 (simde__m256i a, simde__m256i b) {
13351482
return simde__m256i_from_private(r_);
13361483
}
13371484

1485+
SIMDE_FUNCTION_ATTRIBUTES
1486+
simde__m256i
1487+
simde_x_mm256_cmpneq_epu64 (simde__m256i a, simde__m256i b) {
1488+
simde__m256i_private
1489+
r_,
1490+
a_ = simde__m256i_to_private(a),
1491+
b_ = simde__m256i_to_private(b);
1492+
1493+
#if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128)
1494+
r_.m128i[0] = simde_x_mm_cmpneq_epu64(a_.m128i[0], b_.m128i[0]);
1495+
r_.m128i[1] = simde_x_mm_cmpneq_epu64(a_.m128i[1], b_.m128i[1]);
1496+
#else
1497+
SIMDE_VECTORIZE
1498+
for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {
1499+
r_.u64[i] = (a_.u64[i] != b_.u64[i]) ? UINT64_MAX : UINT64_C(0);
1500+
}
1501+
#endif
1502+
1503+
return simde__m256i_from_private(r_);
1504+
}
1505+
13381506
SIMDE_FUNCTION_ATTRIBUTES
13391507
simde__m256i
13401508
simde_mm256_cmpgt_epi8 (simde__m256i a, simde__m256i b) {

0 commit comments

Comments
 (0)