Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
262 changes: 262 additions & 0 deletions simde/x86/sse4.2.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,268 @@ SIMDE__BEGIN_DECLS
# define SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES
#endif

/* Control-byte (imm8) flags for the SSE4.2 string-compare intrinsics.
 *
 * Layout of imm8, as encoded by the values below:
 *   bits 1:0  element format      (UBYTE / UWORD / SBYTE / SWORD)
 *   bits 3:2  aggregation         (EQUAL_ANY / RANGES / EQUAL_EACH / EQUAL_ORDERED)
 *   bits 5:4  polarity            (POSITIVE / NEGATIVE / MASKED_POSITIVE / MASKED_NEGATIVE)
 *   bit  6    output selection    (LEAST/MOST significant index, or BIT/UNIT mask)
 *
 * With native SSE4.2 the names forward to the compiler's _SIDD_* macros;
 * otherwise the numeric values are spelled out here. Both branches must
 * define exactly the same set of names. */
#if defined(SIMDE_X86_SSE4_2_NATIVE)
#define SIMDE_SIDD_UBYTE_OPS _SIDD_UBYTE_OPS
#define SIMDE_SIDD_UWORD_OPS _SIDD_UWORD_OPS
#define SIMDE_SIDD_SBYTE_OPS _SIDD_SBYTE_OPS
#define SIMDE_SIDD_SWORD_OPS _SIDD_SWORD_OPS
#define SIMDE_SIDD_CMP_EQUAL_ANY _SIDD_CMP_EQUAL_ANY
#define SIMDE_SIDD_CMP_RANGES _SIDD_CMP_RANGES
#define SIMDE_SIDD_CMP_EQUAL_EACH _SIDD_CMP_EQUAL_EACH
#define SIMDE_SIDD_CMP_EQUAL_ORDERED _SIDD_CMP_EQUAL_ORDERED
#define SIMDE_SIDD_POSITIVE_POLARITY _SIDD_POSITIVE_POLARITY
#define SIMDE_SIDD_NEGATIVE_POLARITY _SIDD_NEGATIVE_POLARITY
#define SIMDE_SIDD_MASKED_POSITIVE_POLARITY _SIDD_MASKED_POSITIVE_POLARITY
#define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY _SIDD_MASKED_NEGATIVE_POLARITY
#define SIMDE_SIDD_LEAST_SIGNIFICANT _SIDD_LEAST_SIGNIFICANT
#define SIMDE_SIDD_MOST_SIGNIFICANT _SIDD_MOST_SIGNIFICANT
#define SIMDE_SIDD_BIT_MASK _SIDD_BIT_MASK
#define SIMDE_SIDD_UNIT_MASK _SIDD_UNIT_MASK

#else
#define SIMDE_SIDD_UBYTE_OPS 0x00
#define SIMDE_SIDD_UWORD_OPS 0x01
#define SIMDE_SIDD_SBYTE_OPS 0x02
#define SIMDE_SIDD_SWORD_OPS 0x03
#define SIMDE_SIDD_CMP_EQUAL_ANY 0x00
#define SIMDE_SIDD_CMP_RANGES 0x04
#define SIMDE_SIDD_CMP_EQUAL_EACH 0x08
#define SIMDE_SIDD_CMP_EQUAL_ORDERED 0x0c
#define SIMDE_SIDD_POSITIVE_POLARITY 0x00
#define SIMDE_SIDD_NEGATIVE_POLARITY 0x10
#define SIMDE_SIDD_MASKED_POSITIVE_POLARITY 0x20
#define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY 0x30
#define SIMDE_SIDD_LEAST_SIGNIFICANT 0x00
#define SIMDE_SIDD_MOST_SIGNIFICANT 0x40
#define SIMDE_SIDD_BIT_MASK 0x00
#define SIMDE_SIDD_UNIT_MASK 0x40
#endif

/* Legacy misspelling (double underscore) that the native branch used to
 * define instead of SIMDE_SIDD_SWORD_OPS; kept so existing users still build. */
#define SIMDE__SIDD_SWORD_OPS SIMDE_SIDD_SWORD_OPS

/* Portable fallback for _mm_cmpestra on 8-bit elements (imm8 bit 0 clear).
 *
 * a, la : first operand and its explicit length, in elements.
 * b, lb : second operand and its explicit length, in elements.
 * imm8  : control byte; bit 1 selects signed elements, bits 3:2 the
 *         aggregation (SIMDE_SIDD_CMP_*), bit 4 negation and bit 5 whether
 *         negation is restricted to the valid elements of b.
 *
 * Lengths are interpreted as absolute values saturated to the element count
 * (16). An element is "valid" when its index is below the operand's length.
 *
 * An intermediate mask with one bit per element of b is computed, the
 * polarity is applied, and the function returns 1 when the final mask is
 * zero AND b is full length (lb == 16); otherwise it returns 0. */
SIMDE__FUNCTION_ATTRIBUTES
int
simde_mm_cmpestra_8_(simde__m128i a, int la, simde__m128i b, int lb, const int imm8)
SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 127) {
  enum { lane_count = 128 / 8 };
  const int cmp_op = imm8 & 0x0c;          /* aggregation lives in bits 3:2 */
  const int is_signed = (imm8 >> 1) & 1;   /* only matters for RANGES */
  const int negate = (imm8 >> 4) & 1;
  const int masked = (imm8 >> 5) & 1;
  simde__m128i_private
    a_ = simde__m128i_to_private(a),
    b_ = simde__m128i_to_private(b);
  int av[lane_count];
  int bv[lane_count];
  uint32_t int_res_1 = 0;
  uint32_t int_res_2;

  /* |length|, saturated; the range test first avoids negating INT_MIN. */
  if (la < 0) {
    la = (la < -lane_count) ? lane_count : -la;
  }
  if (la > lane_count) {
    la = lane_count;
  }
  if (lb < 0) {
    lb = (lb < -lane_count) ? lane_count : -lb;
  }
  if (lb > lane_count) {
    lb = lane_count;
  }

  /* Widen every element once, honouring the requested signedness, so the
   * comparisons below are plain int comparisons. */
  for (int i = 0 ; i < lane_count ; i++) {
    av[i] = is_signed ? (int) a_.i8[i] : (int) (uint8_t) a_.i8[i];
    bv[i] = is_signed ? (int) b_.i8[i] : (int) (uint8_t) b_.i8[i];
  }

  /* Bit j of int_res_1 describes element j of b. */
  for (int j = 0 ; j < lane_count ; j++) {
    int bit = 0;

    switch (cmp_op) {
      case SIMDE_SIDD_CMP_EQUAL_ANY:
        /* b[j] matches any valid element of a; invalid elements never match. */
        if (j < lb) {
          for (int i = 0 ; i < la ; i++) {
            if (av[i] == bv[j]) {
              bit = 1;
              break;
            }
          }
        }
        break;
      case SIMDE_SIDD_CMP_RANGES:
        /* a holds (low, high) pairs; both ends of a pair must be valid. */
        if (j < lb) {
          for (int i = 0 ; (i + 1) < la ; i += 2) {
            if ((av[i] <= bv[j]) && (bv[j] <= av[i + 1])) {
              bit = 1;
              break;
            }
          }
        }
        break;
      case SIMDE_SIDD_CMP_EQUAL_EACH:
        /* Position-wise compare: both valid -> compare, both invalid -> 1,
         * exactly one invalid -> 0. */
        if ((j < la) && (j < lb)) {
          bit = (av[j] == bv[j]);
        } else {
          bit = ((j >= la) && (j >= lb));
        }
        break;
      case SIMDE_SIDD_CMP_EQUAL_ORDERED:
        /* Does a (as a substring) start at b[j]? Invalid elements of a
         * always match; a valid a[i] against an invalid b element fails.
         * Positions past the end of the register are not checked. */
        bit = 1;
        for (int i = 0 ; (i < la) && ((j + i) < lane_count) ; i++) {
          if (((j + i) >= lb) || (av[i] != bv[j + i])) {
            bit = 0;
            break;
          }
        }
        break;
    }

    int_res_1 |= ((uint32_t) bit) << j;
  }

  /* Polarity: optionally invert all bits, or only those of valid b elements. */
  int_res_2 = int_res_1;
  if (negate) {
    const uint32_t every_lane = (((uint32_t) 1) << lane_count) - 1;
    const uint32_t valid_lanes = (((uint32_t) 1) << lb) - 1;
    int_res_2 ^= masked ? valid_lanes : every_lane;
  }

  return (int_res_2 == 0) && (lb >= lane_count);
}

/* Portable fallback for _mm_cmpestra on 16-bit elements (imm8 bit 0 set).
 *
 * a, la : first operand and its explicit length, in elements.
 * b, lb : second operand and its explicit length, in elements.
 * imm8  : control byte; bit 1 selects signed elements, bits 3:2 the
 *         aggregation (SIMDE_SIDD_CMP_*), bit 4 negation and bit 5 whether
 *         negation is restricted to the valid elements of b.
 *
 * Lengths are interpreted as absolute values saturated to the element count
 * (8). An element is "valid" when its index is below the operand's length.
 *
 * An intermediate mask with one bit per element of b is computed, the
 * polarity is applied, and the function returns 1 when the final mask is
 * zero AND b is full length (lb == 8); otherwise it returns 0. */
SIMDE__FUNCTION_ATTRIBUTES
int
simde_mm_cmpestra_16_(simde__m128i a, int la, simde__m128i b, int lb, const int imm8)
SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 127) {
  enum { lane_count = 128 / 16 };
  const int cmp_op = imm8 & 0x0c;          /* aggregation lives in bits 3:2 */
  const int is_signed = (imm8 >> 1) & 1;   /* only matters for RANGES */
  const int negate = (imm8 >> 4) & 1;
  const int masked = (imm8 >> 5) & 1;
  simde__m128i_private
    a_ = simde__m128i_to_private(a),
    b_ = simde__m128i_to_private(b);
  int av[lane_count];
  int bv[lane_count];
  uint32_t int_res_1 = 0;
  uint32_t int_res_2;

  /* |length|, saturated; the range test first avoids negating INT_MIN. */
  if (la < 0) {
    la = (la < -lane_count) ? lane_count : -la;
  }
  if (la > lane_count) {
    la = lane_count;
  }
  if (lb < 0) {
    lb = (lb < -lane_count) ? lane_count : -lb;
  }
  if (lb > lane_count) {
    lb = lane_count;
  }

  /* Widen every element once, honouring the requested signedness, so the
   * comparisons below are plain int comparisons. */
  for (int i = 0 ; i < lane_count ; i++) {
    av[i] = is_signed ? (int) a_.i16[i] : (int) (uint16_t) a_.i16[i];
    bv[i] = is_signed ? (int) b_.i16[i] : (int) (uint16_t) b_.i16[i];
  }

  /* Bit j of int_res_1 describes element j of b. */
  for (int j = 0 ; j < lane_count ; j++) {
    int bit = 0;

    switch (cmp_op) {
      case SIMDE_SIDD_CMP_EQUAL_ANY:
        /* b[j] matches any valid element of a; invalid elements never match. */
        if (j < lb) {
          for (int i = 0 ; i < la ; i++) {
            if (av[i] == bv[j]) {
              bit = 1;
              break;
            }
          }
        }
        break;
      case SIMDE_SIDD_CMP_RANGES:
        /* a holds (low, high) pairs; both ends of a pair must be valid. */
        if (j < lb) {
          for (int i = 0 ; (i + 1) < la ; i += 2) {
            if ((av[i] <= bv[j]) && (bv[j] <= av[i + 1])) {
              bit = 1;
              break;
            }
          }
        }
        break;
      case SIMDE_SIDD_CMP_EQUAL_EACH:
        /* Position-wise compare: both valid -> compare, both invalid -> 1,
         * exactly one invalid -> 0. */
        if ((j < la) && (j < lb)) {
          bit = (av[j] == bv[j]);
        } else {
          bit = ((j >= la) && (j >= lb));
        }
        break;
      case SIMDE_SIDD_CMP_EQUAL_ORDERED:
        /* Does a (as a substring) start at b[j]? Invalid elements of a
         * always match; a valid a[i] against an invalid b element fails.
         * Positions past the end of the register are not checked. */
        bit = 1;
        for (int i = 0 ; (i < la) && ((j + i) < lane_count) ; i++) {
          if (((j + i) >= lb) || (av[i] != bv[j + i])) {
            bit = 0;
            break;
          }
        }
        break;
    }

    int_res_1 |= ((uint32_t) bit) << j;
  }

  /* Polarity: optionally invert all bits, or only those of valid b elements. */
  int_res_2 = int_res_1;
  if (negate) {
    const uint32_t every_lane = (((uint32_t) 1) << lane_count) - 1;
    const uint32_t valid_lanes = (((uint32_t) 1) << lb) - 1;
    int_res_2 ^= masked ? valid_lanes : every_lane;
  }

  return (int_res_2 == 0) && (lb >= lane_count);
}

/* simde_mm_cmpestra(a, la, b, lb, imm8)
 *
 * Without native SSE4.2 the call is routed by bit 0 of imm8
 * (SIMDE_SIDD_UWORD_OPS): clear selects the 8-bit helper, set selects the
 * 16-bit helper. With native SSE4.2 it forwards straight to _mm_cmpestra. */
#if !defined(SIMDE_X86_SSE4_2_NATIVE)
  #define simde_mm_cmpestra(a, la, b, lb, imm8) \
    ((((imm8) & SIMDE_SIDD_UWORD_OPS) == 0) \
      ? simde_mm_cmpestra_8_((a), (la), (b), (lb), (imm8)) \
      : simde_mm_cmpestra_16_((a), (la), (b), (lb), (imm8)))
#else
  #define simde_mm_cmpestra(a, la, b, lb, imm8) _mm_cmpestra(a, la, b, lb, imm8)
#endif
/* Optional alias so code written against the Intel name picks up SIMDe. */
#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES)
  #define _mm_cmpestra(a, la, b, lb, imm8) simde_mm_cmpestra(a, la, b, lb, imm8)
#endif

SIMDE__FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_cmpgt_epi64 (simde__m128i a, simde__m128i b) {
Expand Down
107 changes: 107 additions & 0 deletions test/x86/sse4.2.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,116 @@
#define SIMDE_TESTS_CURRENT_ISAX sse4_2
#include <test/x86/test-x86-internal.h>
#include <simde/x86/sse4.2.h>
#include <assert.h>

#if defined(SIMDE_X86_SSE4_2_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS)
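/* Disabled: unfinished scaffolding for the simde_mm_cmpestra tests. In both
 * functions below test_vec is still empty and the function returns
 * MUNIT_FAIL right after printing randomly generated candidate vectors, so
 * the verification loop is never reached. Presumably to be enabled once
 * real vectors are filled in.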
static MunitResult
test_simde_mm_cmpestra_16(const MunitParameter params[], void* data) {
(void) params;
(void) data;

const struct {
simde__m128i a;
int la;
simde__m128i b;
int lb;
const int imm8;
int r;
} test_vec[8] = {

};

printf("\n");
for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
simde__m128i_private a, b;
int la, lb, r;
const int imm8 = (munit_rand_int_range(0, UINT8_MAX) | 1);

munit_rand_memory(sizeof(a), (uint8_t*) &a);
munit_rand_memory(sizeof(b), (uint8_t*) &b);
la = munit_rand_int_range(0, 128/16);
lb = munit_rand_int_range(0, 128/16);

r = simde_mm_cmpestra(simde__m128i_from_private(a), la, simde__m128i_from_private(b), lb, imm8);

printf(" { simde_mm_set_epi16(INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "),\n"
" INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 ")),\n",
a.i16[7], a.i16[6], a.i16[5], a.i16[4], a.i16[3], a.i16[2], a.i16[1], a.i16[0]);
printf(" %d ,\n",la);
printf(" simde_mm_set_epi16(INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "),\n"
" INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 ")),\n",
b.i16[7], b.i16[6], b.i16[5], b.i16[4], b.i16[3], b.i16[2], b.i16[1], b.i16[0]);
printf(" %d ,\n",lb);
printf(" %d ,\n",imm8);
printf(" %d },\n",r);
}
return MUNIT_FAIL;

for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
int r = simde_mm_cmpestra(test_vec[i].a, test_vec[i].la, test_vec[i].b, test_vec[i].lb, test_vec[i].imm8);
assert(r == test_vec[i].r);
}

return MUNIT_OK;
}

static MunitResult
test_simde_mm_cmpestra_8(const MunitParameter params[], void* data) {
(void) params;
(void) data;

const struct {
simde__m128i a;
int la;
simde__m128i b;
int lb;
const int imm8;
int r;
} test_vec[8] = {

};

printf("\n");
for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
simde__m128i_private a, b;
int la, lb, r;
const int imm8 = (munit_rand_int_range(0, UINT8_MAX) & 0);

munit_rand_memory(sizeof(a), (uint8_t*) &a);
munit_rand_memory(sizeof(b), (uint8_t*) &b);
la = munit_rand_int_range(0, 128/8);
lb = munit_rand_int_range(0, 128/8);

r = simde_mm_cmpestra(simde__m128i_from_private(a), la, simde__m128i_from_private(b), lb, imm8);

printf(" { simde_mm_set_epi8(INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
" INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
" INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
" INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 ")),\n",
a.i8[15], a.i8[14], a.i8[13], a.i8[12], a.i8[11], a.i8[10], a.i8[ 9], a.i8[ 8],
a.i8[ 7], a.i8[ 6], a.i8[ 5], a.i8[ 4], a.i8[ 3], a.i8[ 2], a.i8[ 1], a.i8[ 0]);
printf(" %d ,\n",la);
printf(" simde_mm_set_epi8(INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
" INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
" INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
" INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 ")),\n",
b.i8[15], b.i8[14], b.i8[13], b.i8[12], b.i8[11], b.i8[10], b.i8[ 9], b.i8[ 8],
b.i8[ 7], b.i8[ 6], b.i8[ 5], b.i8[ 4], b.i8[ 3], b.i8[ 2], b.i8[ 1], b.i8[ 0]);
printf(" %d ,\n",lb);
printf(" %d ,\n",imm8);
printf(" %d },\n",r);
}
return MUNIT_FAIL;

for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
int r = simde_mm_cmpestra(test_vec[i].a, test_vec[i].la, test_vec[i].b, test_vec[i].lb, test_vec[i].imm8);
assert(r == test_vec[i].r);
}

return MUNIT_OK;
}
*/
static MunitResult
test_simde_mm_cmpgt_epi64(const MunitParameter params[], void* data) {
(void) params;
Expand Down