Skip to content
Closed
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
226 changes: 226 additions & 0 deletions simde/x86/sse4.2.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,232 @@ SIMDE__BEGIN_DECLS
# define SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES
#endif

/* Aggregation-operation selectors for the SSE4.2 string-compare
 * intrinsics.  They occupy bits [3:2] of the imm8 control byte, so the
 * values must be identical to the _SIDD_CMP_* flags from <nmmintrin.h>;
 * otherwise code that ORs them into imm8 selects the wrong operation. */
#define SIMDE_SIDD_CMP_EQUAL_ANY     0x00
#define SIMDE_SIDD_CMP_RANGES        0x04
#define SIMDE_SIDD_CMP_EQUAL_EACH    0x08
#define SIMDE_SIDD_CMP_EQUAL_ORDERED 0x0c

#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES)
#define _SIDD_CMP_EQUAL_ANY SIMDE_SIDD_CMP_EQUAL_ANY
#define _SIDD_CMP_RANGES SIMDE_SIDD_CMP_RANGES
#define _SIDD_CMP_EQUAL_EACH SIMDE_SIDD_CMP_EQUAL_EACH
#define _SIDD_CMP_EQUAL_ORDERED SIMDE_SIDD_CMP_EQUAL_ORDERED
#endif

/* Portable implementation of _mm_cmpestra for 8-bit characters
 * (imm8[0] == 0): explicit-length packed string compare.
 *
 *   a, la  first operand and its length in characters
 *   b, lb  second operand and its length in characters
 *   imm8   control byte; this function reads
 *            bit  1     : 1 = signed characters, 0 = unsigned
 *            bits [3:2] : 0 equal-any, 1 ranges, 2 equal-each,
 *                         3 equal-ordered
 *            bits [5:4] : 0 positive, 1 negative, 2 masked positive,
 *                         3 masked negative polarity
 *
 * Lengths are interpreted as absolute values saturated to 16.  Bit j of
 * the intermediate result describes character j of `b`.
 *
 * Returns 1 when the final mask is zero and `b` is a full 16-character
 * string, 0 otherwise. */
SIMDE__FUNCTION_ATTRIBUTES
int
simde_mm_cmpestra_8_(simde__m128i a, int la, simde__m128i b, int lb, const int imm8) {
  enum { n_elem = 128 / 8 };
  const int is_signed = (imm8 >> 1) & 1;
  const int cmp_op = (imm8 >> 2) & 3;
  const int polarity = (imm8 >> 4) & 3;
  const uint32_t all_mask = (UINT32_C(1) << n_elem) - 1;
  simde__m128i_private
    a_ = simde__m128i_to_private(a),
    b_ = simde__m128i_to_private(b);

  /* |length| saturated to the element count; the range test comes first
   * so that negating INT_MIN can never happen. */
  const int len_a = ((la < -n_elem) || (la > n_elem)) ? n_elem : ((la < 0) ? -la : la);
  const int len_b = ((lb < -n_elem) || (lb > n_elem)) ? n_elem : ((lb < 0) ? -lb : lb);

  /* Widen every character once so that the ordering comparisons used by
   * the "ranges" operation honour the requested signedness. */
  int32_t ea[n_elem];
  int32_t eb[n_elem];
  for (int i = 0 ; i < n_elem ; i++) {
    ea[i] = is_signed ? (int32_t) a_.i8[i] : (int32_t) (uint8_t) a_.i8[i];
    eb[i] = is_signed ? (int32_t) b_.i8[i] : (int32_t) (uint8_t) b_.i8[i];
  }

  uint32_t int_res_1 = 0;
  switch (cmp_op) {
    case 0: /* equal any: b[j] equals some valid character of a */
      for (int j = 0 ; j < len_b ; j++) {
        for (int i = 0 ; i < len_a ; i++) {
          if (ea[i] == eb[j]) {
            int_res_1 |= UINT32_C(1) << j;
            break;
          }
        }
      }
      break;
    case 1: /* ranges: a[even] <= b[j] <= a[odd] for some fully valid pair */
      for (int j = 0 ; j < len_b ; j++) {
        for (int i = 0 ; (i + 1) < len_a ; i += 2) {
          if ((eb[j] >= ea[i]) && (eb[j] <= ea[i + 1])) {
            int_res_1 |= UINT32_C(1) << j;
            break;
          }
        }
      }
      break;
    case 2: /* equal each: element-wise compare; two invalid characters
             * count as equal, exactly one invalid counts as different */
      for (int i = 0 ; i < n_elem ; i++) {
        const int a_valid = (i < len_a);
        const int b_valid = (i < len_b);
        const int equal = (a_valid && b_valid) ? (ea[i] == eb[i]) : (a_valid == b_valid);
        if (equal)
          int_res_1 |= UINT32_C(1) << i;
      }
      break;
    default: /* 3, equal ordered: a occurs in b starting at position j.
              * Invalid characters of a match anything; a valid character
              * of a never matches an invalid character of b. */
      for (int j = 0 ; j < n_elem ; j++) {
        int matched = 1;
        for (int i = 0 ; matched && (i < len_a) && ((i + j) < n_elem) ; i++) {
          matched = ((i + j) < len_b) && (ea[i] == eb[i + j]);
        }
        if (matched)
          int_res_1 |= UINT32_C(1) << j;
      }
      break;
  }

  uint32_t int_res_2;
  switch (polarity) {
    case 1: /* negative: invert every result bit */
      int_res_2 = ~int_res_1 & all_mask;
      break;
    case 3: /* masked negative: invert only bits of valid characters of b */
      int_res_2 = int_res_1 ^ ((UINT32_C(1) << len_b) - 1);
      break;
    default: /* positive and masked positive: unchanged */
      int_res_2 = int_res_1;
      break;
  }

  return (int_res_2 == 0) && (len_b == n_elem);
}

/* Portable implementation of _mm_cmpestra for 16-bit characters
 * (imm8[0] == 1): explicit-length packed string compare.
 *
 *   a, la  first operand and its length in characters
 *   b, lb  second operand and its length in characters
 *   imm8   control byte; this function reads
 *            bit  1     : 1 = signed characters, 0 = unsigned
 *            bits [3:2] : 0 equal-any, 1 ranges, 2 equal-each,
 *                         3 equal-ordered
 *            bits [5:4] : 0 positive, 1 negative, 2 masked positive,
 *                         3 masked negative polarity
 *
 * Lengths are interpreted as absolute values saturated to 8.  Bit j of
 * the intermediate result describes character j of `b`.
 *
 * Returns 1 when the final mask is zero and `b` is a full 8-character
 * string, 0 otherwise. */
SIMDE__FUNCTION_ATTRIBUTES
int
simde_mm_cmpestra_16_(simde__m128i a, int la, simde__m128i b, int lb, const int imm8) {
  enum { n_elem = 128 / 16 };
  const int is_signed = (imm8 >> 1) & 1;
  const int cmp_op = (imm8 >> 2) & 3;
  const int polarity = (imm8 >> 4) & 3;
  const uint32_t all_mask = (UINT32_C(1) << n_elem) - 1;
  simde__m128i_private
    a_ = simde__m128i_to_private(a),
    b_ = simde__m128i_to_private(b);

  /* |length| saturated to the element count; the range test comes first
   * so that negating INT_MIN can never happen. */
  const int len_a = ((la < -n_elem) || (la > n_elem)) ? n_elem : ((la < 0) ? -la : la);
  const int len_b = ((lb < -n_elem) || (lb > n_elem)) ? n_elem : ((lb < 0) ? -lb : lb);

  /* Widen every character once so that the ordering comparisons used by
   * the "ranges" operation honour the requested signedness. */
  int32_t ea[n_elem];
  int32_t eb[n_elem];
  for (int i = 0 ; i < n_elem ; i++) {
    ea[i] = is_signed ? (int32_t) a_.i16[i] : (int32_t) (uint16_t) a_.i16[i];
    eb[i] = is_signed ? (int32_t) b_.i16[i] : (int32_t) (uint16_t) b_.i16[i];
  }

  uint32_t int_res_1 = 0;
  switch (cmp_op) {
    case 0: /* equal any: b[j] equals some valid character of a */
      for (int j = 0 ; j < len_b ; j++) {
        for (int i = 0 ; i < len_a ; i++) {
          if (ea[i] == eb[j]) {
            int_res_1 |= UINT32_C(1) << j;
            break;
          }
        }
      }
      break;
    case 1: /* ranges: a[even] <= b[j] <= a[odd] for some fully valid pair */
      for (int j = 0 ; j < len_b ; j++) {
        for (int i = 0 ; (i + 1) < len_a ; i += 2) {
          if ((eb[j] >= ea[i]) && (eb[j] <= ea[i + 1])) {
            int_res_1 |= UINT32_C(1) << j;
            break;
          }
        }
      }
      break;
    case 2: /* equal each: element-wise compare; two invalid characters
             * count as equal, exactly one invalid counts as different */
      for (int i = 0 ; i < n_elem ; i++) {
        const int a_valid = (i < len_a);
        const int b_valid = (i < len_b);
        const int equal = (a_valid && b_valid) ? (ea[i] == eb[i]) : (a_valid == b_valid);
        if (equal)
          int_res_1 |= UINT32_C(1) << i;
      }
      break;
    default: /* 3, equal ordered: a occurs in b starting at position j.
              * Invalid characters of a match anything; a valid character
              * of a never matches an invalid character of b. */
      for (int j = 0 ; j < n_elem ; j++) {
        int matched = 1;
        for (int i = 0 ; matched && (i < len_a) && ((i + j) < n_elem) ; i++) {
          matched = ((i + j) < len_b) && (ea[i] == eb[i + j]);
        }
        if (matched)
          int_res_1 |= UINT32_C(1) << j;
      }
      break;
  }

  uint32_t int_res_2;
  switch (polarity) {
    case 1: /* negative: invert every result bit */
      int_res_2 = ~int_res_1 & all_mask;
      break;
    case 3: /* masked negative: invert only bits of valid characters of b */
      int_res_2 = int_res_1 ^ ((UINT32_C(1) << len_b) - 1);
      break;
    default: /* positive and masked positive: unchanged */
      int_res_2 = int_res_1;
      break;
  }

  return (int_res_2 == 0) && (len_b == n_elem);
}

/* Explicit-length packed string compare; forwards to the native
 * _mm_cmpestra when SIMDE_X86_SSE4_2_NATIVE is defined, otherwise to the
 * portable helper for the character width selected by imm8.
 *
 *   a, la  first operand and its length in characters
 *   b, lb  second operand and its length in characters
 *   imm8   control byte; bit 0 selects the character width
 *          (0 = 8-bit, 1 = 16-bit), the remaining bits are interpreted
 *          by the callee.
 *
 * Returns whatever the selected implementation returns. */
SIMDE__FUNCTION_ATTRIBUTES
int
simde_mm_cmpestra(simde__m128i a, int la, simde__m128i b, int lb, const int imm8){
  #if defined(SIMDE_X86_SSE4_2_NATIVE)
    /* NOTE(review): the native intrinsic needs imm8 to be a compile-time
     * constant; forwarding a function parameter presumably only builds
     * when this call is inlined with a constant argument -- confirm. */
    return _mm_cmpestra(a, la, b, lb, imm8);
  #else
    /* imm8[0] set means word (16-bit) characters, clear means bytes. */
    if (imm8 & 1)
      return simde_mm_cmpestra_16_(a, la, b, lb, imm8);
    else
      return simde_mm_cmpestra_8_(a, la, b, lb, imm8);
  #endif
}
#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES)
# define _mm_cmpestra(a, la, b, lb, imm8) simde_mm_cmpestra(a, la, b, lb, imm8)
#endif

SIMDE__FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_cmpgt_epi64 (simde__m128i a, simde__m128i b) {
Expand Down
107 changes: 107 additions & 0 deletions test/x86/sse4.2.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,116 @@
#define SIMDE_TESTS_CURRENT_ISAX sse4_2
#include <test/x86/test-x86-internal.h>
#include <simde/x86/sse4.2.h>
#include <assert.h>

#if defined(SIMDE_X86_SSE4_2_NATIVE) || defined(SIMDE_NO_NATIVE) || defined(SIMDE_ALWAYS_BUILD_NATIVE_TESTS)

/* Scaffold for the 16-bit-character simde_mm_cmpestra test.
 *
 * The first loop draws random operands, lengths and a control byte with
 * bit 0 forced on, calls simde_mm_cmpestra and prints each case as an
 * initializer row for `test_vec`.  The function then returns MUNIT_FAIL
 * unconditionally, so the comparison loop below it is currently not
 * reached. */
static MunitResult
test_simde_mm_cmpestra_16(const MunitParameter params[], void* data) {
  (void) params;
  (void) data;

  const struct {
    simde__m128i a;
    int la;
    simde__m128i b;
    int lb;
    const int imm8;
    int r;
  } test_vec[8] = {

  };
  const size_t n_vec = sizeof(test_vec) / sizeof(test_vec[0]);
  size_t idx;

  printf("\n");
  for (idx = 0 ; idx < n_vec ; idx++) {
    simde__m128i_private rnd_a, rnd_b;
    int len_a, len_b, res;
    /* Draw order matters for the random stream: control byte first,
     * then both vectors, then both lengths. */
    const int ctrl = (munit_rand_int_range(0, UINT8_MAX) | 1);

    munit_rand_memory(sizeof(rnd_a), (uint8_t*) &rnd_a);
    munit_rand_memory(sizeof(rnd_b), (uint8_t*) &rnd_b);
    len_a = munit_rand_int_range(0, 128/16);
    len_b = munit_rand_int_range(0, 128/16);

    res = simde_mm_cmpestra(simde__m128i_from_private(rnd_a), len_a, simde__m128i_from_private(rnd_b), len_b, ctrl);

    printf(" { simde_mm_set_epi16(INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "),\n"
           " INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 ")),\n",
           rnd_a.i16[7], rnd_a.i16[6], rnd_a.i16[5], rnd_a.i16[4],
           rnd_a.i16[3], rnd_a.i16[2], rnd_a.i16[1], rnd_a.i16[0]);
    printf(" %d ,\n",len_a);
    printf(" simde_mm_set_epi16(INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "),\n"
           " INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 ")),\n",
           rnd_b.i16[7], rnd_b.i16[6], rnd_b.i16[5], rnd_b.i16[4],
           rnd_b.i16[3], rnd_b.i16[2], rnd_b.i16[1], rnd_b.i16[0]);
    printf(" %d ,\n",len_b);
    printf(" %d ,\n",ctrl);
    printf(" %d },\n",res);
  }
  return MUNIT_FAIL;

  for (idx = 0 ; idx < n_vec ; idx++) {
    int got = simde_mm_cmpestra(test_vec[idx].a, test_vec[idx].la, test_vec[idx].b, test_vec[idx].lb, test_vec[idx].imm8);
    assert(got == test_vec[idx].r);
  }

  return MUNIT_OK;
}

/* Scaffold for the 8-bit-character simde_mm_cmpestra test.
 *
 * The first loop draws random operands, lengths and a control byte with
 * bit 0 cleared (byte characters), calls simde_mm_cmpestra and prints
 * each case as an initializer row for `test_vec`.  The function then
 * returns MUNIT_FAIL unconditionally, so the comparison loop below it is
 * currently not reached. */
static MunitResult
test_simde_mm_cmpestra_8(const MunitParameter params[], void* data) {
  (void) params;
  (void) data;

  const struct {
    simde__m128i a;
    int la;
    simde__m128i b;
    int lb;
    const int imm8;
    int r;
  } test_vec[8] = {

  };

  printf("\n");
  for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
    simde__m128i_private a, b;
    int la, lb, r;
    /* Clear only bit 0 so the byte path is selected while the remaining
     * control bits stay random; masking with 0 would pin imm8 to 0. */
    const int imm8 = (munit_rand_int_range(0, UINT8_MAX) & ~1);

    munit_rand_memory(sizeof(a), (uint8_t*) &a);
    munit_rand_memory(sizeof(b), (uint8_t*) &b);
    la = munit_rand_int_range(0, 128/8);
    lb = munit_rand_int_range(0, 128/8);

    r = simde_mm_cmpestra(simde__m128i_from_private(a), la, simde__m128i_from_private(b), lb, imm8);

    printf(" { simde_mm_set_epi8(INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
           " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
           " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
           " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 ")),\n",
           a.i8[15], a.i8[14], a.i8[13], a.i8[12], a.i8[11], a.i8[10], a.i8[ 9], a.i8[ 8],
           a.i8[ 7], a.i8[ 6], a.i8[ 5], a.i8[ 4], a.i8[ 3], a.i8[ 2], a.i8[ 1], a.i8[ 0]);
    printf(" %d ,\n",la);
    printf(" simde_mm_set_epi8(INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
           " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
           " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
           " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 ")),\n",
           b.i8[15], b.i8[14], b.i8[13], b.i8[12], b.i8[11], b.i8[10], b.i8[ 9], b.i8[ 8],
           b.i8[ 7], b.i8[ 6], b.i8[ 5], b.i8[ 4], b.i8[ 3], b.i8[ 2], b.i8[ 1], b.i8[ 0]);
    printf(" %d ,\n",lb);
    printf(" %d ,\n",imm8);
    printf(" %d },\n",r);
  }
  return MUNIT_FAIL;

  for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
    int r = simde_mm_cmpestra(test_vec[i].a, test_vec[i].la, test_vec[i].b, test_vec[i].lb, test_vec[i].imm8);
    assert(r == test_vec[i].r);
  }

  return MUNIT_OK;
}

static MunitResult
test_simde_mm_cmpgt_epi64(const MunitParameter params[], void* data) {
(void) params;
Expand Down