Skip to content

Commit fa8c771

Browse files
benwtrent and ldematte authored
Adding native code related to (elastic#138204) (elastic#138239)
* Adding native code related to (elastic#138204)
* Bump simdvec native lib build/publish VERSION

---------

Co-authored-by: Lorenzo Dematte <[email protected]>
1 parent 860b86b commit fa8c771

File tree

5 files changed

+79
-1
lines changed

5 files changed

+79
-1
lines changed

libs/simdvec/native/publish_vec_binaries.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ if [ -z "$ARTIFACTORY_API_KEY" ]; then
2020
exit 1;
2121
fi
2222

23-
VERSION="1.0.13"
23+
VERSION="1.0.14"
2424
ARTIFACTORY_REPOSITORY="${ARTIFACTORY_REPOSITORY:-https://artifactory.elastic.dev/artifactory/elasticsearch-native/}"
2525
TEMP=$(mktemp -d)
2626

libs/simdvec/native/src/vec/c/aarch64/vec.c

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,31 @@ EXPORT int32_t dot7u(int8_t* a, int8_t* b, size_t dims) {
9595
return res;
9696
}
9797

98+
EXPORT void dot7u_bulk(int8_t* a, int8_t* b, size_t dims, size_t count, float_t* results) {
99+
int32_t res = 0;
100+
if (dims > DOT7U_STRIDE_BYTES_LEN) {
101+
int limit = dims & ~(DOT7U_STRIDE_BYTES_LEN - 1);
102+
for (size_t c = 0; c < count; c++) {
103+
int i = limit;
104+
res = dot7u_inner(a, b, i);
105+
for (; i < dims; i++) {
106+
res += a[i] * b[i];
107+
}
108+
results[c] = (float_t)res;
109+
a += dims;
110+
}
111+
} else {
112+
for (size_t c = 0; c < count; c++) {
113+
res = 0;
114+
for (size_t i = 0; i < dims; i++) {
115+
res += a[i] * b[i];
116+
}
117+
results[c] = (float_t)res;
118+
a += dims;
119+
}
120+
}
121+
}
122+
98123
static inline int32_t sqr7u_inner(int8_t *a, int8_t *b, size_t dims) {
99124
int32x4_t acc1 = vdupq_n_s32(0);
100125
int32x4_t acc2 = vdupq_n_s32(0);

libs/simdvec/native/src/vec/c/amd64/vec.c

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,31 @@ EXPORT int32_t dot7u(int8_t* a, int8_t* b, size_t dims) {
153153
return res;
154154
}
155155

156+
EXPORT void dot7u_bulk(int8_t* a, int8_t* b, size_t dims, size_t count, float_t* results) {
157+
int32_t res = 0;
158+
if (dims > STRIDE_BYTES_LEN) {
159+
int limit = dims & ~(STRIDE_BYTES_LEN - 1);
160+
for (size_t c = 0; c < count; c++) {
161+
int i = limit;
162+
res = dot7u_inner(a, b, i);
163+
for (; i < dims; i++) {
164+
res += a[i] * b[i];
165+
}
166+
results[c] = (float_t)res;
167+
a += dims;
168+
}
169+
} else {
170+
for (size_t c = 0; c < count; c++) {
171+
res = 0;
172+
for (size_t i = 0; i < dims; i++) {
173+
res += a[i] * b[i];
174+
}
175+
results[c] = (float_t)res;
176+
a += dims;
177+
}
178+
}
179+
}
180+
156181
static inline int32_t sqr7u_inner(int8_t *a, int8_t *b, size_t dims) {
157182
// Init accumulator(s) with 0
158183
__m256i acc1 = _mm256_setzero_si256();

libs/simdvec/native/src/vec/c/amd64/vec_2.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,32 @@ EXPORT int32_t dot7u_2(int8_t* a, int8_t* b, size_t dims) {
114114
return res;
115115
}
116116

117+
extern "C"
118+
EXPORT void dot7u_bulk_2(int8_t* a, int8_t* b, size_t dims, size_t count, float_t* results) {
119+
int32_t res = 0;
120+
if (dims > STRIDE_BYTES_LEN) {
121+
int limit = dims & ~(STRIDE_BYTES_LEN - 1);
122+
for (size_t c = 0; c < count; c++) {
123+
int i = limit;
124+
res = dot7u_inner_avx512(a, b, i);
125+
for (; i < dims; i++) {
126+
res += a[i] * b[i];
127+
}
128+
results[c] = (float_t)res;
129+
a += dims;
130+
}
131+
} else {
132+
for (size_t c = 0; c < count; c++) {
133+
res = 0;
134+
for (size_t i = 0; i < dims; i++) {
135+
res += a[i] * b[i];
136+
}
137+
results[c] = (float_t)res;
138+
a += dims;
139+
}
140+
}
141+
}
142+
117143
template<int offsetRegs>
118144
inline __m512i sqr8(__m512i acc, const int8_t* p1, const int8_t* p2) {
119145
constexpr int lanes = offsetRegs * STRIDE_BYTES_LEN;

libs/simdvec/native/src/vec/headers/vec.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ EXPORT int vec_caps();
1919

2020
EXPORT int32_t dot7u(int8_t* a, int8_t* b, size_t dims);
2121

22+
EXPORT void dot7u_bulk(int8_t* a, int8_t* b, size_t dims, size_t count, float_t* results);
23+
2224
EXPORT int32_t sqr7u(int8_t *a, int8_t *b, size_t length);
2325

2426
EXPORT float cosf32(const float *a, const float *b, size_t elementCount);

0 commit comments

Comments
 (0)