Skip to content

Commit 2971ff4

Browse files
authored
Merge pull request ceph#54882 from Svelar/isa_xor_dev
erasure-code/isa/xor_op: add neon-based region_xor implementation
2 parents 9652842 + 8178d44 commit 2971ff4

File tree

2 files changed

+66
-2
lines changed

2 files changed

+66
-2
lines changed

src/erasure-code/isa/xor_op.cc

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,11 @@
1515
#include <stdio.h>
1616
#include <string.h>
1717
#include "arch/intel.h"
18+
#include "arch/arm.h"
19+
20+
#if defined(__aarch64__) && defined(__ARM_NEON)
21+
#include <arm_neon.h>
22+
#endif
1823

1924
#include "include/ceph_assert.h"
2025

@@ -101,6 +106,16 @@ region_xor(unsigned char** src,
101106
// 64-byte region xor
102107
region_sse2_xor((char**) src, (char*) parity, src_size, region_size);
103108
} else
109+
#elif defined (__aarch64__) && defined(__ARM_NEON)
110+
if (ceph_arch_neon) {
111+
// -----------------------------
112+
// use NEON region xor function
113+
// -----------------------------
114+
unsigned region_size =
115+
(size / EC_ISA_VECTOR_NEON_WORDSIZE) * EC_ISA_VECTOR_NEON_WORDSIZE;
116+
size_left -= region_size;
117+
region_neon_xor((char**) src, (char *) parity, src_size, region_size);
118+
} else
104119
#endif
105120
{
106121
// --------------------------------------------
@@ -181,3 +196,42 @@ region_sse2_xor(char** src,
181196
#endif // __x86_64__
182197
return;
183198
}
199+
200+
#if defined(__aarch64__) && defined(__ARM_NEON)
201+
void
202+
// -----------------------------------------------------------------------------
203+
region_neon_xor(char **src,
204+
char *parity,
205+
int src_size,
206+
unsigned size)
207+
// -----------------------------------------------------------------------------
208+
{
209+
ceph_assert(!(size % EC_ISA_VECTOR_NEON_WORDSIZE));
210+
unsigned char *p = (unsigned char *)parity;
211+
unsigned char *vbuf[256] = { NULL };
212+
for (int v = 0; v < src_size; v++) {
213+
vbuf[v] = (unsigned char *)src[v];
214+
}
215+
216+
// ----------------------------------------------------------------------------------------
217+
// NEON load instructions can load 128bits of data each time, and there are 2 load channels
218+
// ----------------------------------------------------------------------------------------
219+
for (unsigned i = 0; i < size; i += EC_ISA_VECTOR_NEON_WORDSIZE) {
220+
uint64x2_t d0_1 = vld1q_u64((uint64_t *)(&(vbuf[0][i])));
221+
uint64x2_t d0_2 = vld1q_u64((uint64_t *)(&(vbuf[0][i + 16])));
222+
223+
for (int d = 1; d < src_size; d++) {
224+
uint64x2_t di_1 = vld1q_u64((uint64_t *)(&(vbuf[d][i])));
225+
uint64x2_t di_2 = vld1q_u64((uint64_t *)(&(vbuf[d][i + 16])));
226+
227+
d0_1 = veorq_u64(d0_1, di_1);
228+
d0_2 = veorq_u64(d0_2, di_2);
229+
}
230+
231+
vst1q_u64((uint64_t *)p, d0_1);
232+
vst1q_u64((uint64_t *)(p + 16), d0_2);
233+
p += EC_ISA_VECTOR_NEON_WORDSIZE;
234+
}
235+
return;
236+
}
237+
#endif // __aarch64__ && __ARM_NEON

src/erasure-code/isa/xor_op.h

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727

2828
#define EC_ISA_ADDRESS_ALIGNMENT 32u
2929
#define EC_ISA_VECTOR_SSE2_WORDSIZE 64u
30-
30+
#define EC_ISA_VECTOR_NEON_WORDSIZE 32u
3131
#if __GNUC__ > 4 || \
3232
( (__GNUC__ == 4) && (__GNUC_MINOR__ >= 4) ) ||\
3333
(__clang__ == 1 )
@@ -83,5 +83,15 @@ region_sse2_xor(char** src /* array of 64-byte aligned source pointer to xor */,
8383
int src_size /* size of the source pointer array */,
8484
unsigned size /* size of the region to xor */);
8585

86-
86+
#if defined(__aarch64__) && defined(__ARM_NEON)
87+
// -------------------------------------------------------------------------
88+
// compute region XOR like parity = src[0] ^ src[1] ... ^ src[src_size-1]
89+
// using NEON 32-byte operations
90+
// -------------------------------------------------------------------------
91+
void
92+
region_neon_xor(char** src /* array of 64-byte aligned source pointer to xor */,
93+
char* parity /* 32-byte aligned output pointer containing the parity */,
94+
int src_size /* size of the source pointer array */,
95+
unsigned size /* size of the region to xor */);
96+
#endif // __aarch64__ && __ARM_NEON
8797
#endif // EC_ISA_XOR_OP_H

0 commit comments

Comments
 (0)