@@ -136,24 +136,25 @@ _Generic((*(out)), \
136136 } \
137137 }
138138#elif defined(GENERATE_SVE_CODE )
139- #define OP_AARCH64_FUNC (name , type_name , type_size , type_cnt , type , op ) \
139+ #define OP_AARCH64_FUNC (name , type_name , type_size , type_cnt , type , op ) \
140+ SVE_ATTR \
140141 static void OP_CONCAT (ompi_op_aarch64_2buff_ ##name ##_##type##type_size##_t, APPEND) \
141- (const void *_in, void *_out, int *count, \
142- struct ompi_datatype_t **dtype, \
143- struct ompi_op_base_module_1_0_0_t *module) \
144- { \
145- const int types_per_step = svcnt(*((type##type_size##_t *) _in)); \
146- const int cnt = *count; \
147- type##type_size##_t *in = (type##type_size##_t *) _in, \
148- *out = (type##type_size##_t *) _out; \
149- OP_CONCAT(OMPI_OP_TYPE_PREPEND, type##type_size##_t) vsrc, vdst; \
150- for (int idx=0; idx < cnt; idx += types_per_step) { \
151- svbool_t pred = svwhilelt_b##type_size(idx, cnt); \
152- vsrc = svld1(pred, &in[idx]); \
153- vdst = svld1(pred, &out[idx]); \
154- vdst = OP_CONCAT(OMPI_OP_OP_PREPEND, op##_x)(pred, vdst, vsrc); \
155- OP_CONCAT(OMPI_OP_OP_PREPEND, st1)(pred, &out[idx], vdst); \
156- } \
142+ (const void *_in, void *_out, int *count, \
143+ struct ompi_datatype_t **dtype, \
144+ struct ompi_op_base_module_1_0_0_t *module) \
145+ { \
146+ const int types_per_step = svcnt(*((type##type_size##_t *) _in)); \
147+ const int cnt = *count; \
148+ type##type_size##_t *in = (type##type_size##_t *) _in, \
149+ *out = (type##type_size##_t *) _out; \
150+ OP_CONCAT(OMPI_OP_TYPE_PREPEND, type##type_size##_t) vsrc, vdst; \
151+ for (int idx=0; idx < cnt; idx += types_per_step) { \
152+ svbool_t pred = svwhilelt_b##type_size(idx, cnt); \
153+ vsrc = svld1(pred, &in[idx]); \
154+ vdst = svld1(pred, &out[idx]); \
155+ vdst = OP_CONCAT(OMPI_OP_OP_PREPEND, op##_x)(pred, vdst, vsrc); \
156+ OP_CONCAT(OMPI_OP_OP_PREPEND, st1)(pred, &out[idx], vdst); \
157+ } \
157158 }
158159#endif
159160
@@ -302,25 +303,26 @@ static void OP_CONCAT(ompi_op_aarch64_3buff_##name##_##type##type_size##_t, APPE
302303 } \
303304}
304305#elif defined(GENERATE_SVE_CODE )
305- #define OP_AARCH64_FUNC_3BUFF (name , type_name , type_size , type_cnt , type , op ) \
306- static void OP_CONCAT(ompi_op_aarch64_3buff_##name##_##type##type_size##_t, APPEND) \
307- (const void *_in1, const void *_in2, void *_out, int *count, \
308- struct ompi_datatype_t **dtype, \
309- struct ompi_op_base_module_1_0_0_t *module) \
310- { \
311- const int types_per_step = svcnt(*((type##type_size##_t *) _in1)); \
312- type##type_size##_t *in1 = (type##type_size##_t *) _in1, \
313- *in2 = (type##type_size##_t *) _in2, \
314- *out = (type##type_size##_t *) _out; \
315- const int cnt = *count; \
316- OP_CONCAT(OMPI_OP_TYPE_PREPEND, type##type_size##_t) vsrc, vdst; \
317- for (int idx=0; idx < cnt; idx += types_per_step) { \
318- svbool_t pred = svwhilelt_b##type_size(idx, cnt); \
319- vsrc = svld1(pred, &in1[idx]); \
320- vdst = svld1(pred, &in2[idx]); \
321- vdst = OP_CONCAT(OMPI_OP_OP_PREPEND, op##_x)(pred, vdst, vsrc); \
322- OP_CONCAT(OMPI_OP_OP_PREPEND, st1)(pred, &out[idx], vdst); \
323- } \
306+ #define OP_AARCH64_FUNC_3BUFF (name , type_name , type_size , type_cnt , type , op ) \
307+ SVE_ATTR \
308+ static void OP_CONCAT(ompi_op_aarch64_3buff_##name##_##type##type_size##_t, APPEND) \
309+ (const void *_in1, const void *_in2, void *_out, int *count, \
310+ struct ompi_datatype_t **dtype, \
311+ struct ompi_op_base_module_1_0_0_t *module) \
312+ { \
313+ const int types_per_step = svcnt(*((type##type_size##_t *) _in1)); \
314+ type##type_size##_t *in1 = (type##type_size##_t *) _in1, \
315+ *in2 = (type##type_size##_t *) _in2, \
316+ *out = (type##type_size##_t *) _out; \
317+ const int cnt = *count; \
318+ OP_CONCAT(OMPI_OP_TYPE_PREPEND, type##type_size##_t) vsrc, vdst; \
319+ for (int idx=0; idx < cnt; idx += types_per_step) { \
320+ svbool_t pred = svwhilelt_b##type_size(idx, cnt); \
321+ vsrc = svld1(pred, &in1[idx]); \
322+ vdst = svld1(pred, &in2[idx]); \
323+ vdst = OP_CONCAT(OMPI_OP_OP_PREPEND, op##_x)(pred, vdst, vsrc); \
324+ OP_CONCAT(OMPI_OP_OP_PREPEND, st1)(pred, &out[idx], vdst); \
325+ } \
324326}
325327#endif /* defined(GENERATE_SVE_CODE) */
326328
0 commit comments