1414#elif (defined(__ARM_NEON) && !defined(CT2_WITH_CPU_DISPATCH)) || defined(USE_NEON)
1515# define TARGET_ISA CpuIsa::NEON
1616# include " cpu/vec_neon.h"
17+ #elif (defined(CT2_WITH_RVV) && defined(__riscv_vector))
18+ # define USE_RVV
19+ # define TARGET_ISA CpuIsa::RVV
20+ # include " cpu/vec_rvv.h"
1721#else
1822# define TARGET_ISA CpuIsa::GENERIC
1923# include " cpu/vec.h"
@@ -213,7 +217,7 @@ namespace ctranslate2 {
213217
214218 template <>
215219 void exp<TARGET_ISA>(const float * x, float * y, dim_t size) {
216- vectorized_unary_transform<TARGET_ISA>(x, y, size, Vec<float , TARGET_ISA>::exp);
220+ vectorized_unary_transform<TARGET_ISA>(x, y, size, Vec<float , TARGET_ISA>::exp);
217221 }
218222
219223 template <>
@@ -263,11 +267,20 @@ namespace ctranslate2 {
263267
264268 template <CpuIsa ISA, typename T>
265269 void add (T a, const T* x, T* y, dim_t size) {
270+ #ifdef USE_RVV
271+ T a_copy = a;
272+ vectorized_unary_transform<ISA>(x, y, size,
273+ [a_copy](vec_type<T, ISA> v) {
274+ auto vec_a = Vec<T, ISA>::load (a_copy);
275+ return Vec<T, ISA>::add (v, vec_a);
276+ });
277+ #else
266278 auto vec_a = Vec<T, ISA>::load (a);
267279 vectorized_unary_transform<ISA>(x, y, size,
268- [vec_a](vec_type<T, ISA> v) {
269- return Vec<T, ISA>::add (v, vec_a);
270- });
280+ [vec_a](vec_type<T, ISA> v) {
281+ return Vec<T, ISA>::add (v, vec_a);
282+ });
283+ #endif
271284 }
272285
273286 template <CpuIsa ISA, typename T>
@@ -282,11 +295,20 @@ namespace ctranslate2 {
282295
283296 template <CpuIsa ISA, typename T>
284297 void mul (T a, const T* x, T* y, dim_t size) {
298+ #ifdef USE_RVV
299+ T a_copy = a;
300+ vectorized_unary_transform<ISA>(x, y, size,
301+ [a_copy](vec_type<T, ISA> v) {
302+ auto vec_a = Vec<T, ISA>::load (a_copy);
303+ return Vec<T, ISA>::mul (v, vec_a);
304+ });
305+ #else
285306 auto vec_a = Vec<T, ISA>::load (a);
286307 vectorized_unary_transform<ISA>(x, y, size,
287- [vec_a](vec_type<T, ISA> v) {
288- return Vec<T, ISA>::mul (v, vec_a);
289- });
308+ [vec_a](vec_type<T, ISA> v) {
309+ return Vec<T, ISA>::mul (v, vec_a);
310+ });
311+ #endif
290312 }
291313
292314 template <CpuIsa ISA, typename T>
@@ -296,11 +318,20 @@ namespace ctranslate2 {
296318
297319 template <CpuIsa ISA, typename T>
298320 void max (T a, const T* x, T* y, dim_t size) {
321+ #ifdef USE_RVV
322+ T a_copy = a;
323+ vectorized_unary_transform<ISA>(x, y, size,
324+ [a_copy](vec_type<T, ISA> v) {
325+ auto vec_a = Vec<T, ISA>::load (a_copy);
326+ return Vec<T, ISA>::max (v, vec_a);
327+ });
328+ #else
299329 auto vec_a = Vec<T, ISA>::load (a);
300330 vectorized_unary_transform<ISA>(x, y, size,
301- [vec_a](vec_type<T, ISA> v) {
302- return Vec<T, ISA>::max (v, vec_a);
303- });
331+ [vec_a](vec_type<T, ISA> v) {
332+ return Vec<T, ISA>::max (v, vec_a);
333+ });
334+ #endif
304335 }
305336
306337 template <CpuIsa ISA, typename T>
@@ -310,11 +341,20 @@ namespace ctranslate2 {
310341
311342 template <CpuIsa ISA, typename T>
312343 void min (T a, const T* x, T* y, dim_t size) {
344+ #ifdef USE_RVV
345+ T a_copy = a;
346+ vectorized_unary_transform<ISA>(x, y, size,
347+ [a_copy](vec_type<T, ISA> v) {
348+ auto vec_a = Vec<T, ISA>::load (a_copy);
349+ return Vec<T, ISA>::min (v, vec_a);
350+ });
351+ #else
313352 auto vec_a = Vec<T, ISA>::load (a);
314353 vectorized_unary_transform<ISA>(x, y, size,
315- [vec_a](vec_type<T, ISA> v) {
316- return Vec<T, ISA>::min (v, vec_a);
317- });
354+ [vec_a](vec_type<T, ISA> v) {
355+ return Vec<T, ISA>::min (v, vec_a);
356+ });
357+ #endif
318358 }
319359
320360 template <CpuIsa ISA, typename T>
@@ -349,6 +389,7 @@ namespace ctranslate2 {
349389 static_cast <T>(0 ),
350390 Vec<T, ISA>::abs,
351391 Vec<T, ISA>::max,
392+
352393 Vec<T, ISA>::reduce_max,
353394 Vec<T>::abs,
354395 Vec<T>::max);
@@ -377,14 +418,22 @@ namespace ctranslate2 {
377418 using VecType = Vec<float , TARGET_ISA>;
378419
379420 const auto x_max = reduce_max<TARGET_ISA>(x, size);
380- const auto vec_x_max = VecType::load (x_max);
381421
382- const auto scalar_exp_func = [x_max](vec_type< float > v) {
383- return Vec< float > ::exp (Vec< float >:: sub (v, x_max) );
422+ const auto scalar_exp_func = [x_max](float v) {
423+ return std ::exp (v - x_max);
384424 };
385- const auto vec_exp_func = [vec_x_max](vec_type<float , TARGET_ISA> v) {
425+ #ifdef USE_RVV
426+ float x_max_copy = x_max;
427+ auto vec_exp_func = [x_max_copy](vec_type<float , TARGET_ISA> v) {
428+ auto vec_x_max = VecType::load (x_max_copy);
386429 return VecType::exp (VecType::sub (v, vec_x_max));
387430 };
431+ #else
432+ const auto vec_x_max = VecType::load (x_max);
433+ auto vec_exp_func = [vec_x_max](vec_type<float , TARGET_ISA> v) {
434+ return VecType::exp (VecType::sub (v, vec_x_max));
435+ };
436+ #endif
388437
389438 const auto exp_sum = vectorized_map_reduce_all<TARGET_ISA>(
390439 x,
@@ -429,14 +478,21 @@ namespace ctranslate2 {
429478 }
430479
431480 const auto x_max = reduce_max<TARGET_ISA>(x, size);
432- const auto vec_x_max = VecType::load (x_max);
433-
434- const auto scalar_exp_func = [x_max](vec_type<float > v) {
435- return Vec<float >::exp (Vec<float >::sub (v, x_max));
481+ const auto scalar_exp_func = [x_max](float v) {
482+ return std::exp (v - x_max);
483+ };
484+ #ifdef USE_RVV
485+ float x_max_copy = x_max;
486+ auto vec_exp_func = [x_max_copy](vec_type<float , TARGET_ISA> v) {
487+ auto vec_x_max = VecType::load (x_max_copy);
488+ return VecType::exp (VecType::sub (v, vec_x_max));
436489 };
437- const auto vec_exp_func = [vec_x_max](vec_type<float , TARGET_ISA> v) {
490+ #else
491+ const auto vec_x_max = VecType::load (x_max);
492+ auto vec_exp_func = [vec_x_max](vec_type<float , TARGET_ISA> v) {
438493 return VecType::exp (VecType::sub (v, vec_x_max));
439494 };
495+ #endif
440496
441497 if (log) {
442498 const auto exp_sum = vectorized_map_reduce_all<TARGET_ISA>(
0 commit comments