Skip to content

Commit cbe42e7

Browse files
committed
SIMD: add RVV optimization for RISC-V
1 parent 3b423b5 commit cbe42e7

File tree

17 files changed

+4948
-7
lines changed

17 files changed

+4948
-7
lines changed

meson.options

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ option('test-simd', type: 'array',
3535
'VSX', 'VSX2', 'VSX3', 'VSX4',
3636
'NEON', 'ASIMD',
3737
'VX', 'VXE', 'VXE2',
38-
'LSX',
38+
'LSX','RVV',
3939
],
4040
description: 'Specify a list of CPU features to be tested against NumPy SIMD interface')
4141
option('test-simd-args', type: 'string', value: '',

meson_cpu/meson.build

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ min_features = {
9797
's390x': [],
9898
'arm': [],
9999
'aarch64': [ASIMD],
100-
'riscv64': [],
100+
'riscv64': [RVV],
101101
'wasm32': [],
102102
'loongarch64': [LSX],
103103
}.get(cpu_family, [])

numpy/_core/meson.build

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,10 @@ if host_machine.cpu_family() == 'loongarch64'
103103
add_project_arguments(['-DHWY_COMPILE_ONLY_SCALAR'], language: ['cpp'])
104104
endif
105105

106+
if host_machine.cpu_family() == 'riscv64'
107+
add_project_arguments('-march=rv64gcv_zvl128b', '-mrvv-vector-bits=zvl', language: ['c','cpp'])
108+
endif
109+
106110
use_highway = not get_option('disable-highway')
107111
if use_highway and not fs.exists('src/highway/README.md')
108112
error('Missing the `highway` git submodule! Run `git submodule update --init` to fix this.')
@@ -740,6 +744,7 @@ _umath_tests_mtargets = mod_features.multi_targets(
740744
ASIMDHP, ASIMD, NEON,
741745
VSX3, VSX2, VSX,
742746
VXE, VX,
747+
RVV,
743748
],
744749
baseline: CPU_BASELINE,
745750
prefix: 'NPY_',
@@ -784,7 +789,8 @@ foreach gen_mtargets : [
784789
AVX512_SKX, AVX2, XOP, SSE42, SSE2,
785790
VSX2,
786791
ASIMD, NEON,
787-
VXE, VX
792+
VXE, VX,
793+
RVV,
788794
]
789795
],
790796
]
@@ -887,6 +893,7 @@ foreach gen_mtargets : [
887893
VSX3, VSX2,
888894
VXE, VX,
889895
LSX,
896+
RVV,
890897
]
891898
],
892899
[
@@ -898,6 +905,7 @@ foreach gen_mtargets : [
898905
VSX4, VSX2,
899906
VX,
900907
LSX,
908+
RVV,
901909
]
902910
],
903911
[
@@ -909,6 +917,7 @@ foreach gen_mtargets : [
909917
NEON,
910918
VXE, VX,
911919
LSX,
920+
RVV,
912921
]
913922
],
914923
[
@@ -938,6 +947,7 @@ foreach gen_mtargets : [
938947
VSX2,
939948
VX,
940949
LSX,
950+
RVV,
941951
]
942952
],
943953
[
@@ -949,6 +959,7 @@ foreach gen_mtargets : [
949959
VSX2,
950960
VXE, VX,
951961
LSX,
962+
RVV,
952963
]
953964
],
954965
[
@@ -967,6 +978,7 @@ foreach gen_mtargets : [
967978
NEON_VFPV4,
968979
VXE2, VXE,
969980
LSX,
981+
RVV,
970982
]
971983
],
972984
[
@@ -983,6 +995,7 @@ foreach gen_mtargets : [
983995
VSX2,
984996
VXE, VX,
985997
LSX,
998+
RVV,
986999
]
9871000
],
9881001
[
@@ -994,6 +1007,7 @@ foreach gen_mtargets : [
9941007
ASIMD, NEON,
9951008
VXE, VX,
9961009
LSX,
1010+
RVV,
9971011
]
9981012
],
9991013
[
@@ -1004,6 +1018,7 @@ foreach gen_mtargets : [
10041018
VSX2,
10051019
ASIMD, NEON,
10061020
LSX,
1021+
RVV,
10071022
]
10081023
],
10091024
[
@@ -1015,6 +1030,7 @@ foreach gen_mtargets : [
10151030
VSX3, VSX2,
10161031
VXE, VX,
10171032
LSX,
1033+
RVV,
10181034
]
10191035
],
10201036
[
@@ -1026,6 +1042,7 @@ foreach gen_mtargets : [
10261042
VSX2,
10271043
VX,
10281044
LSX,
1045+
RVV,
10291046
]
10301047
],
10311048
]

numpy/_core/src/common/simd/intdiv.h

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,10 @@ NPY_FINLINE npyv_u8x3 npyv_divisor_u8(npy_uint8 d)
220220
divisor.val[0] = npyv_setall_u8(m);
221221
divisor.val[1] = npyv_setall_u8(sh1);
222222
divisor.val[2] = npyv_setall_u8(sh2);
223+
#elif defined(NPY_HAVE_RVV)
224+
divisor.val[0] = npyv_setall_u8(m);
225+
divisor.val[1] = npyv_setall_u8(sh1);
226+
divisor.val[2] = npyv_setall_u8(sh2);
223227
#else
224228
#error "please initialize the shifting operand for the new architecture"
225229
#endif
@@ -253,7 +257,7 @@ NPY_FINLINE npyv_s8x3 npyv_divisor_s8(npy_int8 d)
253257
npyv_s8x3 divisor;
254258
divisor.val[0] = npyv_setall_s8(m);
255259
divisor.val[2] = npyv_setall_s8(d < 0 ? -1 : 0);
256-
#if defined(NPY_HAVE_VSX2) || defined(NPY_HAVE_VX) || defined(NPY_HAVE_LSX)
260+
#if defined(NPY_HAVE_VSX2) || defined(NPY_HAVE_VX) || defined(NPY_HAVE_LSX) || defined(NPY_HAVE_RVV)
257261
divisor.val[1] = npyv_setall_s8(sh);
258262
#elif defined(NPY_HAVE_NEON)
259263
divisor.val[1] = npyv_setall_s8(-sh);
@@ -298,6 +302,9 @@ NPY_FINLINE npyv_u16x3 npyv_divisor_u16(npy_uint16 d)
298302
#elif defined(NPY_HAVE_LSX)
299303
divisor.val[1] = npyv_setall_u16(sh1);
300304
divisor.val[2] = npyv_setall_u16(sh2);
305+
#elif defined(NPY_HAVE_RVV)
306+
divisor.val[1] = npyv_setall_u16(sh1);
307+
divisor.val[2] = npyv_setall_u16(sh2);
301308
#else
302309
#error "please initialize the shifting operand for the new architecture"
303310
#endif
@@ -330,6 +337,8 @@ NPY_FINLINE npyv_s16x3 npyv_divisor_s16(npy_int16 d)
330337
divisor.val[1] = npyv_setall_s16(-sh);
331338
#elif defined(NPY_HAVE_LSX)
332339
divisor.val[1] = npyv_setall_s16(sh);
340+
#elif defined(NPY_HAVE_RVV)
341+
divisor.val[1] = npyv_setall_s16(sh);
333342
#else
334343
#error "please initialize the shifting operand for the new architecture"
335344
#endif
@@ -370,6 +379,9 @@ NPY_FINLINE npyv_u32x3 npyv_divisor_u32(npy_uint32 d)
370379
#elif defined(NPY_HAVE_LSX)
371380
divisor.val[1] = npyv_setall_u32(sh1);
372381
divisor.val[2] = npyv_setall_u32(sh2);
382+
#elif defined(NPY_HAVE_RVV)
383+
divisor.val[1] = npyv_setall_u32(sh1);
384+
divisor.val[2] = npyv_setall_u32(sh2);
373385
#else
374386
#error "please initialize the shifting operand for the new architecture"
375387
#endif
@@ -407,6 +419,8 @@ NPY_FINLINE npyv_s32x3 npyv_divisor_s32(npy_int32 d)
407419
divisor.val[1] = npyv_setall_s32(-sh);
408420
#elif defined(NPY_HAVE_LSX)
409421
divisor.val[1] = npyv_setall_s32(sh);
422+
#elif defined(NPY_HAVE_RVV)
423+
divisor.val[1] = npyv_setall_s32(sh);
410424
#else
411425
#error "please initialize the shifting operand for the new architecture"
412426
#endif
@@ -444,6 +458,9 @@ NPY_FINLINE npyv_u64x3 npyv_divisor_u64(npy_uint64 d)
444458
#elif defined(NPY_HAVE_LSX)
445459
divisor.val[1] = npyv_setall_u64(sh1);
446460
divisor.val[2] = npyv_setall_u64(sh2);
461+
#elif defined(NPY_HAVE_RVV)
462+
divisor.val[1] = npyv_setall_u64(sh1);
463+
divisor.val[2] = npyv_setall_u64(sh2);
447464
#else
448465
#error "please initialize the shifting operand for the new architecture"
449466
#endif
@@ -484,6 +501,8 @@ NPY_FINLINE npyv_s64x3 npyv_divisor_s64(npy_int64 d)
484501
divisor.val[1] = npyv_set_s64(sh);
485502
#elif defined(NPY_HAVE_LSX)
486503
divisor.val[1] = npyv_setall_s64(sh);
504+
#elif defined(NPY_HAVE_RVV)
505+
divisor.val[1] = npyv_setall_s64(sh);
487506
#else
488507
#error "please initialize the shifting operand for the new architecture"
489508
#endif

0 commit comments

Comments
 (0)