Skip to content

Commit 59b59de

Browse files
amane-ame and ixgbe00 authored
ENH: Add RVV SIMD backend (#11)
* SIMD: add rvv optimization for RISC-V
* Rewrite rvv backend.
* Fix load2 and store2.

---------

Co-authored-by: ixgbe00 <yangwang@iscas.ac.cn>
1 parent 69f4df3 commit 59b59de

File tree

18 files changed

+2607
-19
lines changed

18 files changed

+2607
-19
lines changed

.github/workflows/linux_qemu.yml

Lines changed: 106 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -73,14 +73,6 @@ jobs:
7373
"(test_kind or test_multiarray or test_simd or test_umath or test_ufunc) and not test_gcd_overflow",
7474
"s390x"
7575
]
76-
- [
77-
"riscv64",
78-
"riscv64-linux-gnu",
79-
"riscv64/ubuntu:22.04",
80-
"-Dallow-noblas=true",
81-
"test_kind or test_multiarray or test_simd or test_umath or test_ufunc",
82-
"riscv64"
83-
]
8476
env:
8577
TOOLCHAIN_NAME: ${{ matrix.BUILD_PROP[1] }}
8678
DOCKER_CONTAINER: ${{ matrix.BUILD_PROP[2] }}
@@ -170,7 +162,7 @@ jobs:
170162
'"
171163
172164
173-
linux_loongarch64_qemu:
165+
linux_loongarch64_riscv64_qemu:
174166
# Only workflow_dispatch is enabled on forks.
175167
# To enable this job and subsequent jobs on a fork for other events, comment out:
176168
if: github.repository == 'numpy/numpy' || github.event_name == 'workflow_dispatch'
@@ -267,3 +259,108 @@ jobs:
267259
/bin/script -e -q -c "/bin/bash --noprofile --norc -eo pipefail -c '
268260
cd /numpy && spin test -- -k \"${RUNTIME_TEST_FILTER}\"
269261
'"
262+
263+
264+
linux_riscv64_qemu:
  # Builds NumPy for riscv64 and runs a subset of the test suite inside a
  # riscv64 Docker container that is executed through qemu-user-static.
  # The cross toolchain is installed on the host runner and symlinked into
  # the container via the `/host` bind mount.
  #
  # To enable this workflow on a fork, comment out:
  if: github.repository == 'numpy/numpy'
  runs-on: ubuntu-24.04
  # A failure of this job does not fail the workflow run.
  continue-on-error: true
  strategy:
    fail-fast: false
    matrix:
      # Positional properties, consumed by index in `env` and `name` below:
      # [0] job name, [1] toolchain triplet, [2] container image,
      # [3] meson options, [4] pytest `-k` filter, [5] docker platform arch
      BUILD_PROP:
        - [
            "riscv64",
            "riscv64-linux-gnu",
            "riscv64/ubuntu:24.04",
            "-Dallow-noblas=true",
            "test_kind or test_multiarray or test_simd or test_umath or test_ufunc",
            "riscv64"
          ]
  env:
    TOOLCHAIN_NAME: ${{ matrix.BUILD_PROP[1] }}
    DOCKER_CONTAINER: ${{ matrix.BUILD_PROP[2] }}
    MESON_OPTIONS: ${{ matrix.BUILD_PROP[3] }}
    RUNTIME_TEST_FILTER: ${{ matrix.BUILD_PROP[4] }}
    ARCH: ${{ matrix.BUILD_PROP[5] }}
    TERM: xterm-256color

  name: "${{ matrix.BUILD_PROP[0] }}"
  steps:
    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
      with:
        submodules: recursive
        fetch-tags: true
        persist-credentials: false

    - name: Initialize binfmt_misc for qemu-user-static
      run: |
        docker run --rm --privileged multiarch/qemu-user-static --reset -p yes

    - name: Install GCC cross-compilers
      run: |
        sudo apt update
        sudo apt install -y ninja-build gcc-14-${TOOLCHAIN_NAME} g++-14-${TOOLCHAIN_NAME} gfortran-14-${TOOLCHAIN_NAME}

    # The cached directory holds the `docker save` tarball written by the
    # "Creates new container" step. actions/cache expands the leading `~`
    # to the runner's home directory.
    - name: Cache docker container
      uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0
      id: container-cache
      with:
        path: ~/docker_${{ matrix.BUILD_PROP[1] }}
        key: container-${{ runner.os }}-${{ matrix.BUILD_PROP[1] }}-${{ matrix.BUILD_PROP[2] }}-${{ hashFiles('requirements/build_requirements.txt') }}

    - name: Creates new container
      if: steps.container-cache.outputs.cache-hit != 'true'
      run: |
        docker run --platform=linux/${ARCH} --name the_container --interactive \
          -v /:/host -v $(pwd):/numpy ${DOCKER_CONTAINER} /bin/bash -c "
            apt update &&
            apt install -y cmake git python3 python-is-python3 python3-dev python3-pip &&
            mkdir -p /lib64 && ln -s /host/lib64/ld-* /lib64/ &&
            ln -s /host/lib/x86_64-linux-gnu /lib/x86_64-linux-gnu &&
            rm -rf /usr/${TOOLCHAIN_NAME} && ln -s /host/usr/${TOOLCHAIN_NAME} /usr/${TOOLCHAIN_NAME} &&
            rm -rf /usr/lib/gcc/${TOOLCHAIN_NAME} && ln -s /host/usr/lib/gcc-cross/${TOOLCHAIN_NAME} /usr/lib/gcc/${TOOLCHAIN_NAME} &&
            rm -f /usr/bin/gcc && ln -s /host/usr/bin/${TOOLCHAIN_NAME}-gcc-14 /usr/bin/gcc &&
            rm -f /usr/bin/g++ && ln -s /host/usr/bin/${TOOLCHAIN_NAME}-g++-14 /usr/bin/g++ &&
            rm -f /usr/bin/gfortran && ln -s /host/usr/bin/${TOOLCHAIN_NAME}-gfortran-14 /usr/bin/gfortran &&
            rm -f /usr/bin/ar && ln -s /host/usr/bin/${TOOLCHAIN_NAME}-ar /usr/bin/ar &&
            rm -f /usr/bin/as && ln -s /host/usr/bin/${TOOLCHAIN_NAME}-as /usr/bin/as &&
            rm -f /usr/bin/ld && ln -s /host/usr/bin/${TOOLCHAIN_NAME}-ld /usr/bin/ld &&
            rm -f /usr/bin/ld.bfd && ln -s /host/usr/bin/${TOOLCHAIN_NAME}-ld.bfd /usr/bin/ld.bfd &&
            rm -f /usr/bin/ninja && ln -s /host/usr/bin/ninja /usr/bin/ninja &&
            git config --global --add safe.directory /numpy &&
            # No need to build ninja from source, the host ninja is used for the build
            grep -v ninja /numpy/requirements/build_requirements.txt > /tmp/build_requirements.txt &&
            python -m pip install --break-system-packages -r /tmp/build_requirements.txt &&
            python -m pip install --break-system-packages pytest pytest-xdist hypothesis typing_extensions pytest-timeout &&
            rm -f /usr/local/bin/ninja && mkdir -p /usr/local/bin && ln -s /host/usr/bin/ninja /usr/local/bin/ninja
          "
        docker commit the_container the_container
        # Use ${HOME} rather than a quoted "~": bash does not perform tilde
        # expansion inside quotes, so "~/..." would create a literal `~`
        # directory in the workspace and the cache path above would stay empty.
        mkdir -p "${HOME}/docker_${TOOLCHAIN_NAME}"
        docker save -o "${HOME}/docker_${TOOLCHAIN_NAME}/the_container.tar" the_container

    - name: Load container from cache
      if: steps.container-cache.outputs.cache-hit == 'true'
      # Must read from the same ${HOME}-based path the tarball was saved to.
      run: docker load -i "${HOME}/docker_${TOOLCHAIN_NAME}/the_container.tar"

    - name: Meson Build
      run: |
        docker run --rm --platform=linux/${ARCH} -e "TERM=xterm-256color" \
          -v $(pwd):/numpy -v /:/host the_container \
          /bin/script -e -q -c "/bin/bash --noprofile --norc -eo pipefail -c '
            cd /numpy && spin build --clean -- ${MESON_OPTIONS}
          '"

    # Printed even when the build step failed, to help diagnose the failure.
    - name: Meson Log
      if: always()
      run: 'cat build/meson-logs/meson-log.txt'

    - name: Run Tests
      run: |
        docker run --rm --platform=linux/${ARCH} -e "TERM=xterm-256color" \
          -v $(pwd):/numpy -v /:/host the_container \
          /bin/script -e -q -c "/bin/bash --noprofile --norc -eo pipefail -c '
            export F90=/usr/bin/gfortran
            cd /numpy && spin test -- --timeout=600 --durations=10 -k \"${RUNTIME_TEST_FILTER}\"
          '"

meson.options

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ option('test-simd', type: 'array',
3535
'VSX', 'VSX2', 'VSX3', 'VSX4',
3636
'NEON', 'ASIMD',
3737
'VX', 'VXE', 'VXE2',
38-
'LSX',
38+
'LSX', 'RVV',
3939
],
4040
description: 'Specify a list of CPU features to be tested against NumPy SIMD interface')
4141
option('test-simd-args', type: 'string', value: '',

meson_cpu/meson.build

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ min_features = {
9797
's390x': [],
9898
'arm': [],
9999
'aarch64': [ASIMD],
100-
'riscv64': [],
100+
'riscv64': [RVV],
101101
'wasm32': [],
102102
'loongarch64': [LSX],
103103
}.get(cpu_family, [])

numpy/_core/meson.build

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,10 @@ if host_machine.cpu_family() == 'loongarch64'
103103
add_project_arguments(['-DHWY_COMPILE_ONLY_SCALAR'], language: ['cpp'])
104104
endif
105105

106+
# RISC-V: compile every C/C++ source of the project with the vector
# extension enabled and a minimum vector length of 256 bits (Zvl256b).
# `-mrvv-vector-bits=zvl` fixes the RVV register width to the value implied
# by the `zvl256b` part of -march. It replaces the literal `256`, which only
# Clang accepts; GCC 14 accepts just `scalable` or `zvl` for this option.
# NOTE(review): these are project-wide arguments, so the resulting binaries
# presumably require RVV hardware with VLEN >= 256 -- confirm this is intended.
if host_machine.cpu_family() == 'riscv64'
  add_project_arguments('-march=rv64gcv_zvl256b', '-mrvv-vector-bits=zvl', language: ['c','cpp'])
endif
109+
106110
use_highway = not get_option('disable-highway')
107111
if use_highway and not fs.exists('src/highway/README.md')
108112
error('Missing the `highway` git submodule! Run `git submodule update --init` to fix this.')
@@ -750,6 +754,7 @@ _umath_tests_mtargets = mod_features.multi_targets(
750754
ASIMDHP, ASIMD, NEON,
751755
VSX3, VSX2, VSX,
752756
VXE, VX,
757+
RVV,
753758
],
754759
baseline: CPU_BASELINE,
755760
prefix: 'NPY_',
@@ -794,7 +799,8 @@ foreach gen_mtargets : [
794799
AVX512_SKX, AVX2, XOP, SSE42, SSE2,
795800
VSX2,
796801
ASIMD, NEON,
797-
VXE, VX
802+
VXE, VX,
803+
RVV,
798804
]
799805
],
800806
]
@@ -897,6 +903,7 @@ foreach gen_mtargets : [
897903
VSX3, VSX2,
898904
VXE, VX,
899905
LSX,
906+
RVV,
900907
]
901908
],
902909
[
@@ -908,6 +915,7 @@ foreach gen_mtargets : [
908915
VSX4, VSX2,
909916
VX,
910917
LSX,
918+
RVV,
911919
]
912920
],
913921
[
@@ -919,6 +927,7 @@ foreach gen_mtargets : [
919927
NEON,
920928
VXE, VX,
921929
LSX,
930+
RVV,
922931
]
923932
],
924933
[
@@ -937,6 +946,7 @@ foreach gen_mtargets : [
937946
NEON_VFPV4,
938947
VXE,
939948
LSX,
949+
RVV,
940950
]
941951
],
942952
[
@@ -960,6 +970,7 @@ foreach gen_mtargets : [
960970
VSX2,
961971
VXE, VX,
962972
LSX,
973+
RVV,
963974
]
964975
],
965976
[
@@ -978,6 +989,7 @@ foreach gen_mtargets : [
978989
NEON_VFPV4,
979990
VXE2, VXE,
980991
LSX,
992+
RVV,
981993
]
982994
],
983995
[
@@ -994,6 +1006,7 @@ foreach gen_mtargets : [
9941006
VSX2,
9951007
VXE, VX,
9961008
LSX,
1009+
RVV,
9971010
]
9981011
],
9991012
[
@@ -1005,6 +1018,7 @@ foreach gen_mtargets : [
10051018
ASIMD, NEON,
10061019
VXE, VX,
10071020
LSX,
1021+
RVV,
10081022
]
10091023
],
10101024
[
@@ -1015,6 +1029,7 @@ foreach gen_mtargets : [
10151029
VSX2,
10161030
ASIMD, NEON,
10171031
LSX,
1032+
RVV,
10181033
]
10191034
],
10201035
[
@@ -1026,6 +1041,7 @@ foreach gen_mtargets : [
10261041
VSX3, VSX2,
10271042
VXE, VX,
10281043
LSX,
1044+
RVV,
10291045
]
10301046
],
10311047
[
@@ -1037,6 +1053,7 @@ foreach gen_mtargets : [
10371053
VSX2,
10381054
VX,
10391055
LSX,
1056+
RVV,
10401057
]
10411058
],
10421059
]

numpy/_core/src/common/simd/intdiv.h

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,10 @@ NPY_FINLINE npyv_u8x3 npyv_divisor_u8(npy_uint8 d)
220220
divisor.val[0] = npyv_setall_u8(m);
221221
divisor.val[1] = npyv_setall_u8(sh1);
222222
divisor.val[2] = npyv_setall_u8(sh2);
223+
#elif defined(NPY_HAVE_RVV)
224+
divisor.val[0] = npyv_setall_u8(m);
225+
divisor.val[1] = npyv_setall_u8(sh1);
226+
divisor.val[2] = npyv_setall_u8(sh2);
223227
#else
224228
#error "please initialize the shifting operand for the new architecture"
225229
#endif
@@ -253,7 +257,7 @@ NPY_FINLINE npyv_s8x3 npyv_divisor_s8(npy_int8 d)
253257
npyv_s8x3 divisor;
254258
divisor.val[0] = npyv_setall_s8(m);
255259
divisor.val[2] = npyv_setall_s8(d < 0 ? -1 : 0);
256-
#if defined(NPY_HAVE_VSX2) || defined(NPY_HAVE_VX) || defined(NPY_HAVE_LSX)
260+
#if defined(NPY_HAVE_VSX2) || defined(NPY_HAVE_VX) || defined(NPY_HAVE_LSX) || defined(NPY_HAVE_RVV)
257261
divisor.val[1] = npyv_setall_s8(sh);
258262
#elif defined(NPY_HAVE_NEON)
259263
divisor.val[1] = npyv_setall_s8(-sh);
@@ -298,6 +302,9 @@ NPY_FINLINE npyv_u16x3 npyv_divisor_u16(npy_uint16 d)
298302
#elif defined(NPY_HAVE_LSX)
299303
divisor.val[1] = npyv_setall_u16(sh1);
300304
divisor.val[2] = npyv_setall_u16(sh2);
305+
#elif defined(NPY_HAVE_RVV)
306+
divisor.val[1] = npyv_setall_u16(sh1);
307+
divisor.val[2] = npyv_setall_u16(sh2);
301308
#else
302309
#error "please initialize the shifting operand for the new architecture"
303310
#endif
@@ -330,6 +337,8 @@ NPY_FINLINE npyv_s16x3 npyv_divisor_s16(npy_int16 d)
330337
divisor.val[1] = npyv_setall_s16(-sh);
331338
#elif defined(NPY_HAVE_LSX)
332339
divisor.val[1] = npyv_setall_s16(sh);
340+
#elif defined(NPY_HAVE_RVV)
341+
divisor.val[1] = npyv_setall_s16(sh);
333342
#else
334343
#error "please initialize the shifting operand for the new architecture"
335344
#endif
@@ -370,6 +379,9 @@ NPY_FINLINE npyv_u32x3 npyv_divisor_u32(npy_uint32 d)
370379
#elif defined(NPY_HAVE_LSX)
371380
divisor.val[1] = npyv_setall_u32(sh1);
372381
divisor.val[2] = npyv_setall_u32(sh2);
382+
#elif defined(NPY_HAVE_RVV)
383+
divisor.val[1] = npyv_setall_u32(sh1);
384+
divisor.val[2] = npyv_setall_u32(sh2);
373385
#else
374386
#error "please initialize the shifting operand for the new architecture"
375387
#endif
@@ -407,6 +419,8 @@ NPY_FINLINE npyv_s32x3 npyv_divisor_s32(npy_int32 d)
407419
divisor.val[1] = npyv_setall_s32(-sh);
408420
#elif defined(NPY_HAVE_LSX)
409421
divisor.val[1] = npyv_setall_s32(sh);
422+
#elif defined(NPY_HAVE_RVV)
423+
divisor.val[1] = npyv_setall_s32(sh);
410424
#else
411425
#error "please initialize the shifting operand for the new architecture"
412426
#endif
@@ -444,6 +458,9 @@ NPY_FINLINE npyv_u64x3 npyv_divisor_u64(npy_uint64 d)
444458
#elif defined(NPY_HAVE_LSX)
445459
divisor.val[1] = npyv_setall_u64(sh1);
446460
divisor.val[2] = npyv_setall_u64(sh2);
461+
#elif defined(NPY_HAVE_RVV)
462+
divisor.val[1] = npyv_setall_u64(sh1);
463+
divisor.val[2] = npyv_setall_u64(sh2);
447464
#else
448465
#error "please initialize the shifting operand for the new architecture"
449466
#endif
@@ -484,6 +501,8 @@ NPY_FINLINE npyv_s64x3 npyv_divisor_s64(npy_int64 d)
484501
divisor.val[1] = npyv_set_s64(sh);
485502
#elif defined(NPY_HAVE_LSX)
486503
divisor.val[1] = npyv_setall_s64(sh);
504+
#elif defined(NPY_HAVE_RVV)
505+
divisor.val[1] = npyv_setall_s64(sh);
487506
#else
488507
#error "please initialize the shifting operand for the new architecture"
489508
#endif

0 commit comments

Comments
 (0)