Skip to content

Commit 69f4df3

Browse files
authored
CI: Replace QEMU armhf with native (32-bit compatibility mode) (numpy#28653)
* CI: Tests NumPy on 32-bit ARM hard-float (armhf) via compatibility mode
* BUG, SIMD: Fix floating-point errors with positive infinity input in sqrt on armhf
  Guards against passing positive infinity to vrsqrteq_f32 in sqrt operation, which would raise invalid floating-point errors on ARMv7 architectures.
* TEST: Mark linspace subnormal test as xfail on ARM32 platforms
  Adds an xfail marker to the linspace subnormal test case for ARMv7 and AArch32 platforms. These platforms seem to flush subnormals to zero (FTZ) even when not explicitly enabled via the FPSCR register, causing the test to fail.
* BUG, SIMD: Fix ARMv8 feature detection in 32-bit mode
  Fix detection of `FPHP`, `ASIMDHP`, `ASIMDDP`, `ASIMDFHM` features on ARMv8 32-bit mode (aarch32). Fix memory leaks in CPU feature detection on Android by adding missing free() calls.
* CI: Remove QEMU-based armhf testing
  Remove QEMU-based armhf testing as we now use native 32-bit compatibility mode running on ARM64 GitHub runners in a separate implementation.
1 parent 77437b1 commit 69f4df3

File tree

7 files changed

+152
-85
lines changed

7 files changed

+152
-85
lines changed

.github/workflows/linux.yml

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,51 @@ jobs:
199199
run: |
200200
spin test -j2 -m full -- --timeout=600 --durations=10
201201
202+
203+
armhf_test:
204+
# Tests NumPy on 32-bit ARM hard-float (armhf) via compatibility mode
205+
# running on aarch64 (ARM 64-bit) GitHub runners.
206+
needs: [smoke_test]
207+
if: github.repository == 'numpy/numpy'
208+
runs-on: ubuntu-22.04-arm
209+
steps:
210+
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
211+
with:
212+
submodules: recursive
213+
fetch-tags: true
214+
persist-credentials: false
215+
216+
- name: Creates new container
217+
run: |
218+
docker run --name the_container --interactive \
219+
-v $(pwd):/numpy arm32v7/ubuntu:22.04 /bin/linux32 /bin/bash -c "
220+
apt update &&
221+
apt install -y ninja-build cmake git python3 python-is-python3 python3-dev python3-pip python3-venv &&
222+
python -m pip install -r /numpy/requirements/build_requirements.txt &&
223+
python -m pip install -r /numpy/requirements/test_requirements.txt
224+
"
225+
docker commit the_container the_container
226+
227+
- name: Meson Build
228+
run: |
229+
docker run --rm -e "TERM=xterm-256color" \
230+
-v $(pwd):/numpy the_container \
231+
/bin/script -e -q -c "/bin/linux32 /bin/bash --noprofile --norc -eo pipefail -c '
232+
cd /numpy && spin build
233+
'"
234+
235+
- name: Meson Log
236+
if: always()
237+
run: 'cat build/meson-logs/meson-log.txt'
238+
239+
- name: Run Tests
240+
run: |
241+
docker run --rm -e "TERM=xterm-256color" \
242+
-v $(pwd):/numpy the_container \
243+
/bin/script -e -q -c "/bin/linux32 /bin/bash --noprofile --norc -eo pipefail -c '
244+
cd /numpy && spin test -m full -- --timeout=600 --durations=10
245+
'"
246+
202247
benchmark:
203248
needs: [smoke_test]
204249
runs-on: ubuntu-latest

.github/workflows/linux_qemu.yml

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -38,17 +38,6 @@ jobs:
3838
fail-fast: false
3939
matrix:
4040
BUILD_PROP:
41-
- [
42-
"armhf",
43-
"arm-linux-gnueabihf",
44-
"arm32v7/ubuntu:22.04",
45-
"-Dallow-noblas=true",
46-
# test_unary_spurious_fpexception is currently skipped
47-
# FIXME(@seiko2plus): Requires confirmation for the following issue:
48-
# The presence of an FP invalid exception caused by sqrt. Unsure if this is a qemu bug or not.
49-
"(test_kind or test_multiarray or test_simd or test_umath or test_ufunc) and not test_unary_spurious_fpexception",
50-
"arm"
51-
]
5241
- [
5342
"ppc64le",
5443
"powerpc64le-linux-gnu",

numpy/_core/src/common/npy_cpu_features.c

Lines changed: 20 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -772,34 +772,33 @@ npy__cpu_init_features_linux(void)
772772
#endif
773773
}
774774
#ifdef __arm__
775+
npy__cpu_have[NPY_CPU_FEATURE_NEON] = (hwcap & NPY__HWCAP_NEON) != 0;
776+
if (npy__cpu_have[NPY_CPU_FEATURE_NEON]) {
777+
npy__cpu_have[NPY_CPU_FEATURE_NEON_FP16] = (hwcap & NPY__HWCAP_HALF) != 0;
778+
npy__cpu_have[NPY_CPU_FEATURE_NEON_VFPV4] = (hwcap & NPY__HWCAP_VFPv4) != 0;
779+
}
775780
// Detect Arm8 (aarch32 state)
776781
if ((hwcap2 & NPY__HWCAP2_AES) || (hwcap2 & NPY__HWCAP2_SHA1) ||
777782
(hwcap2 & NPY__HWCAP2_SHA2) || (hwcap2 & NPY__HWCAP2_PMULL) ||
778783
(hwcap2 & NPY__HWCAP2_CRC32))
779784
{
780-
hwcap = hwcap2;
785+
npy__cpu_have[NPY_CPU_FEATURE_ASIMD] = npy__cpu_have[NPY_CPU_FEATURE_NEON];
786+
}
781787
#else
782-
if (1)
783-
{
784-
if (!(hwcap & (NPY__HWCAP_FP | NPY__HWCAP_ASIMD))) {
785-
// Is this could happen? maybe disabled by kernel
786-
// BTW this will break the baseline of AARCH64
787-
return 1;
788-
}
789-
#endif
790-
npy__cpu_have[NPY_CPU_FEATURE_FPHP] = (hwcap & NPY__HWCAP_FPHP) != 0;
791-
npy__cpu_have[NPY_CPU_FEATURE_ASIMDHP] = (hwcap & NPY__HWCAP_ASIMDHP) != 0;
792-
npy__cpu_have[NPY_CPU_FEATURE_ASIMDDP] = (hwcap & NPY__HWCAP_ASIMDDP) != 0;
793-
npy__cpu_have[NPY_CPU_FEATURE_ASIMDFHM] = (hwcap & NPY__HWCAP_ASIMDFHM) != 0;
794-
npy__cpu_have[NPY_CPU_FEATURE_SVE] = (hwcap & NPY__HWCAP_SVE) != 0;
795-
npy__cpu_init_features_arm8();
796-
} else {
797-
npy__cpu_have[NPY_CPU_FEATURE_NEON] = (hwcap & NPY__HWCAP_NEON) != 0;
798-
if (npy__cpu_have[NPY_CPU_FEATURE_NEON]) {
799-
npy__cpu_have[NPY_CPU_FEATURE_NEON_FP16] = (hwcap & NPY__HWCAP_HALF) != 0;
800-
npy__cpu_have[NPY_CPU_FEATURE_NEON_VFPV4] = (hwcap & NPY__HWCAP_VFPv4) != 0;
801-
}
788+
if (!(hwcap & (NPY__HWCAP_FP | NPY__HWCAP_ASIMD))) {
789+
// Can this happen? Maybe if disabled by the kernel.
790+
// BTW this will break the baseline of AARCH64
791+
return 1;
802792
}
793+
npy__cpu_init_features_arm8();
794+
#endif
795+
npy__cpu_have[NPY_CPU_FEATURE_FPHP] = (hwcap & NPY__HWCAP_FPHP) != 0;
796+
npy__cpu_have[NPY_CPU_FEATURE_ASIMDHP] = (hwcap & NPY__HWCAP_ASIMDHP) != 0;
797+
npy__cpu_have[NPY_CPU_FEATURE_ASIMDDP] = (hwcap & NPY__HWCAP_ASIMDDP) != 0;
798+
npy__cpu_have[NPY_CPU_FEATURE_ASIMDFHM] = (hwcap & NPY__HWCAP_ASIMDFHM) != 0;
799+
#ifndef __arm__
800+
npy__cpu_have[NPY_CPU_FEATURE_SVE] = (hwcap & NPY__HWCAP_SVE) != 0;
801+
#endif
803802
return 1;
804803
}
805804
#endif

numpy/_core/src/common/npy_cpuinfo_parser.h

Lines changed: 69 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -36,25 +36,43 @@
3636
#define NPY__HWCAP 16
3737
#define NPY__HWCAP2 26
3838

39-
// arch/arm/include/uapi/asm/hwcap.h
40-
#define NPY__HWCAP_HALF (1 << 1)
41-
#define NPY__HWCAP_NEON (1 << 12)
42-
#define NPY__HWCAP_VFPv3 (1 << 13)
43-
#define NPY__HWCAP_VFPv4 (1 << 16)
44-
#define NPY__HWCAP2_AES (1 << 0)
45-
#define NPY__HWCAP2_PMULL (1 << 1)
46-
#define NPY__HWCAP2_SHA1 (1 << 2)
47-
#define NPY__HWCAP2_SHA2 (1 << 3)
48-
#define NPY__HWCAP2_CRC32 (1 << 4)
49-
// arch/arm64/include/uapi/asm/hwcap.h
50-
#define NPY__HWCAP_FP (1 << 0)
51-
#define NPY__HWCAP_ASIMD (1 << 1)
52-
#define NPY__HWCAP_FPHP (1 << 9)
53-
#define NPY__HWCAP_ASIMDHP (1 << 10)
54-
#define NPY__HWCAP_ASIMDDP (1 << 20)
55-
#define NPY__HWCAP_SVE (1 << 22)
56-
#define NPY__HWCAP_ASIMDFHM (1 << 23)
57-
/*
39+
#ifdef __arm__
40+
// arch/arm/include/uapi/asm/hwcap.h
41+
#define NPY__HWCAP_HALF (1 << 1)
42+
#define NPY__HWCAP_NEON (1 << 12)
43+
#define NPY__HWCAP_VFPv3 (1 << 13)
44+
#define NPY__HWCAP_VFPv4 (1 << 16)
45+
46+
#define NPY__HWCAP_FPHP (1 << 22)
47+
#define NPY__HWCAP_ASIMDHP (1 << 23)
48+
#define NPY__HWCAP_ASIMDDP (1 << 24)
49+
#define NPY__HWCAP_ASIMDFHM (1 << 25)
50+
51+
#define NPY__HWCAP2_AES (1 << 0)
52+
#define NPY__HWCAP2_PMULL (1 << 1)
53+
#define NPY__HWCAP2_SHA1 (1 << 2)
54+
#define NPY__HWCAP2_SHA2 (1 << 3)
55+
#define NPY__HWCAP2_CRC32 (1 << 4)
56+
#else
57+
// arch/arm64/include/uapi/asm/hwcap.h
58+
#define NPY__HWCAP_FP (1 << 0)
59+
#define NPY__HWCAP_ASIMD (1 << 1)
60+
61+
#define NPY__HWCAP_FPHP (1 << 9)
62+
#define NPY__HWCAP_ASIMDHP (1 << 10)
63+
#define NPY__HWCAP_ASIMDDP (1 << 20)
64+
#define NPY__HWCAP_ASIMDFHM (1 << 23)
65+
66+
#define NPY__HWCAP_AES (1 << 3)
67+
#define NPY__HWCAP_PMULL (1 << 4)
68+
#define NPY__HWCAP_SHA1 (1 << 5)
69+
#define NPY__HWCAP_SHA2 (1 << 6)
70+
#define NPY__HWCAP_CRC32 (1 << 7)
71+
#define NPY__HWCAP_SVE (1 << 22)
72+
#endif
73+
74+
75+
/*
5876
* Get the size of a file by reading it until the end. This is needed
5977
* because files under /proc do not always return a valid size when
6078
* using fseek(0, SEEK_END) + ftell(). Nor can they be mmap()-ed.
@@ -87,7 +105,7 @@ get_file_size(const char* pathname)
87105
return result;
88106
}
89107

90-
/*
108+
/*
91109
* Read the content of /proc/cpuinfo into a user-provided buffer.
92110
* Return the length of the data, or -1 on error. Does *not*
93111
* zero-terminate the content. Will not read more
@@ -123,7 +141,7 @@ read_file(const char* pathname, char* buffer, size_t buffsize)
123141
return count;
124142
}
125143

126-
/*
144+
/*
127145
* Extract the content of the first occurrence of a given field in
128146
* the content of /proc/cpuinfo and return it as a heap-allocated
129147
* string that must be freed by the caller.
@@ -182,7 +200,7 @@ extract_cpuinfo_field(const char* buffer, int buflen, const char* field)
182200
return result;
183201
}
184202

185-
/*
203+
/*
186204
* Checks that a space-separated list of items contains one given 'item'.
187205
* Returns 1 if found, 0 otherwise.
188206
*/
@@ -220,44 +238,51 @@ has_list_item(const char* list, const char* item)
220238
return 0;
221239
}
222240

223-
static void setHwcap(char* cpuFeatures, unsigned long* hwcap) {
224-
*hwcap |= has_list_item(cpuFeatures, "neon") ? NPY__HWCAP_NEON : 0;
225-
*hwcap |= has_list_item(cpuFeatures, "half") ? NPY__HWCAP_HALF : 0;
226-
*hwcap |= has_list_item(cpuFeatures, "vfpv3") ? NPY__HWCAP_VFPv3 : 0;
227-
*hwcap |= has_list_item(cpuFeatures, "vfpv4") ? NPY__HWCAP_VFPv4 : 0;
228-
229-
*hwcap |= has_list_item(cpuFeatures, "asimd") ? NPY__HWCAP_ASIMD : 0;
230-
*hwcap |= has_list_item(cpuFeatures, "fp") ? NPY__HWCAP_FP : 0;
231-
*hwcap |= has_list_item(cpuFeatures, "fphp") ? NPY__HWCAP_FPHP : 0;
232-
*hwcap |= has_list_item(cpuFeatures, "asimdhp") ? NPY__HWCAP_ASIMDHP : 0;
233-
*hwcap |= has_list_item(cpuFeatures, "asimddp") ? NPY__HWCAP_ASIMDDP : 0;
234-
*hwcap |= has_list_item(cpuFeatures, "asimdfhm") ? NPY__HWCAP_ASIMDFHM : 0;
235-
}
236-
237241
static int
238242
get_feature_from_proc_cpuinfo(unsigned long *hwcap, unsigned long *hwcap2) {
239-
char* cpuinfo = NULL;
240-
int cpuinfo_len;
241-
cpuinfo_len = get_file_size("/proc/cpuinfo");
243+
*hwcap = 0;
244+
*hwcap2 = 0;
245+
246+
int cpuinfo_len = get_file_size("/proc/cpuinfo");
242247
if (cpuinfo_len < 0) {
243248
return 0;
244249
}
245-
cpuinfo = malloc(cpuinfo_len);
250+
char *cpuinfo = malloc(cpuinfo_len);
246251
if (cpuinfo == NULL) {
247252
return 0;
248253
}
254+
249255
cpuinfo_len = read_file("/proc/cpuinfo", cpuinfo, cpuinfo_len);
250-
char* cpuFeatures = extract_cpuinfo_field(cpuinfo, cpuinfo_len, "Features");
251-
if(cpuFeatures == NULL) {
256+
char *cpuFeatures = extract_cpuinfo_field(cpuinfo, cpuinfo_len, "Features");
257+
if (cpuFeatures == NULL) {
258+
free(cpuinfo);
252259
return 0;
253260
}
254-
setHwcap(cpuFeatures, hwcap);
255-
*hwcap2 |= *hwcap;
261+
*hwcap |= has_list_item(cpuFeatures, "fphp") ? NPY__HWCAP_FPHP : 0;
262+
*hwcap |= has_list_item(cpuFeatures, "asimdhp") ? NPY__HWCAP_ASIMDHP : 0;
263+
*hwcap |= has_list_item(cpuFeatures, "asimddp") ? NPY__HWCAP_ASIMDDP : 0;
264+
*hwcap |= has_list_item(cpuFeatures, "asimdfhm") ? NPY__HWCAP_ASIMDFHM : 0;
265+
#ifdef __arm__
266+
*hwcap |= has_list_item(cpuFeatures, "neon") ? NPY__HWCAP_NEON : 0;
267+
*hwcap |= has_list_item(cpuFeatures, "half") ? NPY__HWCAP_HALF : 0;
268+
*hwcap |= has_list_item(cpuFeatures, "vfpv3") ? NPY__HWCAP_VFPv3 : 0;
269+
*hwcap |= has_list_item(cpuFeatures, "vfpv4") ? NPY__HWCAP_VFPv4 : 0;
256270
*hwcap2 |= has_list_item(cpuFeatures, "aes") ? NPY__HWCAP2_AES : 0;
257271
*hwcap2 |= has_list_item(cpuFeatures, "pmull") ? NPY__HWCAP2_PMULL : 0;
258272
*hwcap2 |= has_list_item(cpuFeatures, "sha1") ? NPY__HWCAP2_SHA1 : 0;
259273
*hwcap2 |= has_list_item(cpuFeatures, "sha2") ? NPY__HWCAP2_SHA2 : 0;
260274
*hwcap2 |= has_list_item(cpuFeatures, "crc32") ? NPY__HWCAP2_CRC32 : 0;
275+
#else
276+
*hwcap |= has_list_item(cpuFeatures, "asimd") ? NPY__HWCAP_ASIMD : 0;
277+
*hwcap |= has_list_item(cpuFeatures, "fp") ? NPY__HWCAP_FP : 0;
278+
*hwcap |= has_list_item(cpuFeatures, "aes") ? NPY__HWCAP_AES : 0;
279+
*hwcap |= has_list_item(cpuFeatures, "pmull") ? NPY__HWCAP_PMULL : 0;
280+
*hwcap |= has_list_item(cpuFeatures, "sha1") ? NPY__HWCAP_SHA1 : 0;
281+
*hwcap |= has_list_item(cpuFeatures, "sha2") ? NPY__HWCAP_SHA2 : 0;
282+
*hwcap |= has_list_item(cpuFeatures, "crc32") ? NPY__HWCAP_CRC32 : 0;
283+
#endif
284+
free(cpuinfo);
285+
free(cpuFeatures);
261286
return 1;
262287
}
263288
#endif /* NUMPY_CORE_SRC_COMMON_NPY_CPUINFO_PARSER_H_ */

numpy/_core/src/common/simd/neon/math.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,13 @@ NPY_FINLINE npyv_f32 npyv_square_f32(npyv_f32 a)
2828
// Based on ARM doc, see https://developer.arm.com/documentation/dui0204/j/CIHDIACI
2929
NPY_FINLINE npyv_f32 npyv_sqrt_f32(npyv_f32 a)
3030
{
31+
const npyv_f32 one = vdupq_n_f32(1.0f);
3132
const npyv_f32 zero = vdupq_n_f32(0.0f);
3233
const npyv_u32 pinf = vdupq_n_u32(0x7f800000);
3334
npyv_u32 is_zero = vceqq_f32(a, zero), is_inf = vceqq_u32(vreinterpretq_u32_f32(a), pinf);
34-
// guard against floating-point division-by-zero error
35-
npyv_f32 guard_byz = vbslq_f32(is_zero, vreinterpretq_f32_u32(pinf), a);
35+
npyv_u32 is_special = vorrq_u32(is_zero, is_inf);
36+
// guard against division-by-zero and infinity input to vrsqrte to avoid invalid fp error
37+
npyv_f32 guard_byz = vbslq_f32(is_special, one, a);
3638
// estimate to (1/√a)
3739
npyv_f32 rsqrte = vrsqrteq_f32(guard_byz);
3840
/**
@@ -47,10 +49,8 @@ NPY_FINLINE npyv_f32 npyv_square_f32(npyv_f32 a)
4749
rsqrte = vmulq_f32(vrsqrtsq_f32(vmulq_f32(a, rsqrte), rsqrte), rsqrte);
4850
// a * (1/√a)
4951
npyv_f32 sqrt = vmulq_f32(a, rsqrte);
50-
// return zero if the a is zero
51-
// - return zero if a is zero.
52-
// - return positive infinity if a is positive infinity
53-
return vbslq_f32(vorrq_u32(is_zero, is_inf), a, sqrt);
52+
// Handle special cases: return a for zeros and positive infinities
53+
return vbslq_f32(is_special, a, sqrt);
5454
}
5555
#endif // NPY_SIMD_F64
5656

numpy/_core/tests/test_cpu_features.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -407,8 +407,11 @@ class Test_ARM_Features(AbstractTest):
407407
def load_flags(self):
408408
self.load_flags_cpuinfo("Features")
409409
arch = self.get_cpuinfo_item("CPU architecture")
410-
# in case of mounting virtual filesystem of aarch64 kernel
411-
is_rootfs_v8 = int('0' + next(iter(arch))) > 7 if arch else 0
410+
# in case of mounting virtual filesystem of aarch64 kernel without linux32
411+
is_rootfs_v8 = (
412+
not re.match("^armv[0-9]+l$", machine) and
413+
(int('0' + next(iter(arch))) > 7 if arch else 0)
414+
)
412415
if re.match("^(aarch64|AARCH64)", machine) or is_rootfs_v8:
413416
self.features_map = {
414417
"NEON": "ASIMD", "HALF": "ASIMD", "VFPV4": "ASIMD"

numpy/_core/tests/test_function_base.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import sys
2-
2+
import platform
33
import pytest
44

55
import numpy as np
@@ -14,6 +14,9 @@
1414
IS_PYPY
1515
)
1616

17+
def _is_armhf():
18+
# Check if the current platform is ARMHF (32-bit ARM architecture)
19+
return platform.machine().startswith('arm') and platform.architecture()[0] == '32bit'
1720

1821
class PhysicalQuantity(float):
1922
def __new__(cls, value):
@@ -414,6 +417,9 @@ def __mul__(self, other):
414417

415418
assert_equal(linspace(one, five), linspace(1, 5))
416419

420+
# ARMv7/AArch32 platforms seem to flush subnormals to zero (FTZ)
# even when not explicitly enabled via the FPSCR register
421+
@pytest.mark.xfail(_is_armhf(),
422+
reason="ARMHF/AArch32 platforms seem to FTZ subnormals")
417423
def test_denormal_numbers(self):
418424
# Regression test for gh-5437. Will probably fail when compiled
419425
# with ICC, which flushes denormals to zero

0 commit comments

Comments
 (0)