Skip to content

Commit 1551fa2

Browse files
committed
Address review and fix test
1 parent bc46a93 commit 1551fa2

File tree

3 files changed

+18
-5
lines changed

3 files changed

+18
-5
lines changed

site/source/docs/porting/simd.rst

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1220,7 +1220,9 @@ The following table highlights the availability and expected performance of diff
12201220
* - _mm_i64gather_epi64
12211221
- ❌ scalarized
12221222

1223-
All the 128-bit wide instructions from AVX2 instruction set are listed. Only a small part of the 256-bit AVX2 instruction set are listed, most of the 256-bit wide AVX2 instructions are emulated by two 128-bit wide instructions.
1223+
All the 128-bit wide instructions from the AVX2 instruction set are listed.
1224+
Only a small part of the 256-bit AVX2 instruction set is listed; most of the
1225+
256-bit wide AVX2 instructions are emulated by two 128-bit wide instructions.
12241226

12251227
======================================================
12261228
Compiling SIMD code targeting ARM NEON instruction set

test/sse/test_avx2.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -262,7 +262,8 @@ void test_statisticsa(void) {
262262
Ret_M256i_M256i(__m256i, _mm256_avg_epu8);
263263
}
264264

265-
void test_shift(void) {
265+
// Split test_shift into two functions to reduce memory consumption
266+
void test_shift1(void) {
266267
Ret_M256i_Tint(__m256i, _mm256_slli_si256);
267268
Ret_M256i_Tint(__m256i, _mm256_bslli_epi128);
268269

@@ -277,7 +278,9 @@ void test_shift(void) {
277278
Ret_M256i_Tint(__m256i, _mm256_srai_epi32);
278279
Ret_M256i_M128i(__m256i, _mm256_sra_epi16);
279280
Ret_M256i_M128i(__m256i, _mm256_sra_epi32);
281+
}
280282

283+
void test_shift2(void) {
281284
Ret_M256i_Tint(__m256i, _mm256_srli_si256);
282285
Ret_M256i_Tint(__m256i, _mm256_bsrli_epi128);
283286

@@ -302,6 +305,11 @@ void test_shift(void) {
302305
Ret_M256i_M256i(__m256i, _mm256_srlv_epi64);
303306
}
304307

308+
void test_shift(void) {
309+
test_shift1();
310+
test_shift2();
311+
}
312+
305313
int main() {
306314
assert(numInterestingFloats % 8 == 0);
307315
assert(numInterestingInts % 8 == 0);

test/test_other.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9465,7 +9465,8 @@ def test_standalone_system_headers(self):
94659465
print('header: ' + header)
94669466
# These headers cannot be included in isolation.
94679467
# e.g: error: unknown type name 'EGLDisplay'
9468-
if header in ['eglext.h', 'SDL_config_macosx.h', 'glext.h', 'gl2ext.h']:
9468+
# Don't include avxintrin.h and avx2intrin.h directly, include immintrin.h instead
9469+
if header in ['eglext.h', 'SDL_config_macosx.h', 'glext.h', 'gl2ext.h', 'avxintrin.h', 'avx2intrin.h']:
94699470
continue
94709471
# These headers are C++ only and cannot be included from C code.
94719472
# But we still want to check they can be included on their own without
@@ -9474,12 +9475,14 @@ def test_standalone_system_headers(self):
94749475
'wire.h', 'val.h', 'bind.h',
94759476
'webgpu_cpp.h', 'webgpu_cpp_chained_struct.h', 'webgpu_enum_class_bitmasks.h',
94769477
# Some headers are not yet C compatible
9477-
'arm_neon.h', 'avxintrin.h', 'immintrin.h',
9478+
'arm_neon.h', 'immintrin.h',
94789479
]
94799480
if directory and directory != 'compat':
94809481
header = f'{directory}/{header}'
94819482
inc = f'#include <{header}>\n__attribute__((weak)) int foo;\n'
9482-
cflags = ['-Werror', '-Wall', '-pedantic', '-mavx', '-msimd128', '-msse3']
9483+
cflags = ['-Werror', '-Wall', '-pedantic', '-msimd128', '-msse4']
9484+
if header == 'immintrin.h':
9485+
cflags.append('-mavx2')
94839486
if cxx_only:
94849487
create_file('a.cxx', inc)
94859488
create_file('b.cxx', inc)

0 commit comments

Comments
 (0)