Skip to content

Commit e838094

Browse files
committed
Address review and fix test
1 parent 990c727 commit e838094

File tree

3 files changed

+18
-5
lines changed

3 files changed

+18
-5
lines changed

site/source/docs/porting/simd.rst

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1220,7 +1220,9 @@ The following table highlights the availability and expected performance of diff
12201220
* - _mm_i64gather_epi64
12211221
- ❌ scalarized
12221222

1223-
All the 128-bit wide instructions from AVX2 instruction set are listed. Only a small part of the 256-bit AVX2 instruction set are listed, most of the 256-bit wide AVX2 instructions are emulated by two 128-bit wide instructions.
1223+
All the 128-bit wide instructions from the AVX2 instruction set are listed.
1224+
Only a small part of the 256-bit AVX2 instruction set is listed; most of the
1225+
256-bit wide AVX2 instructions are emulated by two 128-bit wide instructions.
12241226

12251227
======================================================
12261228
Compiling SIMD code targeting ARM NEON instruction set

test/sse/test_avx2.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -262,7 +262,8 @@ void test_statisticsa(void) {
262262
Ret_M256i_M256i(__m256i, _mm256_avg_epu8);
263263
}
264264

265-
void test_shift(void) {
265+
// Split test_shift into two functions to reduce memory consumption
266+
void test_shift1(void) {
266267
Ret_M256i_Tint(__m256i, _mm256_slli_si256);
267268
Ret_M256i_Tint(__m256i, _mm256_bslli_epi128);
268269

@@ -277,7 +278,9 @@ void test_shift(void) {
277278
Ret_M256i_Tint(__m256i, _mm256_srai_epi32);
278279
Ret_M256i_M128i(__m256i, _mm256_sra_epi16);
279280
Ret_M256i_M128i(__m256i, _mm256_sra_epi32);
281+
}
280282

283+
void test_shift2(void) {
281284
Ret_M256i_Tint(__m256i, _mm256_srli_si256);
282285
Ret_M256i_Tint(__m256i, _mm256_bsrli_epi128);
283286

@@ -302,6 +305,11 @@ void test_shift(void) {
302305
Ret_M256i_M256i(__m256i, _mm256_srlv_epi64);
303306
}
304307

308+
void test_shift(void) {
309+
test_shift1();
310+
test_shift2();
311+
}
312+
305313
int main() {
306314
assert(numInterestingFloats % 8 == 0);
307315
assert(numInterestingInts % 8 == 0);

test/test_other.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9441,7 +9441,8 @@ def test_standalone_system_headers(self):
94419441
print('header: ' + header)
94429442
# These headers cannot be included in isolation.
94439443
# e.g: error: unknown type name 'EGLDisplay'
9444-
if header in ['eglext.h', 'SDL_config_macosx.h', 'glext.h', 'gl2ext.h']:
9444+
# Don't include avxintrin.h and avx2intrin.h directly, include immintrin.h instead
9445+
if header in ['eglext.h', 'SDL_config_macosx.h', 'glext.h', 'gl2ext.h', 'avxintrin.h', 'avx2intrin.h']:
94459446
continue
94469447
# These headers are C++ only and cannot be included from C code.
94479448
# But we still want to check they can be included on their own without
@@ -9450,12 +9451,14 @@ def test_standalone_system_headers(self):
94509451
'wire.h', 'val.h', 'bind.h',
94519452
'webgpu_cpp.h', 'webgpu_cpp_chained_struct.h', 'webgpu_enum_class_bitmasks.h',
94529453
# Some headers are not yet C compatible
9453-
'arm_neon.h', 'avxintrin.h', 'immintrin.h',
9454+
'arm_neon.h', 'immintrin.h',
94549455
]
94559456
if directory and directory != 'compat':
94569457
header = f'{directory}/{header}'
94579458
inc = f'#include <{header}>\n__attribute__((weak)) int foo;\n'
9458-
cflags = ['-Werror', '-Wall', '-pedantic', '-mavx', '-msimd128', '-msse3']
9459+
cflags = ['-Werror', '-Wall', '-pedantic', '-msimd128', '-msse4']
9460+
if header == 'immintrin.h':
9461+
cflags.append('-mavx2')
94599462
if cxx_only:
94609463
create_file('a.cxx', inc)
94619464
create_file('b.cxx', inc)

0 commit comments

Comments
 (0)