From 59c47e09463b616179a41bc1fd35800032999281 Mon Sep 17 00:00:00 2001 From: Daniel Leung Date: Thu, 7 Jan 2021 13:38:04 -0800 Subject: [PATCH 1/5] x86: correct compiler flags for SSE It is possible to enable SSE without using SSE for floating point, so fix the compiler flags. Signed-off-by: Daniel Leung --- arch/x86/ia32.cmake | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/arch/x86/ia32.cmake b/arch/x86/ia32.cmake index 1f8134b872b4d..3e328b25fe5a5 100644 --- a/arch/x86/ia32.cmake +++ b/arch/x86/ia32.cmake @@ -21,7 +21,18 @@ if(CMAKE_C_COMPILER_ID STREQUAL "Clang") ) endif() -zephyr_cc_option_ifndef(CONFIG_SSE_FP_MATH -mno-sse) +if(CONFIG_SSE) + zephyr_cc_option(-msse) + + if(CONFIG_SSE_FP_MATH) + zephyr_cc_option(-mfpmath=sse) + else() + zephyr_cc_option(-mfpmath=387) + endif() + +else() + zephyr_cc_option(-mno-sse) +endif() if(CMAKE_VERBOSE_MAKEFILE) set(GENIDT_EXTRA_ARGS --verbose) From e065e6f57dc6f3d730f76264d799ea2b47910b7e Mon Sep 17 00:00:00 2001 From: Daniel Leung Date: Thu, 7 Jan 2021 13:43:32 -0800 Subject: [PATCH 2/5] tests: fpu_sharing: test both FPU and SSE on x86 On x86, it is possible to use FPU (387) and SSE for floating point operations. So test both. 
Signed-off-by: Daniel Leung --- tests/kernel/fpu_sharing/float_disable/prj_x86.conf | 1 - tests/kernel/fpu_sharing/float_disable/testcase.yaml | 10 +++++++++- tests/kernel/fpu_sharing/generic/prj_x86.conf | 1 - tests/kernel/fpu_sharing/generic/testcase.yaml | 12 +++++++++++- 4 files changed, 20 insertions(+), 4 deletions(-) diff --git a/tests/kernel/fpu_sharing/float_disable/prj_x86.conf b/tests/kernel/fpu_sharing/float_disable/prj_x86.conf index 1bc68cde9ae8d..4cb3a46c776c0 100644 --- a/tests/kernel/fpu_sharing/float_disable/prj_x86.conf +++ b/tests/kernel/fpu_sharing/float_disable/prj_x86.conf @@ -3,4 +3,3 @@ CONFIG_TEST_USERSPACE=y CONFIG_FPU=y CONFIG_FPU_SHARING=y CONFIG_SSE=y -CONFIG_SSE_FP_MATH=y diff --git a/tests/kernel/fpu_sharing/float_disable/testcase.yaml b/tests/kernel/fpu_sharing/float_disable/testcase.yaml index 19af97ab822f2..9e762c916cd34 100644 --- a/tests/kernel/fpu_sharing/float_disable/testcase.yaml +++ b/tests/kernel/fpu_sharing/float_disable/testcase.yaml @@ -17,7 +17,15 @@ tests: filter: CONFIG_CPU_HAS_FPU arch_allow: sparc tags: kernel userspace - kernel.fpu_sharing.float_disable.x86: + kernel.fpu_sharing.float_disable.x86.fpu: extra_args: CONF_FILE=prj_x86.conf + extra_configs: + - CONFIG_SSE_FP_MATH=n + platform_allow: qemu_x86 + tags: kernel userspace + kernel.fpu_sharing.float_disable.x86.sse: + extra_args: CONF_FILE=prj_x86.conf + extra_configs: + - CONFIG_SSE_FP_MATH=y platform_allow: qemu_x86 tags: kernel userspace diff --git a/tests/kernel/fpu_sharing/generic/prj_x86.conf b/tests/kernel/fpu_sharing/generic/prj_x86.conf index a341e4a17f8a6..d1828266a604d 100644 --- a/tests/kernel/fpu_sharing/generic/prj_x86.conf +++ b/tests/kernel/fpu_sharing/generic/prj_x86.conf @@ -2,5 +2,4 @@ CONFIG_ZTEST=y CONFIG_FPU=y CONFIG_SSE=y CONFIG_FPU_SHARING=y -CONFIG_SSE_FP_MATH=y CONFIG_STDOUT_CONSOLE=y diff --git a/tests/kernel/fpu_sharing/generic/testcase.yaml b/tests/kernel/fpu_sharing/generic/testcase.yaml index f3eff516da8e6..86dfebd2b032e 100644 
--- a/tests/kernel/fpu_sharing/generic/testcase.yaml +++ b/tests/kernel/fpu_sharing/generic/testcase.yaml @@ -33,8 +33,18 @@ tests: arch_allow: sparc tags: kernel timeout: 600 - kernel.fpu_sharing.generic.x86: + kernel.fpu_sharing.generic.x86.fpu: extra_args: CONF_FILE=prj_x86.conf + extra_configs: + - CONFIG_SSE_FP_MATH=n + platform_allow: qemu_x86 + slow: true + tags: kernel + timeout: 600 + kernel.fpu_sharing.generic.x86.sse: + extra_args: CONF_FILE=prj_x86.conf + extra_configs: + - CONFIG_SSE_FP_MATH=y platform_allow: qemu_x86 slow: true tags: kernel From 7b9cc23877e7b845dd325aac383d5f09bb672999 Mon Sep 17 00:00:00 2001 From: Daniel Leung Date: Thu, 7 Jan 2021 15:51:18 -0800 Subject: [PATCH 3/5] tests: benchmarks/app_kernel: enable for floating point The app_kernel benchmarking app has the config file for benchmarking with floating point enabled, but it was never used. So add it to the testcase.yaml. Note that this also limits the benchmark to a single CPU on SMP systems, since the resulting numbers are more consistent between runs. 
Signed-off-by: Daniel Leung --- tests/benchmarks/app_kernel/prj_fp.conf | 4 +-- tests/benchmarks/app_kernel/testcase.yaml | 33 +++++++++++++++++++++++ 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/tests/benchmarks/app_kernel/prj_fp.conf b/tests/benchmarks/app_kernel/prj_fp.conf index c7f1ff4230593..215cf57a3da4e 100644 --- a/tests/benchmarks/app_kernel/prj_fp.conf +++ b/tests/benchmarks/app_kernel/prj_fp.conf @@ -4,9 +4,7 @@ CONFIG_STDOUT_CONSOLE=y CONFIG_MAIN_THREAD_PRIORITY=6 CONFIG_FPU=y -CONFIG_SSE=y CONFIG_FPU_SHARING=y -CONFIG_SSE_FP_MATH=y # eliminate timer interrupts during the benchmark CONFIG_SYS_CLOCK_TICKS_PER_SEC=1 @@ -17,3 +15,5 @@ CONFIG_FORCE_NO_ASSERT=y # Disable HW Stack Protection (see #28664) CONFIG_TEST_HW_STACK_PROTECTION=n CONFIG_HW_STACK_PROTECTION=n + +CONFIG_MP_NUM_CPUS=1 diff --git a/tests/benchmarks/app_kernel/testcase.yaml b/tests/benchmarks/app_kernel/testcase.yaml index 8860913f4be63..c0d338173903b 100644 --- a/tests/benchmarks/app_kernel/testcase.yaml +++ b/tests/benchmarks/app_kernel/testcase.yaml @@ -5,6 +5,39 @@ tests: min_ram: 32 tags: benchmark timeout: 300 + benchmark.kernel.application.fp.arm: + extra_args: CONF_FILE=prj_fp.conf + arch_allow: arm + filter: CONFIG_ARMV7_M_ARMV8_M_FP + min_flash: 34 + min_ram: 32 + tags: benchmark + slow: true + timeout: 300 + benchmark.kernel.application.fp.x86.fpu: + extra_args: CONF_FILE=prj_fp.conf + extra_configs: + - CONFIG_SSE=y + - CONFIG_SSE_FP_MATH=n + arch_allow: x86 + filter: CONFIG_CPU_HAS_FPU + min_flash: 34 + min_ram: 32 + tags: benchmark + slow: true + timeout: 300 + benchmark.kernel.application.fp.x86.sse: + extra_args: CONF_FILE=prj_fp.conf + extra_configs: + - CONFIG_SSE=y + - CONFIG_SSE_FP_MATH=y + arch_allow: x86 + filter: CONFIG_CPU_HAS_FPU + min_flash: 34 + min_ram: 32 + tags: benchmark + slow: true + timeout: 300 benchmark.kernel.application.posix: arch_allow: posix min_ram: 32 From fc19aa25b403d0c736401b5b86847a6715349356 Mon Sep 17 00:00:00 2001 From: 
Daniel Leung Date: Thu, 7 Jan 2021 15:07:29 -0800 Subject: [PATCH 4/5] x86: rename CONFIG_SSE* to CONFIG_X86_SSE* This adds X86 keyword to the kconfigs to indicate these are for x86. The old options are still there marked as deprecated. Signed-off-by: Daniel Leung --- arch/x86/Kconfig | 25 +++++++++++++++++++ arch/x86/core/Kconfig.ia32 | 16 ++++-------- arch/x86/core/ia32/crt0.S | 20 +++++++-------- arch/x86/core/ia32/float.c | 4 +-- arch/x86/core/ia32/swap.S | 14 +++++------ arch/x86/ia32.cmake | 4 +-- arch/x86/include/ia32/kernel_arch_data.h | 2 +- doc/reference/kernel/other/float.rst | 4 +-- include/arch/x86/ia32/thread.h | 10 ++++---- include/kernel.h | 2 +- tests/benchmarks/app_kernel/testcase.yaml | 8 +++--- .../fpu_sharing/float_disable/prj_x86.conf | 2 +- .../fpu_sharing/float_disable/testcase.yaml | 4 +-- tests/kernel/fpu_sharing/generic/prj_x86.conf | 2 +- tests/kernel/fpu_sharing/generic/src/main.c | 2 +- .../kernel/fpu_sharing/generic/testcase.yaml | 4 +-- 16 files changed, 71 insertions(+), 52 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 96619ce8bc25d..c4d59a2d842d3 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -48,6 +48,31 @@ config X86_64 select SCHED_IPI_SUPPORTED select X86_MMU +menu "x86 Features" + +config X86_SSE + bool "Enable SSE Support" + depends on FPU + help + This option enables SSE support, and the use of SSE registers + by threads. + +config X86_SSE_FP_MATH + bool "Compiler-generated SSEx instructions for floating point math" + depends on X86_SSE + help + This option allows the compiler to generate SSEx instructions for + performing floating point math. This can greatly improve performance + when exactly the same operations are to be performed on multiple + data objects; however, it can also significantly reduce performance + when preemptive task switches occur because of the larger register + set that must be saved and restored. 
+ + Disabling this option means that the compiler utilizes only the + x87 instruction set for floating point operations. + +endmenu + config X86_KERNEL_OFFSET int "Kernel offset from beginning of RAM" default 1048576 diff --git a/arch/x86/core/Kconfig.ia32 b/arch/x86/core/Kconfig.ia32 index 864318a5f8f78..0fee770c46331 100644 --- a/arch/x86/core/Kconfig.ia32 +++ b/arch/x86/core/Kconfig.ia32 @@ -92,22 +92,16 @@ if CPU_HAS_FPU config SSE bool "SSE registers" depends on FPU + select X86_SSE help - This option enables the use of SSE registers by threads. + This option is deprecated. Please use CONFIG_X86_SSE instead. config SSE_FP_MATH bool "Compiler-generated SSEx instructions" - depends on SSE + depends on X86_SSE + select X86_SSE_FP_MATH help - This option allows the compiler to generate SSEx instructions for - performing floating point math. This can greatly improve performance - when exactly the same operations are to be performed on multiple - data objects; however, it can also significantly reduce performance - when preemptive task switches occur because of the larger register - set that must be saved and restored. - - Disabling this option means that the compiler utilizes only the - x87 instruction set for floating point operations. + This option is deprecated. Please use CONFIG_X86_SSE_FP_MATH instead. 
config EAGER_FPU_SHARING bool diff --git a/arch/x86/core/ia32/crt0.S b/arch/x86/core/ia32/crt0.S index 2f2cbaa328daf..58ac45b447c68 100644 --- a/arch/x86/core/ia32/crt0.S +++ b/arch/x86/core/ia32/crt0.S @@ -34,7 +34,7 @@ #endif -#if defined(CONFIG_SSE) +#if defined(CONFIG_X86_SSE) GDATA(_sse_mxcsr_default_value) #endif @@ -101,7 +101,7 @@ __csSet: fninit /* set x87 FPU to its default state */ - #if defined(CONFIG_SSE) + #if defined(CONFIG_X86_SSE) /* * Permit use of SSE instructions * @@ -116,7 +116,7 @@ __csSet: ldmxcsr _sse_mxcsr_default_value /* initialize SSE control/status reg */ - #endif /* CONFIG_SSE */ + #endif /* CONFIG_X86_SSE */ #endif /* !CONFIG_FPU */ @@ -236,7 +236,7 @@ __csSet: _x86_bss_zero: /* ECX = size, EDI = starting address */ -#ifdef CONFIG_SSE +#ifdef CONFIG_X86_SSE /* use XMM register to clear 16 bytes at a time */ pxor %xmm0, %xmm0 /* zero out xmm0 register */ @@ -258,7 +258,7 @@ bssWords: rep stosl /* zero memory per 4 bytes */ -#else /* !CONFIG_SSE */ +#else /* !CONFIG_X86_SSE */ /* clear out BSS double words (32-bits at a time) */ @@ -267,13 +267,13 @@ bssWords: rep stosl /* zero memory per 4 bytes */ -#endif /* CONFIG_SSE */ +#endif /* CONFIG_X86_SSE */ ret #ifdef CONFIG_XIP _x86_data_copy: /* EDI = dest, ESI = source, ECX = size in 32-bit chunks */ - #ifdef CONFIG_SSE + #ifdef CONFIG_X86_SSE /* copy 16 bytes at a time using XMM until < 16 bytes remain */ movl %ecx ,%edx /* save number of quad bytes */ @@ -290,7 +290,7 @@ dataDQ: dataWords: movl %edx, %ecx /* restore # quad bytes */ andl $0x3, %ecx /* only need to copy at most 3 quad bytes */ - #endif /* CONFIG_SSE */ + #endif /* CONFIG_X86_SSE */ rep movsl /* copy data 4 bytes at a time */ @@ -298,14 +298,14 @@ dataWords: #endif /* CONFIG_XIP */ -#if defined(CONFIG_SSE) +#if defined(CONFIG_X86_SSE) /* SSE control & status register initial value */ _sse_mxcsr_default_value: .long 0x1f80 /* all SSE exceptions clear & masked */ -#endif /* CONFIG_SSE */ +#endif /* CONFIG_X86_SSE */ /* 
Interrupt Descriptor Table (IDT) definition */ diff --git a/arch/x86/core/ia32/float.c b/arch/x86/core/ia32/float.c index ea860aa70fc88..3f52232041928 100644 --- a/arch/x86/core/ia32/float.c +++ b/arch/x86/core/ia32/float.c @@ -145,7 +145,7 @@ static inline void z_do_sse_regs_init(void) */ static void FpCtxSave(struct k_thread *thread) { -#ifdef CONFIG_SSE +#ifdef CONFIG_X86_SSE if ((thread->base.user_options & K_SSE_REGS) != 0) { z_do_fp_and_sse_regs_save(&thread->arch.preempFloatReg); return; @@ -163,7 +163,7 @@ static void FpCtxSave(struct k_thread *thread) static inline void FpCtxInit(struct k_thread *thread) { z_do_fp_regs_init(); -#ifdef CONFIG_SSE +#ifdef CONFIG_X86_SSE if ((thread->base.user_options & K_SSE_REGS) != 0) { z_do_sse_regs_init(); } diff --git a/arch/x86/core/ia32/swap.S b/arch/x86/core/ia32/swap.S index 1fb132ab45db9..11040132f6cbb 100644 --- a/arch/x86/core/ia32/swap.S +++ b/arch/x86/core/ia32/swap.S @@ -131,18 +131,18 @@ SECTION_FUNC(TEXT, arch_swap) * switch. */ /* Save outgpoing thread context */ -#ifdef CONFIG_SSE +#ifdef CONFIG_X86_SSE fxsave _thread_offset_to_preempFloatReg(%edx) fninit #else fnsave _thread_offset_to_preempFloatReg(%edx) #endif /* Restore incoming thread context */ -#ifdef CONFIG_SSE +#ifdef CONFIG_X86_SSE fxrstor _thread_offset_to_preempFloatReg(%eax) #else frstor _thread_offset_to_preempFloatReg(%eax) -#endif /* CONFIG_SSE */ +#endif /* CONFIG_X86_SSE */ #elif defined(CONFIG_LAZY_FPU_SHARING) /* * Clear the CR0[TS] bit (in the event the current thread @@ -206,7 +206,7 @@ SECTION_FUNC(TEXT, arch_swap) je restoreContext_NoFloatSave -#ifdef CONFIG_SSE +#ifdef CONFIG_X86_SSE testb $K_SSE_REGS, _thread_offset_to_user_options(%ebx) je x87FloatSave @@ -221,7 +221,7 @@ SECTION_FUNC(TEXT, arch_swap) jmp floatSaveDone x87FloatSave: -#endif /* CONFIG_SSE */ +#endif /* CONFIG_X86_SSE */ /* 'fnsave' performs an implicit 'fninit' after saving state! 
*/ @@ -245,7 +245,7 @@ restoreContext_NoFloatSave: testb $X86_THREAD_FLAG_ALL, _thread_offset_to_flags(%eax) je restoreContext_NoFloatRestore -#ifdef CONFIG_SSE +#ifdef CONFIG_X86_SSE testb $K_SSE_REGS, _thread_offset_to_user_options(%eax) je x87FloatRestore @@ -254,7 +254,7 @@ restoreContext_NoFloatSave: x87FloatRestore: -#endif /* CONFIG_SSE */ +#endif /* CONFIG_X86_SSE */ frstor _thread_offset_to_preempFloatReg(%eax) diff --git a/arch/x86/ia32.cmake b/arch/x86/ia32.cmake index 3e328b25fe5a5..5bb999d2c201d 100644 --- a/arch/x86/ia32.cmake +++ b/arch/x86/ia32.cmake @@ -21,10 +21,10 @@ if(CMAKE_C_COMPILER_ID STREQUAL "Clang") ) endif() -if(CONFIG_SSE) +if(CONFIG_X86_SSE) zephyr_cc_option(-msse) - if(CONFIG_SSE_FP_MATH) + if(CONFIG_X86_SSE_FP_MATH) zephyr_cc_option(-mfpmath=sse) else() zephyr_cc_option(-mfpmath=387) diff --git a/arch/x86/include/ia32/kernel_arch_data.h b/arch/x86/include/ia32/kernel_arch_data.h index 49dc1981fe9c5..449578588c022 100644 --- a/arch/x86/include/ia32/kernel_arch_data.h +++ b/arch/x86/include/ia32/kernel_arch_data.h @@ -44,7 +44,7 @@ #define _THREAD_WRAPPER_REQUIRED #endif -#if defined(CONFIG_LAZY_FPU_SHARING) && defined(CONFIG_SSE) +#if defined(CONFIG_LAZY_FPU_SHARING) && defined(CONFIG_X86_SSE) #define _FP_USER_MASK (K_FP_REGS | K_SSE_REGS) #elif defined(CONFIG_LAZY_FPU_SHARING) #define _FP_USER_MASK (K_FP_REGS) diff --git a/doc/reference/kernel/other/float.rst b/doc/reference/kernel/other/float.rst index 7043bfb3e4d44..201732deb3f5a 100644 --- a/doc/reference/kernel/other/float.rst +++ b/doc/reference/kernel/other/float.rst @@ -303,8 +303,8 @@ Also, ensure that any thread that uses the floating point registers has sufficient added stack space for saving floating point register values during context switches, as described above. -Use the :option:`CONFIG_SSE` configuration option to enable support for -SSEx instructions (x86 only). +For x86, use the :option:`CONFIG_X86_SSE` configuration option to enable +support for SSEx instructions. 
API Reference ************* diff --git a/include/arch/x86/ia32/thread.h b/include/arch/x86/ia32/thread.h index 756202a0b2279..937f7c958c06f 100644 --- a/include/arch/x86/ia32/thread.h +++ b/include/arch/x86/ia32/thread.h @@ -27,7 +27,7 @@ * cases a 4 byte boundary is sufficient. */ #if defined(CONFIG_EAGER_FPU_SHARING) || defined(CONFIG_LAZY_FPU_SHARING) -#ifdef CONFIG_SSE +#ifdef CONFIG_X86_SSE #define FP_REG_SET_ALIGN 16 #else #define FP_REG_SET_ALIGN 4 @@ -82,7 +82,7 @@ typedef struct _callee_saved _callee_saved_t; * The macros CONFIG_{LAZY|EAGER}_FPU_SHARING shall be set to indicate that the * saving/restoring of the traditional x87 floating point (and MMX) registers * are supported by the kernel's context swapping code. The macro - * CONFIG_SSE shall _also_ be set if saving/restoring of the XMM + * CONFIG_X86_SSE shall _also_ be set if saving/restoring of the XMM * registers is also supported in the kernel's context swapping code. */ @@ -120,7 +120,7 @@ typedef struct s_FpRegSet { /* # of bytes: name of register */ tFpReg fpReg[8]; /* 80 : ST0 -> ST7 */ } tFpRegSet __aligned(FP_REG_SET_ALIGN); -#ifdef CONFIG_SSE +#ifdef CONFIG_X86_SSE /* definition of a single x87 (floating point / MMX) register */ @@ -168,12 +168,12 @@ typedef struct s_FpRegSetEx /* # of bytes: name of register */ unsigned char rsrvd3[176]; /* 176 : reserved */ } tFpRegSetEx __aligned(FP_REG_SET_ALIGN); -#else /* CONFIG_SSE == 0 */ +#else /* CONFIG_X86_SSE == 0 */ typedef struct s_FpRegSetEx { } tFpRegSetEx; -#endif /* CONFIG_SSE == 0 */ +#endif /* CONFIG_X86_SSE == 0 */ #else /* !CONFIG_LAZY_FPU_SHARING && !CONFIG_EAGER_FPU_SHARING */ diff --git a/include/kernel.h b/include/kernel.h index b576cd082267a..2856f17c97fb7 100644 --- a/include/kernel.h +++ b/include/kernel.h @@ -236,7 +236,7 @@ extern void k_thread_foreach_unlocked( #ifdef CONFIG_X86 /* x86 Bitmask definitions for threads user options */ -#if defined(CONFIG_FPU_SHARING) && defined(CONFIG_SSE) +#if defined(CONFIG_FPU_SHARING) && 
defined(CONFIG_X86_SSE) /* thread uses SSEx (and also FP) registers */ #define K_SSE_REGS (BIT(7)) #endif diff --git a/tests/benchmarks/app_kernel/testcase.yaml b/tests/benchmarks/app_kernel/testcase.yaml index c0d338173903b..2f1ce40f0fcfd 100644 --- a/tests/benchmarks/app_kernel/testcase.yaml +++ b/tests/benchmarks/app_kernel/testcase.yaml @@ -17,8 +17,8 @@ tests: benchmark.kernel.application.fp.x86.fpu: extra_args: CONF_FILE=prj_fp.conf extra_configs: - - CONFIG_SSE=y - - CONFIG_SSE_FP_MATH=n + - CONFIG_X86_SSE=y + - CONFIG_X86_SSE_FP_MATH=n arch_allow: x86 filter: CONFIG_CPU_HAS_FPU min_flash: 34 @@ -29,8 +29,8 @@ tests: benchmark.kernel.application.fp.x86.sse: extra_args: CONF_FILE=prj_fp.conf extra_configs: - - CONFIG_SSE=y - - CONFIG_SSE_FP_MATH=y + - CONFIG_X86_SSE=y + - CONFIG_X86_SSE_FP_MATH=y arch_allow: x86 filter: CONFIG_CPU_HAS_FPU min_flash: 34 diff --git a/tests/kernel/fpu_sharing/float_disable/prj_x86.conf b/tests/kernel/fpu_sharing/float_disable/prj_x86.conf index 4cb3a46c776c0..ed987a5581e50 100644 --- a/tests/kernel/fpu_sharing/float_disable/prj_x86.conf +++ b/tests/kernel/fpu_sharing/float_disable/prj_x86.conf @@ -2,4 +2,4 @@ CONFIG_ZTEST=y CONFIG_TEST_USERSPACE=y CONFIG_FPU=y CONFIG_FPU_SHARING=y -CONFIG_SSE=y +CONFIG_X86_SSE=y diff --git a/tests/kernel/fpu_sharing/float_disable/testcase.yaml b/tests/kernel/fpu_sharing/float_disable/testcase.yaml index 9e762c916cd34..3b2a08ea7faad 100644 --- a/tests/kernel/fpu_sharing/float_disable/testcase.yaml +++ b/tests/kernel/fpu_sharing/float_disable/testcase.yaml @@ -20,12 +20,12 @@ tests: kernel.fpu_sharing.float_disable.x86.fpu: extra_args: CONF_FILE=prj_x86.conf extra_configs: - - CONFIG_SSE_FP_MATH=n + - CONFIG_X86_SSE_FP_MATH=n platform_allow: qemu_x86 tags: kernel userspace kernel.fpu_sharing.float_disable.x86.sse: extra_args: CONF_FILE=prj_x86.conf extra_configs: - - CONFIG_SSE_FP_MATH=y + - CONFIG_X86_SSE_FP_MATH=y platform_allow: qemu_x86 tags: kernel userspace diff --git 
a/tests/kernel/fpu_sharing/generic/prj_x86.conf b/tests/kernel/fpu_sharing/generic/prj_x86.conf index d1828266a604d..20c8ab2466905 100644 --- a/tests/kernel/fpu_sharing/generic/prj_x86.conf +++ b/tests/kernel/fpu_sharing/generic/prj_x86.conf @@ -1,5 +1,5 @@ CONFIG_ZTEST=y CONFIG_FPU=y -CONFIG_SSE=y +CONFIG_X86_SSE=y CONFIG_FPU_SHARING=y CONFIG_STDOUT_CONSOLE=y diff --git a/tests/kernel/fpu_sharing/generic/src/main.c b/tests/kernel/fpu_sharing/generic/src/main.c index 2cd3d98a4378e..3d420aa27e58f 100644 --- a/tests/kernel/fpu_sharing/generic/src/main.c +++ b/tests/kernel/fpu_sharing/generic/src/main.c @@ -17,7 +17,7 @@ #error Rebuild with the FPU_SHARING config option enabled #endif -#if defined(CONFIG_X86) && !defined(CONFIG_SSE) +#if defined(CONFIG_X86) && !defined(CONFIG_X86_SSE) #error Rebuild with the SSE config option enabled #endif diff --git a/tests/kernel/fpu_sharing/generic/testcase.yaml b/tests/kernel/fpu_sharing/generic/testcase.yaml index 86dfebd2b032e..d900614dc7beb 100644 --- a/tests/kernel/fpu_sharing/generic/testcase.yaml +++ b/tests/kernel/fpu_sharing/generic/testcase.yaml @@ -36,7 +36,7 @@ tests: kernel.fpu_sharing.generic.x86.fpu: extra_args: CONF_FILE=prj_x86.conf extra_configs: - - CONFIG_SSE_FP_MATH=n + - CONFIG_X86_SSE_FP_MATH=n platform_allow: qemu_x86 slow: true tags: kernel @@ -44,7 +44,7 @@ tests: kernel.fpu_sharing.generic.x86.sse: extra_args: CONF_FILE=prj_x86.conf extra_configs: - - CONFIG_SSE_FP_MATH=y + - CONFIG_X86_SSE_FP_MATH=y platform_allow: qemu_x86 slow: true tags: kernel From d6faf57796eeea92bdb489d904fbb4fc500aecd8 Mon Sep 17 00:00:00 2001 From: Daniel Leung Date: Thu, 7 Jan 2021 16:13:52 -0800 Subject: [PATCH 5/5] x86: add kconfigs and compiler flags for MMX and SSE* This adds kconfigs and compiler flags to support MMX and SSE* instructions. 
Signed-off-by: Daniel Leung --- arch/x86/Kconfig | 97 +++++++++++++++++++++++++++++++++++++++- arch/x86/ia32.cmake | 42 +++++++++++++++++ arch/x86/intel64.cmake | 36 +++++++++++++++ soc/x86/ia32/Kconfig.soc | 2 + 4 files changed, 176 insertions(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c4d59a2d842d3..a344547909e57 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -18,6 +18,10 @@ config CPU_ATOM select CPU_HAS_FPU select ARCH_HAS_STACK_PROTECTION if X86_MMU select ARCH_HAS_USERSPACE if X86_MMU + select X86_CPU_HAS_MMX + select X86_CPU_HAS_SSE + select X86_CPU_HAS_SSE2 + select X86_CPU_HAS_SSE3 help This option signifies the use of a CPU from the Atom family. @@ -33,6 +37,14 @@ config CPU_APOLLO_LAKE select CPU_HAS_FPU select ARCH_HAS_STACK_PROTECTION if X86_MMU select ARCH_HAS_USERSPACE if X86_MMU + select X86_MMU + select X86_CPU_HAS_MMX + select X86_CPU_HAS_SSE + select X86_CPU_HAS_SSE2 + select X86_CPU_HAS_SSE3 + select X86_CPU_HAS_SSSE3 + select X86_CPU_HAS_SSE41 + select X86_CPU_HAS_SSE42 help This option signifies the use of a CPU from the Apollo Lake family. @@ -47,16 +59,97 @@ config X86_64 select USE_SWITCH_SUPPORTED select SCHED_IPI_SUPPORTED select X86_MMU + select X86_CPU_HAS_MMX + select X86_CPU_HAS_SSE + select X86_CPU_HAS_SSE2 + select X86_MMX + select X86_SSE + select X86_SSE2 menu "x86 Features" +config X86_CPU_HAS_MMX + bool + +config X86_CPU_HAS_SSE + bool + +config X86_CPU_HAS_SSE2 + bool + +config X86_CPU_HAS_SSE3 + bool + +config X86_CPU_HAS_SSSE3 + bool + +config X86_CPU_HAS_SSE41 + bool + +config X86_CPU_HAS_SSE42 + bool + +config X86_CPU_HAS_SSE4A + bool + +if FPU || X86_64 + +config X86_MMX + bool "Enable MMX Support" + depends on X86_CPU_HAS_MMX + help + This option enables MMX support, and the use of MMX registers + by threads. + config X86_SSE bool "Enable SSE Support" - depends on FPU + depends on X86_CPU_HAS_SSE help This option enables SSE support, and the use of SSE registers by threads. 
+config X86_SSE2 + bool "Enable SSE2 Support" + depends on X86_CPU_HAS_SSE2 + select X86_SSE + help + This option enables SSE2 support. + +config X86_SSE3 + bool "Enable SSE3 Support" + depends on X86_CPU_HAS_SSE3 + select X86_SSE + help + This option enables SSE3 support. + +config X86_SSSE3 + bool "Enable SSSE3 (Supplemental SSE3) Support" + depends on X86_CPU_HAS_SSSE3 + select X86_SSE + help + This option enables Supplemental SSE3 support. + +config X86_SSE41 + bool "Enable SSE4.1 Support" + depends on X86_CPU_HAS_SSE41 + select X86_SSE + help + This option enables SSE4.1 support. + +config X86_SSE42 + bool "Enable SSE4.2 Support" + depends on X86_CPU_HAS_SSE42 + select X86_SSE + help + This option enables SSE4.2 support. + +config X86_SSE4A + bool "Enable SSE4A Support" + depends on X86_CPU_HAS_SSE4A + select X86_SSE + help + This option enables SSE4A support. + config X86_SSE_FP_MATH bool "Compiler-generated SSEx instructions for floating point math" depends on X86_SSE @@ -71,6 +164,8 @@ config X86_SSE_FP_MATH Disabling this option means that the compiler utilizes only the x87 instruction set for floating point operations. 
+endif # FPU || X86_64 + endmenu config X86_KERNEL_OFFSET diff --git a/arch/x86/ia32.cmake b/arch/x86/ia32.cmake index 5bb999d2c201d..6f7182154562a 100644 --- a/arch/x86/ia32.cmake +++ b/arch/x86/ia32.cmake @@ -21,6 +21,12 @@ if(CMAKE_C_COMPILER_ID STREQUAL "Clang") ) endif() +if(CONFIG_X86_MMX) + zephyr_cc_option(-mmmx) +else() + zephyr_cc_option(-mno-mmx) +endif() + if(CONFIG_X86_SSE) zephyr_cc_option(-msse) @@ -30,6 +36,42 @@ if(CONFIG_X86_SSE) zephyr_cc_option(-mfpmath=387) endif() + if(CONFIG_X86_SSE2) + zephyr_cc_option(-msse2) + else() + zephyr_cc_option(-mno-sse2) + endif() + + if(CONFIG_X86_SSE3) + zephyr_cc_option(-msse3) + else() + zephyr_cc_option(-mno-sse3) + endif() + + if(CONFIG_X86_SSSE3) + zephyr_cc_option(-mssse3) + else() + zephyr_cc_option(-mno-ssse3) + endif() + + if(CONFIG_X86_SSE41) + zephyr_cc_option(-msse4.1) + else() + zephyr_cc_option(-mno-sse4.1) + endif() + + if(CONFIG_X86_SSE42) + zephyr_cc_option(-msse4.2) + else() + zephyr_cc_option(-mno-sse4.2) + endif() + + if(CONFIG_X86_SSE4A) + zephyr_cc_option(-msse4a) + else() + zephyr_cc_option(-mno-sse4a) + endif() + else() zephyr_cc_option(-mno-sse) endif() diff --git a/arch/x86/intel64.cmake b/arch/x86/intel64.cmake index a2a9f73b88922..fcc075addc20d 100644 --- a/arch/x86/intel64.cmake +++ b/arch/x86/intel64.cmake @@ -8,4 +8,40 @@ set_property(GLOBAL PROPERTY PROPERTY_OUTPUT_FORMAT "elf64-x86-64") get_property(OUTPUT_ARCH GLOBAL PROPERTY PROPERTY_OUTPUT_ARCH) get_property(OUTPUT_FORMAT GLOBAL PROPERTY PROPERTY_OUTPUT_FORMAT) +if(CONFIG_X86_SSE) + # x86-64 by default has SSE and SSE2 + # so no need to add compiler flags for them. 
+ + if(CONFIG_X86_SSE3) + zephyr_cc_option(-msse3) + else() + zephyr_cc_option(-mno-sse3) + endif() + + if(CONFIG_X86_SSSE3) + zephyr_cc_option(-mssse3) + else() + zephyr_cc_option(-mno-ssse3) + endif() + + if(CONFIG_X86_SSE41) + zephyr_cc_option(-msse4.1) + else() + zephyr_cc_option(-mno-sse4.1) + endif() + + if(CONFIG_X86_SSE42) + zephyr_cc_option(-msse4.2) + else() + zephyr_cc_option(-mno-sse4.2) + endif() + + if(CONFIG_X86_SSE4A) + zephyr_cc_option(-msse4a) + else() + zephyr_cc_option(-mno-sse4a) + endif() + +endif() + add_subdirectory(core) diff --git a/soc/x86/ia32/Kconfig.soc b/soc/x86/ia32/Kconfig.soc index 8b00c5bf3e58b..75c86855a0750 100644 --- a/soc/x86/ia32/Kconfig.soc +++ b/soc/x86/ia32/Kconfig.soc @@ -4,4 +4,6 @@ config SOC_IA32 bool "Generic IA32 SoC" select X86 select CPU_MINUTEIA + select X86_CPU_HAS_MMX + select X86_CPU_HAS_SSE select ARCH_HAS_RESERVED_PAGE_FRAMES if SRAM_BASE_ADDRESS = 0