Skip to content

Commit 94a8551

Browse files
committed
Merge tag 'x86_core_for_v6.2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 core updates from Borislav Petkov: - Add the call depth tracking mitigation for Retbleed which has been long in the making. It is a lighter-weight software-only fix for Skylake-based cores where enabling IBRS is a big hammer and causes a significant performance impact. What it basically does is this: it aligns all kernel functions to a 16-byte boundary and adds 16 bytes of padding before each function; objtool collects all functions' locations and, when the mitigation gets applied, it patches in a call accounting thunk which is used to track the call depth of the stack at any time. When that call depth reaches a magical, microarchitecture-specific value for the Return Stack Buffer, the code stuffs that RSB and avoids its underflow which could otherwise lead to the Intel variant of Retbleed. This software-only solution brings a lot of the lost performance back, as benchmarks suggest: https://lore.kernel.org/all/20220915111039.092790446@infradead.org/ That page above also contains a lot more detailed explanation of the whole mechanism - Implement a new control flow integrity scheme called FineIBT which is based on the software kCFI implementation and uses hardware IBT support where present to annotate and track indirect branches using a hash to validate them - Other misc fixes and cleanups * tag 'x86_core_for_v6.2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (80 commits) x86/paravirt: Use common macro for creating simple asm paravirt functions x86/paravirt: Remove clobber bitmask from .parainstructions x86/debug: Include percpu.h in debugreg.h to get DECLARE_PER_CPU() et al x86/cpufeatures: Move X86_FEATURE_CALL_DEPTH from bit 18 to bit 19 of word 11, to leave space for WIP X86_FEATURE_SGX_EDECCSSA bit x86/Kconfig: Enable kernel IBT by default x86,pm: Force out-of-line memcpy() objtool: Fix weak hole vs prefix symbol objtool: Optimize elf_dirty_reloc_sym() x86/cfi: Add boot time hash randomization x86/cfi: Boot time selection of CFI scheme x86/ibt: Implement FineIBT objtool: Add 
--cfi to generate the .cfi_sites section x86: Add prefix symbols for function padding objtool: Add option to generate prefix symbols objtool: Avoid O(bloody terrible) behaviour -- an ode to libelf objtool: Slice up elf_create_section_symbol() kallsyms: Revert "Take callthunks into account" x86: Unconfuse CONFIG_ and X86_FEATURE_ namespaces x86/retpoline: Fix crash printing warning x86/paravirt: Fix a !PARAVIRT build warning ...
2 parents 93761c9 + f1a033c commit 94a8551

File tree

103 files changed

+2705
-593
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

103 files changed

+2705
-593
lines changed

Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1006,8 +1006,8 @@ KBUILD_CFLAGS += $(CC_FLAGS_CFI)
10061006
export CC_FLAGS_CFI
10071007
endif
10081008

1009-
ifdef CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B
1010-
KBUILD_CFLAGS += -falign-functions=64
1009+
ifneq ($(CONFIG_FUNCTION_ALIGNMENT),0)
1010+
KBUILD_CFLAGS += -falign-functions=$(CONFIG_FUNCTION_ALIGNMENT)
10111011
endif
10121012

10131013
# arch Makefile may override CC so keep this after arch Makefile is included

arch/Kconfig

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1438,4 +1438,28 @@ source "kernel/gcov/Kconfig"
14381438

14391439
source "scripts/gcc-plugins/Kconfig"
14401440

1441+
config FUNCTION_ALIGNMENT_4B
1442+
bool
1443+
1444+
config FUNCTION_ALIGNMENT_8B
1445+
bool
1446+
1447+
config FUNCTION_ALIGNMENT_16B
1448+
bool
1449+
1450+
config FUNCTION_ALIGNMENT_32B
1451+
bool
1452+
1453+
config FUNCTION_ALIGNMENT_64B
1454+
bool
1455+
1456+
config FUNCTION_ALIGNMENT
1457+
int
1458+
default 64 if FUNCTION_ALIGNMENT_64B
1459+
default 32 if FUNCTION_ALIGNMENT_32B
1460+
default 16 if FUNCTION_ALIGNMENT_16B
1461+
default 8 if FUNCTION_ALIGNMENT_8B
1462+
default 4 if FUNCTION_ALIGNMENT_4B
1463+
default 0
1464+
14411465
endmenu

arch/ia64/Kconfig

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ config IA64
6363
select NUMA if !FLATMEM
6464
select PCI_MSI_ARCH_FALLBACKS if PCI_MSI
6565
select ZONE_DMA32
66+
select FUNCTION_ALIGNMENT_32B
6667
default y
6768
help
6869
The Itanium Processor Family is Intel's 64-bit successor to

arch/ia64/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ KBUILD_AFLAGS_KERNEL := -mconstant-gp
2323
EXTRA :=
2424

2525
cflags-y := -pipe $(EXTRA) -ffixed-r13 -mfixed-range=f12-f15,f32-f127 \
26-
-falign-functions=32 -frename-registers -fno-optimize-sibling-calls
26+
-frename-registers -fno-optimize-sibling-calls
2727
KBUILD_CFLAGS_KERNEL := -mconstant-gp
2828

2929
GAS_STATUS = $(shell $(srctree)/arch/ia64/scripts/check-gas "$(CC)" "$(OBJDUMP)")

arch/um/kernel/um_arch.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -444,6 +444,11 @@ void apply_returns(s32 *start, s32 *end)
444444
{
445445
}
446446

447+
void apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
448+
s32 *start_cfi, s32 *end_cfi)
449+
{
450+
}
451+
447452
void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
448453
{
449454
}

arch/x86/Kconfig

Lines changed: 74 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,8 @@ config X86
292292
select X86_FEATURE_NAMES if PROC_FS
293293
select PROC_PID_ARCH_STATUS if PROC_FS
294294
select HAVE_ARCH_NODE_DEV_GROUP if X86_SGX
295+
select FUNCTION_ALIGNMENT_16B if X86_64 || X86_ALIGNMENT_16
296+
select FUNCTION_ALIGNMENT_4B
295297
imply IMA_SECURE_AND_OR_TRUSTED_BOOT if EFI
296298
select HAVE_DYNAMIC_FTRACE_NO_PATCHABLE
297299

@@ -1855,7 +1857,7 @@ config CC_HAS_IBT
18551857

18561858
config X86_KERNEL_IBT
18571859
prompt "Indirect Branch Tracking"
1858-
bool
1860+
def_bool y
18591861
depends on X86_64 && CC_HAS_IBT && HAVE_OBJTOOL
18601862
# https://github.com/llvm/llvm-project/commit/9d7001eba9c4cb311e03cd8cdc231f9e579f2d0f
18611863
depends on !LD_IS_LLD || LLD_VERSION >= 140000
@@ -2492,6 +2494,46 @@ config CC_HAS_SLS
24922494
config CC_HAS_RETURN_THUNK
24932495
def_bool $(cc-option,-mfunction-return=thunk-extern)
24942496

2497+
config CC_HAS_ENTRY_PADDING
2498+
def_bool $(cc-option,-fpatchable-function-entry=16,16)
2499+
2500+
config FUNCTION_PADDING_CFI
2501+
int
2502+
default 59 if FUNCTION_ALIGNMENT_64B
2503+
default 27 if FUNCTION_ALIGNMENT_32B
2504+
default 11 if FUNCTION_ALIGNMENT_16B
2505+
default 3 if FUNCTION_ALIGNMENT_8B
2506+
default 0
2507+
2508+
# Basically: FUNCTION_ALIGNMENT - 5*CFI_CLANG
2509+
# except Kconfig can't do arithmetic :/
2510+
config FUNCTION_PADDING_BYTES
2511+
int
2512+
default FUNCTION_PADDING_CFI if CFI_CLANG
2513+
default FUNCTION_ALIGNMENT
2514+
2515+
config CALL_PADDING
2516+
def_bool n
2517+
depends on CC_HAS_ENTRY_PADDING && OBJTOOL
2518+
select FUNCTION_ALIGNMENT_16B
2519+
2520+
config FINEIBT
2521+
def_bool y
2522+
depends on X86_KERNEL_IBT && CFI_CLANG && RETPOLINE
2523+
select CALL_PADDING
2524+
2525+
config HAVE_CALL_THUNKS
2526+
def_bool y
2527+
depends on CC_HAS_ENTRY_PADDING && RETHUNK && OBJTOOL
2528+
2529+
config CALL_THUNKS
2530+
def_bool n
2531+
select CALL_PADDING
2532+
2533+
config PREFIX_SYMBOLS
2534+
def_bool y
2535+
depends on CALL_PADDING && !CFI_CLANG
2536+
24952537
menuconfig SPECULATION_MITIGATIONS
24962538
bool "Mitigations for speculative execution vulnerabilities"
24972539
default y
@@ -2543,6 +2585,37 @@ config CPU_UNRET_ENTRY
25432585
help
25442586
Compile the kernel with support for the retbleed=unret mitigation.
25452587

2588+
config CALL_DEPTH_TRACKING
2589+
bool "Mitigate RSB underflow with call depth tracking"
2590+
depends on CPU_SUP_INTEL && HAVE_CALL_THUNKS
2591+
select HAVE_DYNAMIC_FTRACE_NO_PATCHABLE
2592+
select CALL_THUNKS
2593+
default y
2594+
help
2595+
Compile the kernel with call depth tracking to mitigate the Intel
2596+
SKL Return Stack Buffer (RSB) underflow issue. The
2597+
mitigation is off by default and needs to be enabled on the
2598+
kernel command line via the retbleed=stuff option. For
2599+
non-affected systems the overhead of this option is marginal as
2600+
the call depth tracking is using run-time generated call thunks
2601+
in a compiler generated padding area and call patching. This
2602+
increases text size by ~5%. For non-affected systems this space
2603+
is unused. On affected SKL systems this results in a significant
2604+
performance gain over the IBRS mitigation.
2605+
2606+
config CALL_THUNKS_DEBUG
2607+
bool "Enable call thunks and call depth tracking debugging"
2608+
depends on CALL_DEPTH_TRACKING
2609+
select FUNCTION_ALIGNMENT_32B
2610+
default n
2611+
help
2612+
Enable call/ret counters for imbalance detection and build in
2613+
a noisy dmesg about callthunks generation and call patching for
2614+
troubleshooting. The debug prints need to be enabled on the
2615+
kernel command line with 'debug-callthunks'.
2616+
Only enable this when you are debugging call thunks, as this
2617+
creates a noticeable runtime overhead. If unsure say N.
2618+
25462619
config CPU_IBPB_ENTRY
25472620
bool "Enable IBPB on kernel entry"
25482621
depends on CPU_SUP_AMD && X86_64

arch/x86/Makefile

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,12 @@ ifdef CONFIG_SLS
208208
KBUILD_CFLAGS += -mharden-sls=all
209209
endif
210210

211+
ifdef CONFIG_CALL_PADDING
212+
PADDING_CFLAGS := -fpatchable-function-entry=$(CONFIG_FUNCTION_PADDING_BYTES),$(CONFIG_FUNCTION_PADDING_BYTES)
213+
KBUILD_CFLAGS += $(PADDING_CFLAGS)
214+
export PADDING_CFLAGS
215+
endif
216+
211217
KBUILD_LDFLAGS += -m elf_$(UTS_MACHINE)
212218

213219
ifdef CONFIG_LTO_CLANG

arch/x86/boot/compressed/head_64.S

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,14 @@
3737
#include <asm/trapnr.h>
3838
#include "pgtable.h"
3939

40+
/*
41+
* Fix alignment at 16 bytes. Following CONFIG_FUNCTION_ALIGNMENT here would
42+
* result in assembly errors: the excessive alignment makes the assembler
43+
* try to move .org backward.
44+
*/
45+
#undef __ALIGN
46+
#define __ALIGN .balign 16, 0x90
47+
4048
/*
4149
* Locally defined symbols should be marked hidden:
4250
*/

arch/x86/crypto/camellia-aesni-avx-asm_64.S

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -712,7 +712,6 @@ SYM_FUNC_END(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
712712

713713
.text
714714

715-
.align 8
716715
SYM_FUNC_START_LOCAL(__camellia_enc_blk16)
717716
/* input:
718717
* %rdi: ctx, CTX
@@ -799,7 +798,6 @@ SYM_FUNC_START_LOCAL(__camellia_enc_blk16)
799798
jmp .Lenc_done;
800799
SYM_FUNC_END(__camellia_enc_blk16)
801800

802-
.align 8
803801
SYM_FUNC_START_LOCAL(__camellia_dec_blk16)
804802
/* input:
805803
* %rdi: ctx, CTX

arch/x86/crypto/camellia-aesni-avx2-asm_64.S

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -221,15 +221,13 @@
221221
* Size optimization... with inlined roundsm32 binary would be over 5 times
222222
* larger and would only be marginally faster.
223223
*/
224-
.align 8
225224
SYM_FUNC_START_LOCAL(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd)
226225
roundsm32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
227226
%ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, %ymm15,
228227
%rcx, (%r9));
229228
RET;
230229
SYM_FUNC_END(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd)
231230

232-
.align 8
233231
SYM_FUNC_START_LOCAL(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
234232
roundsm32(%ymm4, %ymm5, %ymm6, %ymm7, %ymm0, %ymm1, %ymm2, %ymm3,
235233
%ymm12, %ymm13, %ymm14, %ymm15, %ymm8, %ymm9, %ymm10, %ymm11,
@@ -748,7 +746,6 @@ SYM_FUNC_END(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
748746

749747
.text
750748

751-
.align 8
752749
SYM_FUNC_START_LOCAL(__camellia_enc_blk32)
753750
/* input:
754751
* %rdi: ctx, CTX
@@ -835,7 +832,6 @@ SYM_FUNC_START_LOCAL(__camellia_enc_blk32)
835832
jmp .Lenc_done;
836833
SYM_FUNC_END(__camellia_enc_blk32)
837834

838-
.align 8
839835
SYM_FUNC_START_LOCAL(__camellia_dec_blk32)
840836
/* input:
841837
* %rdi: ctx, CTX

0 commit comments

Comments
 (0)