Skip to content

Commit f091d5a

Browse files
Eugeniy Paltsevvineetgarc
authored andcommitted
ARC: ARCv2: jump label: implement jump label patching
Implement jump label patching for ARC. Jump labels provide an interface to generate dynamic branches using self-modifying code. This allows us to implement conditional branches where changing branch direction is expensive but branch selection is basically 'free'. This implementation uses 32-bit NOP and BRANCH instructions which are forced to be aligned by 4 to guarantee that they don't cross an L1 cache line boundary and can be updated atomically. Signed-off-by: Eugeniy Paltsev <[email protected]> Signed-off-by: Vineet Gupta <[email protected]>
1 parent 2f4ecf6 commit f091d5a

File tree

5 files changed

+253
-0
lines changed

5 files changed

+253
-0
lines changed

arch/arc/Kconfig

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ config ARC
4646
select OF_EARLY_FLATTREE
4747
select PCI_SYSCALL if PCI
4848
select PERF_USE_VMALLOC if ARC_CACHE_VIPT_ALIASING
49+
select HAVE_ARCH_JUMP_LABEL if ISA_ARCV2 && !CPU_ENDIAN_BE32
4950

5051
config ARCH_HAS_CACHE_LINE_SIZE
5152
def_bool y
@@ -525,6 +526,13 @@ config ARC_DW2_UNWIND
525526
config ARC_DBG_TLB_PARANOIA
526527
bool "Paranoia Checks in Low Level TLB Handlers"
527528

529+
config ARC_DBG_JUMP_LABEL
530+
bool "Paranoid checks in Static Keys (jump labels) code"
531+
depends on JUMP_LABEL
532+
default y if STATIC_KEYS_SELFTEST
533+
help
534+
Enable paranoid checks and self-test of both ARC-specific and generic
535+
parts of static keys (jump labels) related code.
528536
endif
529537

530538
config ARC_BUILTIN_DTB_NAME

arch/arc/include/asm/cache.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525

2626
#ifndef __ASSEMBLY__
2727

28+
#include <linux/build_bug.h>
29+
2830
/* Uncached access macros */
2931
#define arc_read_uncached_32(ptr) \
3032
({ \

arch/arc/include/asm/jump_label.h

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
/* SPDX-License-Identifier: GPL-2.0 */
2+
#ifndef _ASM_ARC_JUMP_LABEL_H
3+
#define _ASM_ARC_JUMP_LABEL_H
4+
5+
#ifndef __ASSEMBLY__
6+
7+
#include <linux/stringify.h>
8+
#include <linux/types.h>
9+
10+
#define JUMP_LABEL_NOP_SIZE 4
11+
12+
/*
13+
* NOTE about '.balign 4':
14+
*
15+
* To make atomic update of patched instruction available we need to guarantee
16+
* that this instruction doesn't cross L1 cache line boundary.
17+
*
18+
* As of today we simply align instruction which can be patched by 4 byte using
19+
* ".balign 4" directive. In that case patched instruction is aligned with one
20+
* 16-bit NOP_S if this is required.
21+
* However 'align by 4' directive is much stricter than is actually required.
22+
* It's enough that our 32-bit instruction doesn't cross L1 cache line boundary /
23+
* L1 I$ fetch block boundary which can be achieved by using
24+
* ".bundle_align_mode" assembler directive. That will save us from adding
25+
* useless NOP_S padding in most of the cases.
26+
*
27+
* TODO: switch to ".bundle_align_mode" directive when it will be
28+
* supported by ARC toolchain.
29+
*/
30+
31+
/*
 * arch_static_branch - patchable branch site that starts out as a NOP.
 * @key:    static key this site is associated with.
 * @branch: 0 or 1; used as a byte offset into @key so that the value stored
 *          in the jump table is the key address with @branch in its low bit.
 *
 * Emits a "nop" at local label 1, aligned to JUMP_LABEL_NOP_SIZE bytes, and
 * records three words in the __jump_table section: the address of that
 * instruction (1b), the address of the l_yes label, and &((char *)key)[branch].
 *
 * Returns false when execution falls through the instruction at label 1, and
 * true when control arrives at l_yes (i.e. after the NOP has been replaced by
 * a branch to l_yes, see arch_jump_label_transform()).
 */
static __always_inline bool arch_static_branch(struct static_key *key,
32+
bool branch)
33+
{
34+
asm_volatile_goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)" \n"
35+
"1: \n"
36+
"nop \n"
37+
".pushsection __jump_table, \"aw\" \n"
38+
".word 1b, %l[l_yes], %c0 \n"
39+
".popsection \n"
40+
: : "i" (&((char *)key)[branch]) : : l_yes);
41+
42+
return false;
43+
l_yes:
44+
return true;
45+
}
46+
47+
/*
 * arch_static_branch_jump - patchable branch site that starts out as a branch.
 * @key:    static key this site is associated with.
 * @branch: 0 or 1; used as a byte offset into @key so that the value stored
 *          in the jump table is the key address with @branch in its low bit.
 *
 * Emits "b l_yes" at local label 1, aligned to JUMP_LABEL_NOP_SIZE bytes, and
 * records three words in the __jump_table section: the address of that
 * instruction (1b), the address of the l_yes label, and &((char *)key)[branch].
 *
 * Returns true when control arrives at l_yes (the initial state, since the
 * emitted instruction branches there), and false when execution falls through
 * the instruction at label 1 (i.e. after it has been replaced by a NOP).
 */
static __always_inline bool arch_static_branch_jump(struct static_key *key,
48+
bool branch)
49+
{
50+
asm_volatile_goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)" \n"
51+
"1: \n"
52+
"b %l[l_yes] \n"
53+
".pushsection __jump_table, \"aw\" \n"
54+
".word 1b, %l[l_yes], %c0 \n"
55+
".popsection \n"
56+
: : "i" (&((char *)key)[branch]) : : l_yes);
57+
58+
return false;
59+
l_yes:
60+
return true;
61+
}
62+
63+
/* Width of one __jump_table word; each entry field is emitted with ".word". */
typedef u32 jump_label_t;
64+
65+
/*
 * One __jump_table record. Field order matches the ".word 1b, %l[l_yes], %c0"
 * triple emitted by arch_static_branch() / arch_static_branch_jump():
 *   code   - address of the patchable instruction (label 1)
 *   target - address of the l_yes label the branch jumps to
 *   key    - address of the static key offset by the 'branch' argument
 */
struct jump_entry {
66+
jump_label_t code;
67+
jump_label_t target;
68+
jump_label_t key;
69+
};
70+
71+
#endif /* __ASSEMBLY__ */
72+
#endif

arch/arc/kernel/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ obj-$(CONFIG_ARC_EMUL_UNALIGNED) += unaligned.o
2020
obj-$(CONFIG_KGDB) += kgdb.o
2121
obj-$(CONFIG_ARC_METAWARE_HLINK) += arc_hostlink.o
2222
obj-$(CONFIG_PERF_EVENTS) += perf_event.o
23+
obj-$(CONFIG_JUMP_LABEL) += jump_label.o
2324

2425
obj-$(CONFIG_ARC_FPU_SAVE_RESTORE) += fpu.o
2526
CFLAGS_fpu.o += -mdpfp

arch/arc/kernel/jump_label.c

Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
3+
#include <linux/kernel.h>
4+
#include <linux/jump_label.h>
5+
6+
#include "asm/cacheflush.h"
7+
8+
#define JUMPLABEL_ERR "ARC: jump_label: ERROR: "
9+
10+
/*
 * Report an unrecoverable jump label error and stop right there with BUG(),
 * so the failure is caught at the point of detection. The message is
 * prefixed with JUMPLABEL_ERR and takes printf-style arguments.
 */
#define arc_jl_fatal(fmt, ...)						\
({									\
	pr_err(JUMPLABEL_ERR fmt, ##__VA_ARGS__);			\
	BUG();								\
})
16+
17+
/*
 * Return the encoding of a single 32-bit NOP, laid out in middle endian so it
 * can be stored directly over a patch site.
 */
static inline u32 arc_gen_nop(void)
{
	const u32 nop_insn = 0x7000264a;

	return nop_insn;
}
22+
23+
/*
24+
* Atomic update of patched instruction is only available if this
25+
* instruction doesn't cross L1 cache line boundary. You can read about
26+
* the way we achieve this in arc/include/asm/jump_label.h
27+
*/
28+
static inline void instruction_align_assert(void *addr, int len)
29+
{
30+
unsigned long a = (unsigned long)addr;
31+
32+
if ((a >> L1_CACHE_SHIFT) != ((a + len - 1) >> L1_CACHE_SHIFT))
33+
arc_jl_fatal("instruction (addr %px) cross L1 cache line border",
34+
addr);
35+
}
36+
37+
/*
38+
* ARCv2 'Branch unconditionally' instruction:
39+
* 00000ssssssssss1SSSSSSSSSSNRtttt
40+
* s S[n:0] lower bits signed immediate (number is bitfield size)
41+
* S S[m:n+1] upper bits signed immediate (number is bitfield size)
42+
* t S[24:21] upper bits signed immediate (branch unconditionally far)
43+
* N N <.d> delay slot mode
44+
* R R Reserved
45+
*/
46+
static inline u32 arc_gen_branch(jump_label_t pc, jump_label_t target)
47+
{
48+
u32 instruction_l, instruction_r;
49+
u32 pcl = pc & GENMASK(31, 2);
50+
u32 u_offset = target - pcl;
51+
u32 s, S, t;
52+
53+
/*
54+
* Offset in 32-bit branch instruction must to fit into s25.
55+
* Something is terribly broken if we get such huge offset within one
56+
* function.
57+
*/
58+
if ((s32)u_offset < -16777216 || (s32)u_offset > 16777214)
59+
arc_jl_fatal("gen branch with offset (%d) not fit in s25",
60+
(s32)u_offset);
61+
62+
/*
63+
* All instructions are aligned by 2 bytes so we should never get offset
64+
* here which is not 2 bytes aligned.
65+
*/
66+
if (u_offset & 0x1)
67+
arc_jl_fatal("gen branch with offset (%d) unaligned to 2 bytes",
68+
(s32)u_offset);
69+
70+
s = (u_offset >> 1) & GENMASK(9, 0);
71+
S = (u_offset >> 11) & GENMASK(9, 0);
72+
t = (u_offset >> 21) & GENMASK(3, 0);
73+
74+
/* 00000ssssssssss1 */
75+
instruction_l = (s << 1) | 0x1;
76+
/* SSSSSSSSSSNRtttt */
77+
instruction_r = (S << 6) | t;
78+
79+
return (instruction_r << 16) | (instruction_l & GENMASK(15, 0));
80+
}
81+
82+
void arch_jump_label_transform(struct jump_entry *entry,
83+
enum jump_label_type type)
84+
{
85+
jump_label_t *instr_addr = (jump_label_t *)entry->code;
86+
u32 instr;
87+
88+
instruction_align_assert(instr_addr, JUMP_LABEL_NOP_SIZE);
89+
90+
if (type == JUMP_LABEL_JMP)
91+
instr = arc_gen_branch(entry->code, entry->target);
92+
else
93+
instr = arc_gen_nop();
94+
95+
WRITE_ONCE(*instr_addr, instr);
96+
flush_icache_range(entry->code, entry->code + JUMP_LABEL_NOP_SIZE);
97+
}
98+
99+
void arch_jump_label_transform_static(struct jump_entry *entry,
100+
enum jump_label_type type)
101+
{
102+
/*
103+
* We use only one NOP type (1x, 4 byte) in arch_static_branch, so
104+
* there's no need to patch an identical NOP over the top of it here.
105+
* The generic code calls 'arch_jump_label_transform' if the NOP needs
106+
* to be replaced by a branch, so 'arch_jump_label_transform_static' is
107+
* never called with type other than JUMP_LABEL_NOP.
108+
*/
109+
BUG_ON(type != JUMP_LABEL_NOP);
110+
}
111+
112+
#ifdef CONFIG_ARC_DBG_JUMP_LABEL
113+
#define SELFTEST_MSG "ARC: instruction generation self-test: "
114+
115+
/*
 * One arc_gen_branch() self-test vector: the instruction address (pc) and
 * branch destination (target_address) passed to arc_gen_branch(), and the
 * instruction word it is expected to return (expected_instr).
 */
struct arc_gen_branch_testdata {
116+
jump_label_t pc;
117+
jump_label_t target_address;
118+
u32 expected_instr;
119+
};
120+
121+
static __init int branch_gen_test(const struct arc_gen_branch_testdata *test)
122+
{
123+
u32 instr_got;
124+
125+
instr_got = arc_gen_branch(test->pc, test->target_address);
126+
if (instr_got == test->expected_instr)
127+
return 0;
128+
129+
pr_err(SELFTEST_MSG "FAIL:\n arc_gen_branch(0x%08x, 0x%08x) != 0x%08x, got 0x%08x\n",
130+
test->pc, test->target_address,
131+
test->expected_instr, instr_got);
132+
133+
return -EFAULT;
134+
}
135+
136+
/*
137+
* Offset field in branch instruction is not continuous. Test all
138+
* available offset field and sign combinations. Test data is generated
139+
* from real working code.
140+
*/
141+
static const struct arc_gen_branch_testdata arcgenbr_test_data[] __initconst = {
142+
{0x90007548, 0x90007514, 0xffcf07cd}, /* tiny (-52) offs */
143+
{0x9000c9c0, 0x9000c782, 0xffcf05c3}, /* tiny (-574) offs */
144+
{0x9000cc1c, 0x9000c782, 0xffcf0367}, /* tiny (-1178) offs */
145+
{0x9009dce0, 0x9009d106, 0xff8f0427}, /* small (-3034) offs */
146+
{0x9000f5de, 0x90007d30, 0xfc0f0755}, /* big (-30892) offs */
147+
{0x900a2444, 0x90035f64, 0xc9cf0321}, /* huge (-443616) offs */
148+
{0x90007514, 0x9000752c, 0x00000019}, /* tiny (+24) offs */
149+
{0x9001a578, 0x9001a77a, 0x00000203}, /* tiny (+514) offs */
150+
{0x90031ed8, 0x90032634, 0x0000075d}, /* tiny (+1884) offs */
151+
{0x9008c7f2, 0x9008d3f0, 0x00400401}, /* small (+3072) offs */
152+
{0x9000bb38, 0x9003b340, 0x17c00009}, /* big (+194568) offs */
153+
{0x90008f44, 0x90578d80, 0xb7c2063d} /* huge (+5701180) offs */
154+
};
155+
156+
static __init int instr_gen_test(void)
157+
{
158+
int i;
159+
160+
for (i = 0; i < ARRAY_SIZE(arcgenbr_test_data); i++)
161+
if (branch_gen_test(&arcgenbr_test_data[i]))
162+
return -EFAULT;
163+
164+
pr_info(SELFTEST_MSG "OK\n");
165+
166+
return 0;
167+
}
168+
early_initcall(instr_gen_test);
169+
170+
#endif /* CONFIG_ARC_DBG_JUMP_LABEL */

0 commit comments

Comments
 (0)