Commit a581909

Merge branch 'runtime-constants'

Merge runtime constants infrastructure with implementations for x86 and arm64.

This is one of four branches that came out of me looking at profiles of my kernel build filesystem load on my 128-core Altra arm64 system, where pathname walking and the user copies (particularly strncpy_from_user() for fetching the pathname from user space) are very hot.

This is a very specialized "instruction alternatives" model where the dentry hash pointer and hash count will be constants for the lifetime of the kernel, but the allocations are not static, being done early during kernel boot. In order to avoid the pointer load and dynamic shift, we just rewrite the constants in the instructions in place.

We can't use the "generic" alternative instructions infrastructure, because different architectures do it very differently, and it's actually simpler to just have very specific helpers, with a fallback to the generic ("old") model of just using variables for architectures that do not implement the runtime constant patching infrastructure.

Link: https://lore.kernel.org/all/CAHk-=widPe38fUNjUOmX11ByDckaeEo9tN4Eiyke9u1SAtu9sA@mail.gmail.com/

* runtime-constants:
  arm64: add 'runtime constant' support
  runtime constants: add x86 architecture support
  runtime constants: add default dummy infrastructure
  vfs: dcache: move hashlen_hash() from callers into d_hash()
2 parents: 0c38364 + 94a2bc0

File tree: 8 files changed, +192 -4 lines

arch/arm64/include/asm/runtime-const.h

Lines changed: 88 additions & 0 deletions

@@ -0,0 +1,88 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_RUNTIME_CONST_H
#define _ASM_RUNTIME_CONST_H

#include <asm/cacheflush.h>

/* Sigh. You can still run arm64 in BE mode */
#include <asm/byteorder.h>

#define runtime_const_ptr(sym) ({				\
	typeof(sym) __ret;					\
	asm_inline("1:\t"					\
		"movz %0, #0xcdef\n\t"				\
		"movk %0, #0x89ab, lsl #16\n\t"			\
		"movk %0, #0x4567, lsl #32\n\t"			\
		"movk %0, #0x0123, lsl #48\n\t"			\
		".pushsection runtime_ptr_" #sym ",\"a\"\n\t"	\
		".long 1b - .\n\t"				\
		".popsection"					\
		:"=r" (__ret));					\
	__ret; })

#define runtime_const_shift_right_32(val, sym) ({		\
	unsigned long __ret;					\
	asm_inline("1:\t"					\
		"lsr %w0,%w1,#12\n\t"				\
		".pushsection runtime_shift_" #sym ",\"a\"\n\t"	\
		".long 1b - .\n\t"				\
		".popsection"					\
		:"=r" (__ret)					\
		:"r" (0u+(val)));				\
	__ret; })

#define runtime_const_init(type, sym) do {		\
	extern s32 __start_runtime_##type##_##sym[];	\
	extern s32 __stop_runtime_##type##_##sym[];	\
	runtime_const_fixup(__runtime_fixup_##type,	\
		(unsigned long)(sym),			\
		__start_runtime_##type##_##sym,		\
		__stop_runtime_##type##_##sym);		\
} while (0)

/* 16-bit immediate for wide move (movz and movk) in bits 5..20 */
static inline void __runtime_fixup_16(__le32 *p, unsigned int val)
{
	u32 insn = le32_to_cpu(*p);
	insn &= 0xffe0001f;
	insn |= (val & 0xffff) << 5;
	*p = cpu_to_le32(insn);
}

static inline void __runtime_fixup_caches(void *where, unsigned int insns)
{
	unsigned long va = (unsigned long)where;
	caches_clean_inval_pou(va, va + 4*insns);
}

static inline void __runtime_fixup_ptr(void *where, unsigned long val)
{
	__le32 *p = lm_alias(where);
	__runtime_fixup_16(p, val);
	__runtime_fixup_16(p+1, val >> 16);
	__runtime_fixup_16(p+2, val >> 32);
	__runtime_fixup_16(p+3, val >> 48);
	__runtime_fixup_caches(where, 4);
}

/* Immediate value is 6 bits starting at bit #16 */
static inline void __runtime_fixup_shift(void *where, unsigned long val)
{
	__le32 *p = lm_alias(where);
	u32 insn = le32_to_cpu(*p);
	insn &= 0xffc0ffff;
	insn |= (val & 63) << 16;
	*p = cpu_to_le32(insn);
	__runtime_fixup_caches(where, 1);
}

static inline void runtime_const_fixup(void (*fn)(void *, unsigned long),
				       unsigned long val, s32 *start, s32 *end)
{
	while (start < end) {
		fn(*start + (void *)start, val);
		start++;
	}
}

#endif
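How the patching above works: runtime_const_ptr() emits a movz/movk sequence carrying the placeholder constant 0x0123456789abcdef, and __runtime_fixup_ptr() later rewrites the 16-bit immediate field (bits 5..20) of each of the four instructions with the real pointer. A minimal user-space sketch of that bit manipulation, not the kernel code itself: the encodings assume x0 as the destination register, the pointer value is made up, and the kernel's byte-order handling is omitted.

#include <stdio.h>
#include <stdint.h>

static void fixup_16(uint32_t *insn, unsigned int val)
{
	*insn &= 0xffe0001f;		/* keep opcode, hw shift and Rd bits */
	*insn |= (val & 0xffff) << 5;	/* new 16-bit immediate in bits 5..20 */
}

int main(void)
{
	/* movz x0,#0xcdef / movk x0,#0x89ab,lsl 16 / lsl 32 / lsl 48 */
	uint32_t insn[4] = { 0xd299bde0, 0xf2b13560, 0xf2c8ace0, 0xf2e02460 };
	uint64_t ptr = 0xffff800080123456ull;	/* a made-up kernel pointer */
	uint64_t out = 0;

	for (int i = 0; i < 4; i++)
		fixup_16(&insn[i], ptr >> (16 * i));

	/* pull the immediates back out to verify the round trip */
	for (int i = 0; i < 4; i++)
		out |= (uint64_t)((insn[i] >> 5) & 0xffff) << (16 * i);

	printf("patched pointer: 0x%llx\n", (unsigned long long)out);
	return 0;
}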

arch/arm64/kernel/vmlinux.lds.S

Lines changed: 3 additions & 0 deletions
@@ -264,6 +264,9 @@ SECTIONS
 		EXIT_DATA
 	}
 
+	RUNTIME_CONST(shift, d_hash_shift)
+	RUNTIME_CONST(ptr, dentry_hashtable)
+
 	PERCPU_SECTION(L1_CACHE_BYTES)
 	HYPERVISOR_PERCPU_SECTION
arch/x86/include/asm/runtime-const.h

Lines changed: 61 additions & 0 deletions

@@ -0,0 +1,61 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_RUNTIME_CONST_H
#define _ASM_RUNTIME_CONST_H

#define runtime_const_ptr(sym) ({				\
	typeof(sym) __ret;					\
	asm_inline("mov %1,%0\n1:\n"				\
		".pushsection runtime_ptr_" #sym ",\"a\"\n\t"	\
		".long 1b - %c2 - .\n\t"			\
		".popsection"					\
		:"=r" (__ret)					\
		:"i" ((unsigned long)0x0123456789abcdefull),	\
		 "i" (sizeof(long)));				\
	__ret; })

// The 'typeof' will create at _least_ a 32-bit type, but
// will happily also take a bigger type and the 'shrl' will
// clear the upper bits
#define runtime_const_shift_right_32(val, sym) ({		\
	typeof(0u+(val)) __ret = (val);				\
	asm_inline("shrl $12,%k0\n1:\n"				\
		".pushsection runtime_shift_" #sym ",\"a\"\n\t"	\
		".long 1b - 1 - .\n\t"				\
		".popsection"					\
		:"+r" (__ret));					\
	__ret; })

#define runtime_const_init(type, sym) do {		\
	extern s32 __start_runtime_##type##_##sym[];	\
	extern s32 __stop_runtime_##type##_##sym[];	\
	runtime_const_fixup(__runtime_fixup_##type,	\
		(unsigned long)(sym),			\
		__start_runtime_##type##_##sym,		\
		__stop_runtime_##type##_##sym);		\
} while (0)

/*
 * The text patching is trivial - you can only do this at init time,
 * when the text section hasn't been marked RO, and before the text
 * has ever been executed.
 */
static inline void __runtime_fixup_ptr(void *where, unsigned long val)
{
	*(unsigned long *)where = val;
}

static inline void __runtime_fixup_shift(void *where, unsigned long val)
{
	*(unsigned char *)where = val;
}

static inline void runtime_const_fixup(void (*fn)(void *, unsigned long),
				       unsigned long val, s32 *start, s32 *end)
{
	while (start < end) {
		fn(*start + (void *)start, val);
		start++;
	}
}

#endif
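Both architectures locate their patch sites the same way: each entry in a runtime_ptr_*/runtime_shift_* section is a self-relative s32 offset. On x86, ".long 1b - %c2 - ." records the distance from the entry to the start of the 8-byte mov immediate (label 1 minus sizeof(long), since the immediate ends right at the label), and runtime_const_fixup() reverses it with "*start + (void *)start". A runnable user-space sketch of the same scheme, with a plain byte buffer standing in for the instruction stream and the assumption that the two objects sit within a s32 range of each other, as kernel text and these sections do:

#include <stdio.h>
#include <string.h>
#include <stdint.h>

static unsigned char text[16];	/* stand-in for the patched instruction bytes */
static int32_t reloc[1];	/* stand-in for a runtime_ptr_<sym> section */

static void fixup_ptr(void *where, unsigned long val)
{
	memcpy(where, &val, sizeof(val));	/* the x86 fixup is a plain store */
}

static void const_fixup(void (*fn)(void *, unsigned long),
			unsigned long val, int32_t *start, int32_t *end)
{
	while (start < end) {
		/* mirrors the kernel's "*start + (void *)start" */
		fn((char *)start + *start, val);
		start++;
	}
}

int main(void)
{
	/* pretend the 8-byte mov immediate starts at text[2] */
	reloc[0] = (int32_t)((intptr_t)&text[2] - (intptr_t)&reloc[0]);

	const_fixup(fixup_ptr, 0x0123456789abcdefUL, reloc, reloc + 1);

	unsigned long patched;
	memcpy(&patched, &text[2], sizeof(patched));
	printf("patched immediate: 0x%lx\n", patched);
	return 0;
}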

arch/x86/kernel/vmlinux.lds.S

Lines changed: 3 additions & 0 deletions
@@ -357,6 +357,9 @@ SECTIONS
 	PERCPU_SECTION(INTERNODE_CACHE_BYTES)
 #endif
 
+	RUNTIME_CONST(shift, d_hash_shift)
+	RUNTIME_CONST(ptr, dentry_hashtable)
+
 	. = ALIGN(PAGE_SIZE);
 
 	/* freed after init ends here */

fs/dcache.c

Lines changed: 13 additions & 4 deletions
@@ -35,6 +35,8 @@
 #include "internal.h"
 #include "mount.h"
 
+#include <asm/runtime-const.h>
+
 /*
  * Usage:
  * dcache->d_inode->i_lock protects:
@@ -100,9 +102,10 @@ static unsigned int d_hash_shift __ro_after_init;
 
 static struct hlist_bl_head *dentry_hashtable __ro_after_init;
 
-static inline struct hlist_bl_head *d_hash(unsigned int hash)
+static inline struct hlist_bl_head *d_hash(unsigned long hashlen)
 {
-	return dentry_hashtable + (hash >> d_hash_shift);
+	return runtime_const_ptr(dentry_hashtable) +
+		runtime_const_shift_right_32(hashlen, d_hash_shift);
 }
 
 #define IN_LOOKUP_SHIFT 10
@@ -2110,7 +2113,7 @@ static noinline struct dentry *__d_lookup_rcu_op_compare(
 	unsigned *seqp)
 {
 	u64 hashlen = name->hash_len;
-	struct hlist_bl_head *b = d_hash(hashlen_hash(hashlen));
+	struct hlist_bl_head *b = d_hash(hashlen);
 	struct hlist_bl_node *node;
 	struct dentry *dentry;
 
@@ -2177,7 +2180,7 @@ struct dentry *__d_lookup_rcu(const struct dentry *parent,
 {
 	u64 hashlen = name->hash_len;
 	const unsigned char *str = name->name;
-	struct hlist_bl_head *b = d_hash(hashlen_hash(hashlen));
+	struct hlist_bl_head *b = d_hash(hashlen);
 	struct hlist_bl_node *node;
 	struct dentry *dentry;
 
@@ -3132,6 +3135,9 @@ static void __init dcache_init_early(void)
 					0,
 					0);
 	d_hash_shift = 32 - d_hash_shift;
+
+	runtime_const_init(shift, d_hash_shift);
+	runtime_const_init(ptr, dentry_hashtable);
 }
 
 static void __init dcache_init(void)
@@ -3160,6 +3166,9 @@ static void __init dcache_init(void)
 					0,
 					0);
 	d_hash_shift = 32 - d_hash_shift;
+
+	runtime_const_init(shift, d_hash_shift);
+	runtime_const_init(ptr, dentry_hashtable);
 }
 
 /* SLAB cache for __getname() consumers */
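The last commit in the branch list is what makes the one-instruction shift possible: name->hash_len packs the 32-bit hash in the low word and the name length above it (hashlen_create() in include/linux/stringhash.h), so d_hash() can take the whole 64-bit value and let the 32-bit shift (shrl on x86, lsr on the w register on arm64) discard the length bits, instead of having callers extract the hash first. A quick user-space check of that equivalence, with arbitrary example values:

#include <stdio.h>
#include <stdint.h>
#include <assert.h>

/* as defined in include/linux/stringhash.h */
#define hashlen_create(hash, len)	((uint64_t)(len) << 32 | (uint32_t)(hash))
#define hashlen_hash(hashlen)		((uint32_t)(hashlen))

int main(void)
{
	uint64_t hashlen = hashlen_create(0xdeadbeef, 17);
	unsigned int shift = 20;	/* an example d_hash_shift value */

	/* old callers: extract the 32-bit hash first, then shift */
	uint32_t old_way = hashlen_hash(hashlen) >> shift;
	/* new d_hash(): shift the whole word as a 32-bit value */
	uint32_t new_way = (uint32_t)hashlen >> shift;

	assert(old_way == new_way);
	printf("bucket index: 0x%x\n", new_way);
	return 0;
}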

include/asm-generic/Kbuild

Lines changed: 1 addition & 0 deletions
@@ -46,6 +46,7 @@ mandatory-y += pci.h
 mandatory-y += percpu.h
 mandatory-y += pgalloc.h
 mandatory-y += preempt.h
+mandatory-y += runtime-const.h
 mandatory-y += rwonce.h
 mandatory-y += sections.h
 mandatory-y += serial.h
include/asm-generic/runtime-const.h

Lines changed: 15 additions & 0 deletions

@@ -0,0 +1,15 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_RUNTIME_CONST_H
#define _ASM_RUNTIME_CONST_H

/*
 * This is the fallback for when the architecture doesn't
 * support the runtime const operations.
 *
 * We just use the actual symbols as-is.
 */
#define runtime_const_ptr(sym) (sym)
#define runtime_const_shift_right_32(val, sym) ((u32)(val)>>(sym))
#define runtime_const_init(type,sym) do { } while (0)

#endif
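With this fallback, generic code written against the three macros degrades to exactly the pre-patching form: a variable load and a variable shift. A user-space sketch mirroring the d_hash() usage in fs/dcache.c, with hypothetical my_* names and uint32_t substituted for the kernel's u32:

#include <stdio.h>
#include <stdint.h>

#define runtime_const_ptr(sym)			(sym)
#define runtime_const_shift_right_32(val, sym)	((uint32_t)(val) >> (sym))
#define runtime_const_init(type, sym)		do { } while (0)

static unsigned int my_shift = 20;	/* 32 - 12, i.e. 4096 buckets */
static int my_table[1 << 12];
static int *my_hashtable = my_table;

static inline int *my_hash(unsigned long hashlen)
{
	return runtime_const_ptr(my_hashtable) +
		runtime_const_shift_right_32(hashlen, my_shift);
}

int main(void)
{
	runtime_const_init(ptr, my_hashtable);	/* a no-op in the fallback */
	printf("bucket: %td\n", my_hash(0xabcd1234) - my_hashtable);
	return 0;
}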

include/asm-generic/vmlinux.lds.h

Lines changed: 8 additions & 0 deletions
@@ -944,6 +944,14 @@
 #define CON_INITCALL \
 	BOUNDED_SECTION_POST_LABEL(.con_initcall.init, __con_initcall, _start, _end)
 
+#define RUNTIME_NAME(t,x) runtime_##t##_##x
+
+#define RUNTIME_CONST(t,x)						\
+	. = ALIGN(8);							\
+	RUNTIME_NAME(t,x) : AT(ADDR(RUNTIME_NAME(t,x)) - LOAD_OFFSET) {	\
+		*(RUNTIME_NAME(t,x));					\
+	}
+
 /* Alignment must be consistent with (kunit_suite *) in include/kunit/test.h */
 #define KUNIT_TABLE() \
 	. = ALIGN(8); \
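Note that RUNTIME_CONST() only places the sections; the __start_runtime_* and __stop_runtime_* symbols that runtime_const_init() declares are supplied automatically by the linker for referenced output sections whose names are valid C identifiers. The same mechanism is available in user space on ELF targets with GCC/Clang and GNU ld or LLD, which makes it easy to demonstrate; the section name runtime_demo below is made up:

#include <stdio.h>

static int a __attribute__((used, section("runtime_demo"))) = 1;
static int b __attribute__((used, section("runtime_demo"))) = 2;

/* provided automatically by the linker, just like
 * __start_runtime_ptr_dentry_hashtable in runtime_const_init() */
extern int __start_runtime_demo[], __stop_runtime_demo[];

int main(void)
{
	for (int *p = __start_runtime_demo; p < __stop_runtime_demo; p++)
		printf("%d\n", *p);
	return 0;
}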
