
Commit 1f67624

Peter Zijlstra authored and Ingo Molnar committed
x86/alternatives: Implement a better poke_int3_handler() completion scheme
Commit: 285a54e ("x86/alternatives: Sync bp_patching update for avoiding NULL pointer exception")
added an additional text_poke_sync() IPI to text_poke_bp_batch() to handle the rare case
where another CPU is still inside an INT3 handler while we clear the global state.

Instead of spraying IPIs around, count the active INT3 handlers and wait for them to go
away before proceeding to clear/reuse the data.

Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Reviewed-by: Masami Hiramatsu <[email protected]>
Reviewed-by: Daniel Bristot de Oliveira <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
1 parent 46f5cfc commit 1f67624
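
As background for the diff that follows: the completion scheme the commit message describes — publish a descriptor, have each INT3 handler take a reference on it, and make the patcher wait for the count to drain before the descriptor is reused — can be sketched with plain C11 atomics. The sketch below is an illustrative user-space analogue, not the kernel code: struct desc, g_desc and publish_work_and_complete() are made-up names, try_get_desc()/put_desc() are simplified stand-ins for the kernel helpers of the same name, C11 atomics replace READ_ONCE()/smp_store_release()/atomic_inc_not_zero()/atomic_cond_read_acquire(), and the lifetime guarantees of the real code additionally rely on the text_poke_sync() IPIs issued while the INT3 bytes are still present.

/*
 * Illustrative user-space analogue (not the kernel code) of the refcount
 * based completion scheme: readers take a reference on a published
 * descriptor; the publisher unpublishes it, drops its own reference and
 * waits for the count to drain before the descriptor goes out of scope.
 */
#include <stdatomic.h>
#include <stddef.h>

struct desc {
	void *payload;			/* stands in for vec/nr_entries */
	atomic_int refs;		/* publisher owns the initial reference */
};

static _Atomic(struct desc *) g_desc;	/* analogue of bp_desc */

/* Reader side: take a reference only if the descriptor is still live. */
static struct desc *try_get_desc(void)
{
	struct desc *d = atomic_load_explicit(&g_desc, memory_order_acquire);

	if (!d)
		return NULL;

	/* like atomic_inc_not_zero(): never resurrect a drained descriptor */
	int refs = atomic_load_explicit(&d->refs, memory_order_relaxed);
	do {
		if (refs == 0)
			return NULL;
	} while (!atomic_compare_exchange_weak_explicit(&d->refs, &refs, refs + 1,
							memory_order_acquire,
							memory_order_relaxed));
	return d;
}

/* Reader side: drop the reference; release ordering makes the reader's
 * accesses visible before the publisher can observe a zero count. */
static void put_desc(struct desc *d)
{
	atomic_fetch_sub_explicit(&d->refs, 1, memory_order_release);
}

/* Publisher side: publish a stack descriptor, do the work, unpublish it,
 * then wait until every in-flight reader has called put_desc(). */
static void publish_work_and_complete(void *payload, void (*work)(void))
{
	struct desc d = { .payload = payload };

	atomic_init(&d.refs, 1);		/* the publisher's own reference */
	atomic_store_explicit(&g_desc, &d, memory_order_release);

	work();		/* readers may call try_get_desc()/put_desc() meanwhile */

	atomic_store_explicit(&g_desc, NULL, memory_order_relaxed);

	/* Drop the initial reference; if readers are still in flight, spin
	 * until the count hits zero (the kernel uses atomic_cond_read_acquire()). */
	if (atomic_fetch_sub_explicit(&d.refs, 1, memory_order_release) != 1) {
		while (atomic_load_explicit(&d.refs, memory_order_acquire) != 0)
			;	/* rare contended path, a busy-wait is fine */
	}
}

Note that the patcher owns the initial reference, so in the common case where no CPU is inside the handler when the pointer is cleared, the final decrement takes the count straight to zero and no waiting happens at all; that is the atomic_dec_and_test() fast path in the kernel version.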

File tree

1 file changed: +53 −31 lines changed

arch/x86/kernel/alternative.c

Lines changed: 53 additions & 31 deletions
@@ -948,10 +948,29 @@ struct text_poke_loc {
 	const u8 text[POKE_MAX_OPCODE_SIZE];
 };
 
-static struct bp_patching_desc {
+struct bp_patching_desc {
 	struct text_poke_loc *vec;
 	int nr_entries;
-} bp_patching;
+	atomic_t refs;
+};
+
+static struct bp_patching_desc *bp_desc;
+
+static inline struct bp_patching_desc *try_get_desc(struct bp_patching_desc **descp)
+{
+	struct bp_patching_desc *desc = READ_ONCE(*descp); /* rcu_dereference */
+
+	if (!desc || !atomic_inc_not_zero(&desc->refs))
+		return NULL;
+
+	return desc;
+}
+
+static inline void put_desc(struct bp_patching_desc *desc)
+{
+	smp_mb__before_atomic();
+	atomic_dec(&desc->refs);
+}
 
 static inline void *text_poke_addr(struct text_poke_loc *tp)
 {
@@ -972,26 +991,26 @@ NOKPROBE_SYMBOL(patch_cmp);
 
 int notrace poke_int3_handler(struct pt_regs *regs)
 {
+	struct bp_patching_desc *desc;
 	struct text_poke_loc *tp;
+	int len, ret = 0;
 	void *ip;
-	int len;
+
+	if (user_mode(regs))
+		return 0;
 
 	/*
 	 * Having observed our INT3 instruction, we now must observe
-	 * bp_patching.nr_entries.
+	 * bp_desc:
 	 *
-	 *	nr_entries != 0			INT3
+	 *	bp_desc = desc			INT3
 	 *	WMB				RMB
-	 *	write INT3			if (nr_entries)
-	 *
-	 * Idem for other elements in bp_patching.
+	 *	write INT3			if (desc)
 	 */
 	smp_rmb();
 
-	if (likely(!bp_patching.nr_entries))
-		return 0;
-
-	if (user_mode(regs))
+	desc = try_get_desc(&bp_desc);
+	if (!desc)
 		return 0;
 
 	/*
@@ -1002,16 +1021,16 @@ int notrace poke_int3_handler(struct pt_regs *regs)
 	/*
 	 * Skip the binary search if there is a single member in the vector.
 	 */
-	if (unlikely(bp_patching.nr_entries > 1)) {
-		tp = bsearch(ip, bp_patching.vec, bp_patching.nr_entries,
+	if (unlikely(desc->nr_entries > 1)) {
+		tp = bsearch(ip, desc->vec, desc->nr_entries,
 			     sizeof(struct text_poke_loc),
 			     patch_cmp);
 		if (!tp)
-			return 0;
+			goto out_put;
 	} else {
-		tp = bp_patching.vec;
+		tp = desc->vec;
 		if (text_poke_addr(tp) != ip)
-			return 0;
+			goto out_put;
 	}
 
 	len = text_opcode_size(tp->opcode);
@@ -1023,7 +1042,7 @@ int notrace poke_int3_handler(struct pt_regs *regs)
 		 * Someone poked an explicit INT3, they'll want to handle it,
 		 * do not consume.
 		 */
-		return 0;
+		goto out_put;
 
 	case CALL_INSN_OPCODE:
 		int3_emulate_call(regs, (long)ip + tp->rel32);
@@ -1038,7 +1057,11 @@ int notrace poke_int3_handler(struct pt_regs *regs)
 		BUG();
 	}
 
-	return 1;
+	ret = 1;
+
+out_put:
+	put_desc(desc);
+	return ret;
 }
 NOKPROBE_SYMBOL(poke_int3_handler);
 
@@ -1069,14 +1092,18 @@ static int tp_vec_nr;
  */
 static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
 {
+	struct bp_patching_desc desc = {
+		.vec = tp,
+		.nr_entries = nr_entries,
+		.refs = ATOMIC_INIT(1),
+	};
 	unsigned char int3 = INT3_INSN_OPCODE;
 	unsigned int i;
 	int do_sync;
 
 	lockdep_assert_held(&text_mutex);
 
-	bp_patching.vec = tp;
-	bp_patching.nr_entries = nr_entries;
+	smp_store_release(&bp_desc, &desc); /* rcu_assign_pointer */
 
 	/*
 	 * Corresponding read barrier in int3 notifier for making sure the
@@ -1131,17 +1158,12 @@ static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
 		text_poke_sync();
 
 	/*
-	 * sync_core() implies an smp_mb() and orders this store against
-	 * the writing of the new instruction.
+	 * Remove and synchronize_rcu(), except we have a very primitive
+	 * refcount based completion.
 	 */
-	bp_patching.nr_entries = 0;
-	/*
-	 * This sync_core() call ensures that all INT3 handlers in progress
-	 * have finished. This allows poke_int3_handler() after this to
-	 * avoid touching bp_paching.vec by checking nr_entries == 0.
-	 */
-	text_poke_sync();
-	bp_patching.vec = NULL;
+	WRITE_ONCE(bp_desc, NULL); /* RCU_INIT_POINTER */
+	if (!atomic_dec_and_test(&desc.refs))
+		atomic_cond_read_acquire(&desc.refs, !VAL);
 }
 
 void text_poke_loc_init(struct text_poke_loc *tp, void *addr,

Comments (0)