
Commit efd608f

anadav authored and Peter Zijlstra committed
x86/alternative: Fix race in try_get_desc()
I encountered occasional crashes in poke_int3_handler() when kprobes are set, while accessing desc->vec.

The text poke mechanism claims to have RCU-like behavior, but there does not appear to be any quiescent state that ensures nobody still holds a reference to desc. As a result, the following race is possible and can lead to memory corruption:

  CPU0                                  CPU1
  ----                                  ----
  text_poke_bp_batch()
  -> smp_store_release(&bp_desc, &desc)

  [ notice that desc is on
    the stack ]

                                        poke_int3_handler()

                                        [ the INT3 might be a kprobe's,
                                          so sync events do not help ]

                                        -> try_get_desc(descp=&bp_desc)
                                           desc = __READ_ONCE(bp_desc)

                                           if (!desc) [false, success]
  WRITE_ONCE(bp_desc, NULL);
  atomic_dec_and_test(&desc.refs)

  [ success; the desc space on the
    stack is being reused and might
    hold a non-zero value ]

                                        arch_atomic_inc_not_zero(&desc->refs)

                                        [ might succeed, since desc points
                                          to stack memory that was freed
                                          and might have been reused ]

Fix this issue with a small, backportable patch: instead of trying to give bp_desc RCU-like behavior, eliminate the unnecessary level of indirection and hold the whole descriptor as a global. There is only ever a single descriptor at any given moment anyway.

Fixes: 1f67624 ("x86/alternatives: Implement a better poke_int3_handler() completion scheme")
Signed-off-by: Nadav Amit <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Cc: [email protected]
Link: https://lkml.kernel.org/r/[email protected]
1 parent e400ad8 commit efd608f

File tree

1 file changed: +23, -22 lines


arch/x86/kernel/alternative.c

Lines changed: 23 additions & 22 deletions
@@ -1319,22 +1319,23 @@ struct bp_patching_desc {
         atomic_t refs;
 };
 
-static struct bp_patching_desc *bp_desc;
+static struct bp_patching_desc bp_desc;
 
 static __always_inline
-struct bp_patching_desc *try_get_desc(struct bp_patching_desc **descp)
+struct bp_patching_desc *try_get_desc(void)
 {
-        /* rcu_dereference */
-        struct bp_patching_desc *desc = __READ_ONCE(*descp);
+        struct bp_patching_desc *desc = &bp_desc;
 
-        if (!desc || !arch_atomic_inc_not_zero(&desc->refs))
+        if (!arch_atomic_inc_not_zero(&desc->refs))
                 return NULL;
 
         return desc;
 }
 
-static __always_inline void put_desc(struct bp_patching_desc *desc)
+static __always_inline void put_desc(void)
 {
+        struct bp_patching_desc *desc = &bp_desc;
+
         smp_mb__before_atomic();
         arch_atomic_dec(&desc->refs);
 }
@@ -1367,15 +1368,15 @@ noinstr int poke_int3_handler(struct pt_regs *regs)
 
         /*
          * Having observed our INT3 instruction, we now must observe
-         * bp_desc:
+         * bp_desc with non-zero refcount:
          *
-         *      bp_desc = desc                  INT3
+         *      bp_desc.refs = 1                INT3
          *      WMB                             RMB
-         *      write INT3                      if (desc)
+         *      write INT3                      if (bp_desc.refs != 0)
          */
         smp_rmb();
 
-        desc = try_get_desc(&bp_desc);
+        desc = try_get_desc();
         if (!desc)
                 return 0;
 
@@ -1429,7 +1430,7 @@ noinstr int poke_int3_handler(struct pt_regs *regs)
         ret = 1;
 
 out_put:
-        put_desc(desc);
+        put_desc();
         return ret;
 }
 
@@ -1460,18 +1461,20 @@ static int tp_vec_nr;
  */
 static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
 {
-        struct bp_patching_desc desc = {
-                .vec = tp,
-                .nr_entries = nr_entries,
-                .refs = ATOMIC_INIT(1),
-        };
         unsigned char int3 = INT3_INSN_OPCODE;
         unsigned int i;
         int do_sync;
 
         lockdep_assert_held(&text_mutex);
 
-        smp_store_release(&bp_desc, &desc); /* rcu_assign_pointer */
+        bp_desc.vec = tp;
+        bp_desc.nr_entries = nr_entries;
+
+        /*
+         * Corresponds to the implicit memory barrier in try_get_desc() to
+         * ensure reading a non-zero refcount provides up to date bp_desc data.
+         */
+        atomic_set_release(&bp_desc.refs, 1);
 
         /*
          * Corresponding read barrier in int3 notifier for making sure the
@@ -1559,12 +1562,10 @@ static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries
         text_poke_sync();
 
         /*
-         * Remove and synchronize_rcu(), except we have a very primitive
-         * refcount based completion.
+         * Remove and wait for refs to be zero.
          */
-        WRITE_ONCE(bp_desc, NULL); /* RCU_INIT_POINTER */
-        if (!atomic_dec_and_test(&desc.refs))
-                atomic_cond_read_acquire(&desc.refs, !VAL);
+        if (!atomic_dec_and_test(&bp_desc.refs))
+                atomic_cond_read_acquire(&bp_desc.refs, !VAL);
 }
 
 static void text_poke_loc_init(struct text_poke_loc *tp, void *addr,

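To make the fixed scheme concrete outside the kernel, here is a minimal userspace sketch using C11 <stdatomic.h>. It is only an analogue, not the kernel implementation: the names mirror the patch, but refs_inc_not_zero() is an illustrative stand-in for arch_atomic_inc_not_zero(), the release/acquire annotations approximate the kernel's smp_* barriers and atomic_cond_read_acquire(), and patch_batch() stands in for text_poke_bp_batch() with the actual text patching elided.

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

/* Stands in for struct text_poke_loc; the sketch never dereferences it. */
struct text_poke_loc;

struct bp_patching_desc {
        struct text_poke_loc *vec;
        int nr_entries;
        atomic_int refs;
};

/* Single descriptor with static storage duration -- never on a stack. */
static struct bp_patching_desc bp_desc;

/*
 * Rough analogue of arch_atomic_inc_not_zero(): increment unless zero.
 * The kernel primitive is fully ordered on success; acquire suffices here
 * to make the vec/nr_entries stores visible after a successful get.
 */
static bool refs_inc_not_zero(atomic_int *v)
{
        int old = atomic_load_explicit(v, memory_order_relaxed);

        do {
                if (old == 0)
                        return false;
        } while (!atomic_compare_exchange_weak_explicit(v, &old, old + 1,
                        memory_order_acquire, memory_order_relaxed));
        return true;
}

/* Handler side: take a reference only while patching is in progress. */
static struct bp_patching_desc *try_get_desc(void)
{
        return refs_inc_not_zero(&bp_desc.refs) ? &bp_desc : NULL;
}

static void put_desc(void)
{
        /* Release pairs with the waiter's acquire reads below. */
        atomic_fetch_sub_explicit(&bp_desc.refs, 1, memory_order_release);
}

/* Patcher side: publish the descriptor, then wait for all users. */
static void patch_batch(struct text_poke_loc *tp, int nr_entries)
{
        bp_desc.vec = tp;
        bp_desc.nr_entries = nr_entries;

        /* A reader that observes refs != 0 also observes vec/nr_entries. */
        atomic_store_explicit(&bp_desc.refs, 1, memory_order_release);

        /* ... install breakpoints, patch, sync -- elided ... */

        /* Drop our reference and wait for any in-flight handlers. */
        if (atomic_fetch_sub_explicit(&bp_desc.refs, 1,
                                      memory_order_acq_rel) != 1) {
                while (atomic_load_explicit(&bp_desc.refs,
                                            memory_order_acquire) != 0)
                        ;       /* spin, like atomic_cond_read_acquire() */
        }
}

The key property matches the patch: bp_desc has static storage duration, so a handler that takes a reference never dereferences memory whose lifetime has ended, and a failed refs_inc_not_zero() simply means no patching is in progress.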