Skip to content

Commit 8f5100d

Browse files
Shifeng LiSaeed Mahameed
authored andcommitted
net/mlx5e: Fix a race in command alloc flow
Fix a cmd->ent use after free due to a race on command entry. Such race occurs when one of the commands releases its last refcount and frees its index and entry while another process running command flush flow takes refcount to this command entry. The process which handles commands flush may see this command as needed to be flushed if the other process allocated a ent->idx but didn't set ent to cmd->ent_arr in cmd_work_handler(). Fix it by moving the assignment of cmd->ent_arr into the spin lock. [70013.081955] BUG: KASAN: use-after-free in mlx5_cmd_trigger_completions+0x1e2/0x4c0 [mlx5_core] [70013.081967] Write of size 4 at addr ffff88880b1510b4 by task kworker/26:1/1433361 [70013.081968] [70013.082028] Workqueue: events aer_isr [70013.082053] Call Trace: [70013.082067] dump_stack+0x8b/0xbb [70013.082086] print_address_description+0x6a/0x270 [70013.082102] kasan_report+0x179/0x2c0 [70013.082173] mlx5_cmd_trigger_completions+0x1e2/0x4c0 [mlx5_core] [70013.082267] mlx5_cmd_flush+0x80/0x180 [mlx5_core] [70013.082304] mlx5_enter_error_state+0x106/0x1d0 [mlx5_core] [70013.082338] mlx5_try_fast_unload+0x2ea/0x4d0 [mlx5_core] [70013.082377] remove_one+0x200/0x2b0 [mlx5_core] [70013.082409] pci_device_remove+0xf3/0x280 [70013.082439] device_release_driver_internal+0x1c3/0x470 [70013.082453] pci_stop_bus_device+0x109/0x160 [70013.082468] pci_stop_and_remove_bus_device+0xe/0x20 [70013.082485] pcie_do_fatal_recovery+0x167/0x550 [70013.082493] aer_isr+0x7d2/0x960 [70013.082543] process_one_work+0x65f/0x12d0 [70013.082556] worker_thread+0x87/0xb50 [70013.082571] kthread+0x2e9/0x3a0 [70013.082592] ret_from_fork+0x1f/0x40 The logical relationship of this error is as follows: aer_recover_work | ent->work -------------------------------------------+------------------------------ aer_recover_work_func | |- pcie_do_recovery | |- report_error_detected | |- mlx5_pci_err_detected |cmd_work_handler |- mlx5_enter_error_state | |- cmd_alloc_index |- enter_error_state | |- lock cmd->alloc_lock |- mlx5_cmd_flush | |- clear_bit |- mlx5_cmd_trigger_completions| |- unlock cmd->alloc_lock |- lock cmd->alloc_lock | |- vector = ~dev->cmd.vars.bitmask |- for_each_set_bit | |- cmd_ent_get(cmd->ent_arr[i]) (UAF) |- unlock cmd->alloc_lock | |- cmd->ent_arr[ent->idx]=ent The cmd->ent_arr[ent->idx] assignment and the bit clearing are not protected by the cmd->alloc_lock in cmd_work_handler(). Fixes: 50b2412 ("net/mlx5: Avoid possible free of command entry while timeout comp handler") Reviewed-by: Moshe Shemesh <[email protected]> Signed-off-by: Shifeng Li <[email protected]> Signed-off-by: Saeed Mahameed <[email protected]>
1 parent ddb38dd commit 8f5100d

File tree

1 file changed

+7
-5
lines changed
  • drivers/net/ethernet/mellanox/mlx5/core

1 file changed

+7
-5
lines changed

drivers/net/ethernet/mellanox/mlx5/core/cmd.c

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -156,15 +156,18 @@ static u8 alloc_token(struct mlx5_cmd *cmd)
156156
return token;
157157
}
158158

159-
static int cmd_alloc_index(struct mlx5_cmd *cmd)
159+
static int cmd_alloc_index(struct mlx5_cmd *cmd, struct mlx5_cmd_work_ent *ent)
160160
{
161161
unsigned long flags;
162162
int ret;
163163

164164
spin_lock_irqsave(&cmd->alloc_lock, flags);
165165
ret = find_first_bit(&cmd->vars.bitmask, cmd->vars.max_reg_cmds);
166-
if (ret < cmd->vars.max_reg_cmds)
166+
if (ret < cmd->vars.max_reg_cmds) {
167167
clear_bit(ret, &cmd->vars.bitmask);
168+
ent->idx = ret;
169+
cmd->ent_arr[ent->idx] = ent;
170+
}
168171
spin_unlock_irqrestore(&cmd->alloc_lock, flags);
169172

170173
return ret < cmd->vars.max_reg_cmds ? ret : -ENOMEM;
@@ -979,7 +982,7 @@ static void cmd_work_handler(struct work_struct *work)
979982
sem = ent->page_queue ? &cmd->vars.pages_sem : &cmd->vars.sem;
980983
down(sem);
981984
if (!ent->page_queue) {
982-
alloc_ret = cmd_alloc_index(cmd);
985+
alloc_ret = cmd_alloc_index(cmd, ent);
983986
if (alloc_ret < 0) {
984987
mlx5_core_err_rl(dev, "failed to allocate command entry\n");
985988
if (ent->callback) {
@@ -994,15 +997,14 @@ static void cmd_work_handler(struct work_struct *work)
994997
up(sem);
995998
return;
996999
}
997-
ent->idx = alloc_ret;
9981000
} else {
9991001
ent->idx = cmd->vars.max_reg_cmds;
10001002
spin_lock_irqsave(&cmd->alloc_lock, flags);
10011003
clear_bit(ent->idx, &cmd->vars.bitmask);
1004+
cmd->ent_arr[ent->idx] = ent;
10021005
spin_unlock_irqrestore(&cmd->alloc_lock, flags);
10031006
}
10041007

1005-
cmd->ent_arr[ent->idx] = ent;
10061008
lay = get_inst(cmd, ent->idx);
10071009
ent->lay = lay;
10081010
memset(lay, 0, sizeof(*lay));

0 commit comments

Comments
 (0)