Skip to content

Commit 5ae1750

Browse files
Ewan D. Milnemartinkpetersen
authored andcommitted
scsi: core: Avoid leaving shost->last_reset with stale value if EH does not run
The changes to issue the abort from the scmd->abort_work instead of the EH thread introduced a problem if eh_deadline is used. If aborting the command(s) is successful, and there are never any scmds added to the shost->eh_cmd_q, there is no code path which will reset the ->last_reset value back to zero. The effect of this is that after a successful abort with no EH thread activity, a subsequent timeout, perhaps a long time later, might immediately be considered past a user-set eh_deadline time, and the host will be reset with no attempt at recovery. Fix this by resetting ->last_reset back to zero in scmd_eh_abort_handler() if it is determined that the EH thread will not run to do this. Thanks to Gopinath Marappan for investigating this problem. Link: https://lore.kernel.org/r/[email protected] Fixes: e494f6a ("[SCSI] improved eh timeout handler") Cc: [email protected] Signed-off-by: Ewan D. Milne <[email protected]> Signed-off-by: Martin K. Petersen <[email protected]>
1 parent 5f7cf82 commit 5ae1750

File tree

5 files changed

+29
-1
lines changed

5 files changed

+29
-1
lines changed

drivers/scsi/hosts.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,7 @@ struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize)
387387
shost->shost_state = SHOST_CREATED;
388388
INIT_LIST_HEAD(&shost->__devices);
389389
INIT_LIST_HEAD(&shost->__targets);
390+
INIT_LIST_HEAD(&shost->eh_abort_list);
390391
INIT_LIST_HEAD(&shost->eh_cmd_q);
391392
INIT_LIST_HEAD(&shost->starved_list);
392393
init_waitqueue_head(&shost->host_wait);

drivers/scsi/scsi_error.c

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,23 @@ static bool scsi_eh_should_retry_cmd(struct scsi_cmnd *cmd)
133133
return true;
134134
}
135135

136+
static void scsi_eh_complete_abort(struct scsi_cmnd *scmd, struct Scsi_Host *shost)
137+
{
138+
unsigned long flags;
139+
140+
spin_lock_irqsave(shost->host_lock, flags);
141+
list_del_init(&scmd->eh_entry);
142+
/*
143+
* If the abort succeeds, and there is no further
144+
* EH action, clear the ->last_reset time.
145+
*/
146+
if (list_empty(&shost->eh_abort_list) &&
147+
list_empty(&shost->eh_cmd_q))
148+
if (shost->eh_deadline != -1)
149+
shost->last_reset = 0;
150+
spin_unlock_irqrestore(shost->host_lock, flags);
151+
}
152+
136153
/**
137154
* scmd_eh_abort_handler - Handle command aborts
138155
* @work: command to be aborted.
@@ -150,6 +167,7 @@ scmd_eh_abort_handler(struct work_struct *work)
150167
container_of(work, struct scsi_cmnd, abort_work.work);
151168
struct scsi_device *sdev = scmd->device;
152169
enum scsi_disposition rtn;
170+
unsigned long flags;
153171

154172
if (scsi_host_eh_past_deadline(sdev->host)) {
155173
SCSI_LOG_ERROR_RECOVERY(3,
@@ -173,12 +191,14 @@ scmd_eh_abort_handler(struct work_struct *work)
173191
SCSI_LOG_ERROR_RECOVERY(3,
174192
scmd_printk(KERN_WARNING, scmd,
175193
"retry aborted command\n"));
194+
scsi_eh_complete_abort(scmd, sdev->host);
176195
scsi_queue_insert(scmd, SCSI_MLQUEUE_EH_RETRY);
177196
return;
178197
} else {
179198
SCSI_LOG_ERROR_RECOVERY(3,
180199
scmd_printk(KERN_WARNING, scmd,
181200
"finish aborted command\n"));
201+
scsi_eh_complete_abort(scmd, sdev->host);
182202
scsi_finish_command(scmd);
183203
return;
184204
}
@@ -191,6 +211,9 @@ scmd_eh_abort_handler(struct work_struct *work)
191211
}
192212
}
193213

214+
spin_lock_irqsave(sdev->host->host_lock, flags);
215+
list_del_init(&scmd->eh_entry);
216+
spin_unlock_irqrestore(sdev->host->host_lock, flags);
194217
scsi_eh_scmd_add(scmd);
195218
}
196219

@@ -221,6 +244,8 @@ scsi_abort_command(struct scsi_cmnd *scmd)
221244
spin_lock_irqsave(shost->host_lock, flags);
222245
if (shost->eh_deadline != -1 && !shost->last_reset)
223246
shost->last_reset = jiffies;
247+
BUG_ON(!list_empty(&scmd->eh_entry));
248+
list_add_tail(&scmd->eh_entry, &shost->eh_abort_list);
224249
spin_unlock_irqrestore(shost->host_lock, flags);
225250

226251
scmd->eh_eflags |= SCSI_EH_ABORT_SCHEDULED;

drivers/scsi/scsi_lib.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1143,6 +1143,7 @@ void scsi_init_command(struct scsi_device *dev, struct scsi_cmnd *cmd)
11431143
cmd->sense_buffer = buf;
11441144
cmd->prot_sdb = prot;
11451145
cmd->flags = flags;
1146+
INIT_LIST_HEAD(&cmd->eh_entry);
11461147
INIT_DELAYED_WORK(&cmd->abort_work, scmd_eh_abort_handler);
11471148
cmd->jiffies_at_alloc = jiffies_at_alloc;
11481149
cmd->retries = retries;

include/scsi/scsi_cmnd.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ enum scsi_cmnd_submitter {
7373
struct scsi_cmnd {
7474
struct scsi_request req;
7575
struct scsi_device *device;
76-
struct list_head eh_entry; /* entry for the host eh_cmd_q */
76+
struct list_head eh_entry; /* entry for the host eh_abort_list/eh_cmd_q */
7777
struct delayed_work abort_work;
7878

7979
struct rcu_head rcu;

include/scsi/scsi_host.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -551,6 +551,7 @@ struct Scsi_Host {
551551

552552
struct mutex scan_mutex;/* serialize scanning activity */
553553

554+
struct list_head eh_abort_list;
554555
struct list_head eh_cmd_q;
555556
struct task_struct * ehandler; /* Error recovery thread. */
556557
struct completion * eh_action; /* Wait for specific actions on the

0 commit comments

Comments
 (0)