Skip to content

Commit 30bafe1

Browse files
sumitsaxena11 authored and martinkpetersen committed
scsi: mpi3mr: Support PCI Error Recovery callback handlers
PCI error recovery support is required to recover the controller upon detection of PCI errors. Add support for the PCI error recovery callback handlers in the mpi3mr driver.

Co-developed-by: Sathya Prakash <[email protected]>
Signed-off-by: Sathya Prakash <[email protected]>
Co-developed-by: Ranjan Kumar <[email protected]>
Signed-off-by: Ranjan Kumar <[email protected]>
Signed-off-by: Sumit Saxena <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Martin K. Petersen <[email protected]>
1 parent 3f7e469 commit 30bafe1

File tree

2 files changed

+205
-0
lines changed

2 files changed

+205
-0
lines changed

drivers/scsi/mpi3mr/mpi3mr.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include <linux/miscdevice.h>
2424
#include <linux/module.h>
2525
#include <linux/pci.h>
26+
#include <linux/aer.h>
2627
#include <linux/poll.h>
2728
#include <linux/sched.h>
2829
#include <linux/slab.h>
@@ -129,6 +130,7 @@ extern atomic64_t event_counter;
129130
#define MPI3MR_PREPARE_FOR_RESET_TIMEOUT 180
130131
#define MPI3MR_RESET_ACK_TIMEOUT 30
131132
#define MPI3MR_MUR_TIMEOUT 120
133+
#define MPI3MR_RESET_TIMEOUT 510
132134

133135
#define MPI3MR_WATCHDOG_INTERVAL 1000 /* in milli seconds */
134136

@@ -1153,6 +1155,8 @@ struct scmd_priv {
11531155
* @trace_release_trigger_active: Trace trigger active flag
11541156
* @fw_release_trigger_active: Fw release trigger active flag
11551157
* @snapdump_trigger_active: Snapdump trigger active flag
1158+
* @pci_err_recovery: PCI error recovery in progress
1159+
* @block_on_pci_err: Block IO during PCI error recovery
11561160
*/
11571161
struct mpi3mr_ioc {
11581162
struct list_head list;
@@ -1353,6 +1357,8 @@ struct mpi3mr_ioc {
13531357
bool snapdump_trigger_active;
13541358
bool trace_release_trigger_active;
13551359
bool fw_release_trigger_active;
1360+
bool pci_err_recovery;
1361+
bool block_on_pci_err;
13561362
};
13571363

13581364
/**

drivers/scsi/mpi3mr/mpi3mr_os.c

Lines changed: 199 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5546,6 +5546,197 @@ mpi3mr_resume(struct device *dev)
55465546
return 0;
55475547
}
55485548

5549+
/**
5550+
* mpi3mr_pcierr_error_detected - PCI error detected callback
5551+
* @pdev: PCI device instance
5552+
* @state: channel state
5553+
*
5554+
* This function is called by the PCI error recovery driver and
5555+
* based on the state passed the driver decides what actions to
5556+
* be recommended back to PCI driver.
5557+
*
5558+
* For all of the states if there is no valid mrioc or scsi host
5559+
* references in the PCI device then this function will return
5560+
* the result as disconnect.
5561+
*
5562+
* For normal state, this function will return the result as can
5563+
* recover.
5564+
*
5565+
* For frozen state, this function will block for any pending
5566+
* controller initialization or re-initialization to complete,
5567+
* stop any new interactions with the controller and return
5568+
* status as reset required.
5569+
*
5570+
* For permanent failure state, this function will mark the
5571+
* controller as unrecoverable and return status as disconnect.
5572+
*
5573+
* Returns: PCI_ERS_RESULT_NEED_RESET or CAN_RECOVER or
5574+
* DISCONNECT based on the controller state.
5575+
*/
5576+
static pci_ers_result_t
5577+
mpi3mr_pcierr_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
5578+
{
5579+
struct Scsi_Host *shost;
5580+
struct mpi3mr_ioc *mrioc;
5581+
unsigned int timeout = MPI3MR_RESET_TIMEOUT;
5582+
5583+
dev_info(&pdev->dev, "%s: callback invoked state(%d)\n", __func__,
5584+
state);
5585+
5586+
shost = pci_get_drvdata(pdev);
5587+
mrioc = shost_priv(shost);
5588+
5589+
switch (state) {
5590+
case pci_channel_io_normal:
5591+
return PCI_ERS_RESULT_CAN_RECOVER;
5592+
case pci_channel_io_frozen:
5593+
mrioc->pci_err_recovery = true;
5594+
mrioc->block_on_pci_err = true;
5595+
do {
5596+
if (mrioc->reset_in_progress || mrioc->is_driver_loading)
5597+
ssleep(1);
5598+
else
5599+
break;
5600+
} while (--timeout);
5601+
5602+
if (!timeout) {
5603+
mrioc->pci_err_recovery = true;
5604+
mrioc->block_on_pci_err = true;
5605+
mrioc->unrecoverable = 1;
5606+
mpi3mr_stop_watchdog(mrioc);
5607+
mpi3mr_flush_cmds_for_unrecovered_controller(mrioc);
5608+
return PCI_ERS_RESULT_DISCONNECT;
5609+
}
5610+
5611+
scsi_block_requests(mrioc->shost);
5612+
mpi3mr_stop_watchdog(mrioc);
5613+
mpi3mr_cleanup_resources(mrioc);
5614+
return PCI_ERS_RESULT_NEED_RESET;
5615+
case pci_channel_io_perm_failure:
5616+
mrioc->pci_err_recovery = true;
5617+
mrioc->block_on_pci_err = true;
5618+
mrioc->unrecoverable = 1;
5619+
mpi3mr_stop_watchdog(mrioc);
5620+
mpi3mr_flush_cmds_for_unrecovered_controller(mrioc);
5621+
return PCI_ERS_RESULT_DISCONNECT;
5622+
default:
5623+
return PCI_ERS_RESULT_DISCONNECT;
5624+
}
5625+
}
5626+
5627+
/**
5628+
* mpi3mr_pcierr_slot_reset - Post slot reset callback
5629+
* @pdev: PCI device instance
5630+
*
5631+
* This function is called by the PCI error recovery driver
5632+
* after a slot or link reset issued by it for the recovery, the
5633+
* driver is expected to bring back the controller and
5634+
* initialize it.
5635+
*
5636+
* This function restores PCI state and reinitializes controller
5637+
* resources and the controller, this blocks for any pending
5638+
* reset to complete.
5639+
*
5640+
* Returns: PCI_ERS_RESULT_DISCONNECT on failure or
5641+
* PCI_ERS_RESULT_RECOVERED
5642+
*/
5643+
static pci_ers_result_t mpi3mr_pcierr_slot_reset(struct pci_dev *pdev)
5644+
{
5645+
struct Scsi_Host *shost;
5646+
struct mpi3mr_ioc *mrioc;
5647+
unsigned int timeout = MPI3MR_RESET_TIMEOUT;
5648+
5649+
dev_info(&pdev->dev, "%s: callback invoked\n", __func__);
5650+
5651+
shost = pci_get_drvdata(pdev);
5652+
mrioc = shost_priv(shost);
5653+
5654+
do {
5655+
if (mrioc->reset_in_progress)
5656+
ssleep(1);
5657+
else
5658+
break;
5659+
} while (--timeout);
5660+
5661+
if (!timeout)
5662+
goto out_failed;
5663+
5664+
pci_restore_state(pdev);
5665+
5666+
if (mpi3mr_setup_resources(mrioc)) {
5667+
ioc_err(mrioc, "setup resources failed\n");
5668+
goto out_failed;
5669+
}
5670+
mrioc->unrecoverable = 0;
5671+
mrioc->pci_err_recovery = false;
5672+
5673+
if (mpi3mr_soft_reset_handler(mrioc, MPI3MR_RESET_FROM_FIRMWARE, 0))
5674+
goto out_failed;
5675+
5676+
return PCI_ERS_RESULT_RECOVERED;
5677+
5678+
out_failed:
5679+
mrioc->unrecoverable = 1;
5680+
mrioc->block_on_pci_err = false;
5681+
scsi_unblock_requests(shost);
5682+
mpi3mr_start_watchdog(mrioc);
5683+
return PCI_ERS_RESULT_DISCONNECT;
5684+
}
5685+
5686+
/**
5687+
* mpi3mr_pcierr_resume - PCI error recovery resume
5688+
* callback
5689+
* @pdev: PCI device instance
5690+
*
5691+
* This function enables all I/O and IOCTLs post reset issued as
5692+
* part of the PCI error recovery
5693+
*
5694+
* Return: Nothing.
5695+
*/
5696+
static void mpi3mr_pcierr_resume(struct pci_dev *pdev)
5697+
{
5698+
struct Scsi_Host *shost;
5699+
struct mpi3mr_ioc *mrioc;
5700+
5701+
dev_info(&pdev->dev, "%s: callback invoked\n", __func__);
5702+
5703+
shost = pci_get_drvdata(pdev);
5704+
mrioc = shost_priv(shost);
5705+
5706+
if (mrioc->block_on_pci_err) {
5707+
mrioc->block_on_pci_err = false;
5708+
scsi_unblock_requests(shost);
5709+
mpi3mr_start_watchdog(mrioc);
5710+
}
5711+
}
5712+
5713+
/**
5714+
* mpi3mr_pcierr_mmio_enabled - PCI error recovery callback
5715+
* @pdev: PCI device instance
5716+
*
5717+
* This is called only if mpi3mr_pcierr_error_detected returns
5718+
* PCI_ERS_RESULT_CAN_RECOVER.
5719+
*
5720+
* Return: PCI_ERS_RESULT_DISCONNECT when the controller is
5721+
* unrecoverable or when the shost/mrioc reference cannot be
5722+
* found, else return PCI_ERS_RESULT_RECOVERED
5723+
*/
5724+
static pci_ers_result_t mpi3mr_pcierr_mmio_enabled(struct pci_dev *pdev)
5725+
{
5726+
struct Scsi_Host *shost;
5727+
struct mpi3mr_ioc *mrioc;
5728+
5729+
dev_info(&pdev->dev, "%s: callback invoked\n", __func__);
5730+
5731+
shost = pci_get_drvdata(pdev);
5732+
mrioc = shost_priv(shost);
5733+
5734+
if (mrioc->unrecoverable)
5735+
return PCI_ERS_RESULT_DISCONNECT;
5736+
5737+
return PCI_ERS_RESULT_RECOVERED;
5738+
}
5739+
55495740
static const struct pci_device_id mpi3mr_pci_id_table[] = {
55505741
{
55515742
PCI_DEVICE_SUB(MPI3_MFGPAGE_VENDORID_BROADCOM,
@@ -5563,6 +5754,13 @@ static const struct pci_device_id mpi3mr_pci_id_table[] = {
55635754
};
55645755
MODULE_DEVICE_TABLE(pci, mpi3mr_pci_id_table);
55655756

5757+
static struct pci_error_handlers mpi3mr_err_handler = {
5758+
.error_detected = mpi3mr_pcierr_error_detected,
5759+
.mmio_enabled = mpi3mr_pcierr_mmio_enabled,
5760+
.slot_reset = mpi3mr_pcierr_slot_reset,
5761+
.resume = mpi3mr_pcierr_resume,
5762+
};
5763+
55665764
static SIMPLE_DEV_PM_OPS(mpi3mr_pm_ops, mpi3mr_suspend, mpi3mr_resume);
55675765

55685766
static struct pci_driver mpi3mr_pci_driver = {
@@ -5571,6 +5769,7 @@ static struct pci_driver mpi3mr_pci_driver = {
55715769
.probe = mpi3mr_probe,
55725770
.remove = mpi3mr_remove,
55735771
.shutdown = mpi3mr_shutdown,
5772+
.err_handler = &mpi3mr_err_handler,
55745773
.driver.pm = &mpi3mr_pm_ops,
55755774
};
55765775

0 commit comments

Comments
 (0)