Skip to content

Commit 5aff0f3

Browse files
committed
Merge branch 'pci/aer'
- Fix sysfs paths in aer_rootport_total_err_* documentation (Johan Hovold)

- Block runtime suspend while handling AER errors (Stanislaw Gruszka)

- Add a generic Header Log structure and reader shared by AER and DPC
  (Ilpo Järvinen)

* pci/aer:
  PCI/AER: Generalize TLP Header Log reading
  PCI/AER: Use explicit register size for PCI_ERR_CAP
  PCI/AER: Block runtime suspend when handling errors
  PCI/AER: Clean up version indentation in ABI docs
  PCI/AER: Fix rootport attribute paths in ABI docs
2 parents 6613476 + 0a5a46a commit 5aff0f3

File tree

9 files changed

+80
-47
lines changed

9 files changed

+80
-47
lines changed

Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ saw any problems).
1111

1212
What: /sys/bus/pci/devices/<dev>/aer_dev_correctable
1313
Date: July 2018
14-
KernelVersion: 4.19.0
14+
KernelVersion: 4.19.0
1515
1616
Description: List of correctable errors seen and reported by this
1717
PCI device using ERR_COR. Note that since multiple errors may
@@ -32,7 +32,7 @@ Description: List of correctable errors seen and reported by this
3232

3333
What: /sys/bus/pci/devices/<dev>/aer_dev_fatal
3434
Date: July 2018
35-
KernelVersion: 4.19.0
35+
KernelVersion: 4.19.0
3636
3737
Description: List of uncorrectable fatal errors seen and reported by this
3838
PCI device using ERR_FATAL. Note that since multiple errors may
@@ -62,7 +62,7 @@ Description: List of uncorrectable fatal errors seen and reported by this
6262

6363
What: /sys/bus/pci/devices/<dev>/aer_dev_nonfatal
6464
Date: July 2018
65-
KernelVersion: 4.19.0
65+
KernelVersion: 4.19.0
6666
6767
Description: List of uncorrectable nonfatal errors seen and reported by this
6868
PCI device using ERR_NONFATAL. Note that since multiple errors
@@ -100,20 +100,20 @@ collectors) that are AER capable. These indicate the number of error messages as
100100
device, so these counters include them and are thus cumulative of all the error
101101
messages on the PCI hierarchy originating at that root port.
102102

103-
What: /sys/bus/pci/devices/<dev>/aer_stats/aer_rootport_total_err_cor
103+
What: /sys/bus/pci/devices/<dev>/aer_rootport_total_err_cor
104104
Date: July 2018
105-
KernelVersion: 4.19.0
105+
KernelVersion: 4.19.0
106106
107107
Description: Total number of ERR_COR messages reported to rootport.
108108

109-
What: /sys/bus/pci/devices/<dev>/aer_stats/aer_rootport_total_err_fatal
109+
What: /sys/bus/pci/devices/<dev>/aer_rootport_total_err_fatal
110110
Date: July 2018
111-
KernelVersion: 4.19.0
111+
KernelVersion: 4.19.0
112112
113113
Description: Total number of ERR_FATAL messages reported to rootport.
114114

115-
What: /sys/bus/pci/devices/<dev>/aer_stats/aer_rootport_total_err_nonfatal
115+
What: /sys/bus/pci/devices/<dev>/aer_rootport_total_err_nonfatal
116116
Date: July 2018
117-
KernelVersion: 4.19.0
117+
KernelVersion: 4.19.0
118118
119119
Description: Total number of ERR_NONFATAL messages reported to rootport.

drivers/firmware/efi/cper.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -445,8 +445,8 @@ static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
445445
printk("%saer_uncor_severity: 0x%08x\n",
446446
pfx, aer->uncor_severity);
447447
printk("%sTLP Header: %08x %08x %08x %08x\n", pfx,
448-
aer->header_log.dw0, aer->header_log.dw1,
449-
aer->header_log.dw2, aer->header_log.dw3);
448+
aer->header_log.dw[0], aer->header_log.dw[1],
449+
aer->header_log.dw[2], aer->header_log.dw[3]);
450450
}
451451
}
452452

drivers/pci/pci.c

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1067,6 +1067,34 @@ static void pci_enable_acs(struct pci_dev *dev)
10671067
pci_disable_acs_redir(dev);
10681068
}
10691069

1070+
/**
1071+
* pcie_read_tlp_log - read TLP Header Log
1072+
* @dev: PCIe device
1073+
* @where: PCI Config offset of TLP Header Log
1074+
* @tlp_log: TLP Log structure to fill
1075+
*
1076+
* Fill @tlp_log from TLP Header Log registers, e.g., AER or DPC.
1077+
*
1078+
* Return: 0 on success and filled TLP Log structure, <0 on error.
1079+
*/
1080+
int pcie_read_tlp_log(struct pci_dev *dev, int where,
1081+
struct pcie_tlp_log *tlp_log)
1082+
{
1083+
int i, ret;
1084+
1085+
memset(tlp_log, 0, sizeof(*tlp_log));
1086+
1087+
for (i = 0; i < 4; i++) {
1088+
ret = pci_read_config_dword(dev, where + i * 4,
1089+
&tlp_log->dw[i]);
1090+
if (ret)
1091+
return pcibios_err_to_errno(ret);
1092+
}
1093+
1094+
return 0;
1095+
}
1096+
EXPORT_SYMBOL_GPL(pcie_read_tlp_log);
1097+
10701098
/**
10711099
* pci_restore_bars - restore a device's BAR values (e.g. after wake-up)
10721100
* @dev: PCI device to have its BARs restored

drivers/pci/pci.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -409,7 +409,7 @@ struct aer_err_info {
409409

410410
unsigned int status; /* COR/UNCOR Error Status */
411411
unsigned int mask; /* COR/UNCOR Error Mask */
412-
struct aer_header_log_regs tlp; /* TLP Header */
412+
struct pcie_tlp_log tlp; /* TLP Header */
413413
};
414414

415415
int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info);

drivers/pci/pcie/aer.c

Lines changed: 6 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -664,11 +664,10 @@ static void pci_rootport_aer_stats_incr(struct pci_dev *pdev,
664664
}
665665
}
666666

667-
static void __print_tlp_header(struct pci_dev *dev,
668-
struct aer_header_log_regs *t)
667+
static void __print_tlp_header(struct pci_dev *dev, struct pcie_tlp_log *t)
669668
{
670669
pci_err(dev, " TLP Header: %08x %08x %08x %08x\n",
671-
t->dw0, t->dw1, t->dw2, t->dw3);
670+
t->dw[0], t->dw[1], t->dw[2], t->dw[3]);
672671
}
673672

674673
static void __aer_print_error(struct pci_dev *dev,
@@ -1210,7 +1209,7 @@ int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
12101209
{
12111210
int type = pci_pcie_type(dev);
12121211
int aer = dev->aer_cap;
1213-
int temp;
1212+
u32 aercc;
12141213

12151214
/* Must reset in this function */
12161215
info->status = 0;
@@ -1241,19 +1240,12 @@ int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
12411240
return 0;
12421241

12431242
/* Get First Error Pointer */
1244-
pci_read_config_dword(dev, aer + PCI_ERR_CAP, &temp);
1245-
info->first_error = PCI_ERR_CAP_FEP(temp);
1243+
pci_read_config_dword(dev, aer + PCI_ERR_CAP, &aercc);
1244+
info->first_error = PCI_ERR_CAP_FEP(aercc);
12461245

12471246
if (info->status & AER_LOG_TLP_MASKS) {
12481247
info->tlp_header_valid = 1;
1249-
pci_read_config_dword(dev,
1250-
aer + PCI_ERR_HEADER_LOG, &info->tlp.dw0);
1251-
pci_read_config_dword(dev,
1252-
aer + PCI_ERR_HEADER_LOG + 4, &info->tlp.dw1);
1253-
pci_read_config_dword(dev,
1254-
aer + PCI_ERR_HEADER_LOG + 8, &info->tlp.dw2);
1255-
pci_read_config_dword(dev,
1256-
aer + PCI_ERR_HEADER_LOG + 12, &info->tlp.dw3);
1248+
pcie_read_tlp_log(dev, aer + PCI_ERR_HEADER_LOG, &info->tlp);
12571249
}
12581250
}
12591251

drivers/pci/pcie/dpc.c

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,8 @@ pci_ers_result_t dpc_reset_link(struct pci_dev *pdev)
190190
static void dpc_process_rp_pio_error(struct pci_dev *pdev)
191191
{
192192
u16 cap = pdev->dpc_cap, dpc_status, first_error;
193-
u32 status, mask, sev, syserr, exc, dw0, dw1, dw2, dw3, log, prefix;
193+
u32 status, mask, sev, syserr, exc, log, prefix;
194+
struct pcie_tlp_log tlp_log;
194195
int i;
195196

196197
pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_STATUS, &status);
@@ -216,16 +217,9 @@ static void dpc_process_rp_pio_error(struct pci_dev *pdev)
216217

217218
if (pdev->dpc_rp_log_size < 4)
218219
goto clear_status;
219-
pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_HEADER_LOG,
220-
&dw0);
221-
pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_HEADER_LOG + 4,
222-
&dw1);
223-
pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_HEADER_LOG + 8,
224-
&dw2);
225-
pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_HEADER_LOG + 12,
226-
&dw3);
220+
pcie_read_tlp_log(pdev, cap + PCI_EXP_DPC_RP_PIO_HEADER_LOG, &tlp_log);
227221
pci_err(pdev, "TLP Header: %#010x %#010x %#010x %#010x\n",
228-
dw0, dw1, dw2, dw3);
222+
tlp_log.dw[0], tlp_log.dw[1], tlp_log.dw[2], tlp_log.dw[3]);
229223

230224
if (pdev->dpc_rp_log_size < 5)
231225
goto clear_status;

drivers/pci/pcie/err.c

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#define dev_fmt(fmt) "AER: " fmt
1414

1515
#include <linux/pci.h>
16+
#include <linux/pm_runtime.h>
1617
#include <linux/module.h>
1718
#include <linux/kernel.h>
1819
#include <linux/errno.h>
@@ -85,6 +86,18 @@ static int report_error_detected(struct pci_dev *dev,
8586
return 0;
8687
}
8788

89+
static int pci_pm_runtime_get_sync(struct pci_dev *pdev, void *data)
90+
{
91+
pm_runtime_get_sync(&pdev->dev);
92+
return 0;
93+
}
94+
95+
static int pci_pm_runtime_put(struct pci_dev *pdev, void *data)
96+
{
97+
pm_runtime_put(&pdev->dev);
98+
return 0;
99+
}
100+
88101
static int report_frozen_detected(struct pci_dev *dev, void *data)
89102
{
90103
return report_error_detected(dev, pci_channel_io_frozen, data);
@@ -207,6 +220,8 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
207220
else
208221
bridge = pci_upstream_bridge(dev);
209222

223+
pci_walk_bridge(bridge, pci_pm_runtime_get_sync, NULL);
224+
210225
pci_dbg(bridge, "broadcast error_detected message\n");
211226
if (state == pci_channel_io_frozen) {
212227
pci_walk_bridge(bridge, report_frozen_detected, &status);
@@ -251,10 +266,15 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
251266
pcie_clear_device_status(dev);
252267
pci_aer_clear_nonfatal_status(dev);
253268
}
269+
270+
pci_walk_bridge(bridge, pci_pm_runtime_put, NULL);
271+
254272
pci_info(bridge, "device recovery successful\n");
255273
return status;
256274

257275
failed:
276+
pci_walk_bridge(bridge, pci_pm_runtime_put, NULL);
277+
258278
pci_uevent_ers(bridge, PCI_ERS_RESULT_DISCONNECT);
259279

260280
/* TODO: Should kernel panic here? */

include/linux/aer.h

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,8 @@
1818

1919
struct pci_dev;
2020

21-
struct aer_header_log_regs {
22-
u32 dw0;
23-
u32 dw1;
24-
u32 dw2;
25-
u32 dw3;
21+
struct pcie_tlp_log {
22+
u32 dw[4];
2623
};
2724

2825
struct aer_capability_regs {
@@ -33,13 +30,15 @@ struct aer_capability_regs {
3330
u32 cor_status;
3431
u32 cor_mask;
3532
u32 cap_control;
36-
struct aer_header_log_regs header_log;
33+
struct pcie_tlp_log header_log;
3734
u32 root_command;
3835
u32 root_status;
3936
u16 cor_err_source;
4037
u16 uncor_err_source;
4138
};
4239

40+
int pcie_read_tlp_log(struct pci_dev *dev, int where, struct pcie_tlp_log *log);
41+
4342
#if defined(CONFIG_PCIEAER)
4443
int pci_aer_clear_nonfatal_status(struct pci_dev *dev);
4544
int pcie_aer_is_native(struct pci_dev *dev);

include/ras/ras_event.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -300,7 +300,7 @@ TRACE_EVENT(aer_event,
300300
const u32 status,
301301
const u8 severity,
302302
const u8 tlp_header_valid,
303-
struct aer_header_log_regs *tlp),
303+
struct pcie_tlp_log *tlp),
304304

305305
TP_ARGS(dev_name, status, severity, tlp_header_valid, tlp),
306306

@@ -318,10 +318,10 @@ TRACE_EVENT(aer_event,
318318
__entry->severity = severity;
319319
__entry->tlp_header_valid = tlp_header_valid;
320320
if (tlp_header_valid) {
321-
__entry->tlp_header[0] = tlp->dw0;
322-
__entry->tlp_header[1] = tlp->dw1;
323-
__entry->tlp_header[2] = tlp->dw2;
324-
__entry->tlp_header[3] = tlp->dw3;
321+
__entry->tlp_header[0] = tlp->dw[0];
322+
__entry->tlp_header[1] = tlp->dw[1];
323+
__entry->tlp_header[2] = tlp->dw[2];
324+
__entry->tlp_header[3] = tlp->dw[3];
325325
}
326326
),
327327

0 commit comments

Comments
 (0)