Skip to content

Commit a942908

Browse files
committed
Merge tag 'ras-core-2020-06-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 RAS updates from Thomas Gleixner:
 "RAS updates from Borislav Petkov:

   - Unmap a whole guest page if an MCE is encountered in it to avoid
     follow-on MCEs leading to the guest crashing, by Tony Luck.

     This change collided with the entry changes and the merge
     resolution would have been rather unpleasant. To avoid that the
     entry branch was merged in before applying this. The resulting
     code did not change over the rebase.

   - AMD MCE error thresholding machinery cleanup and hotplug
     sanitization, by Thomas Gleixner.

   - Change the MCE notifiers to denote whether they have handled the
     error and not break the chain early by returning NOTIFY_STOP, thus
     giving the opportunity for the later handlers in the chain to see
     it. By Tony Luck.

   - Add AMD family 0x17, models 0x60-6f support, by Alexander Monakov.

   - Last but not least, the usual round of fixes and improvements"

* tag 'ras-core-2020-06-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (23 commits)
  x86/mce/dev-mcelog: Fix -Wstringop-truncation warning about strncpy()
  x86/{mce,mm}: Unmap the entire page if the whole page is affected and poisoned
  EDAC/amd64: Add AMD family 17h model 60h PCI IDs
  hwmon: (k10temp) Add AMD family 17h model 60h PCI match
  x86/amd_nb: Add AMD family 17h model 60h PCI IDs
  x86/mcelog: Add compat_ioctl for 32-bit mcelog support
  x86/mce: Drop bogus comment about mce.kflags
  x86/mce: Fixup exception only for the correct MCEs
  EDAC: Drop the EDAC report status checks
  x86/mce: Add mce=print_all option
  x86/mce: Change default MCE logger to check mce->kflags
  x86/mce: Fix all mce notifiers to update the mce->kflags bitmask
  x86/mce: Add a struct mce.kflags field
  x86/mce: Convert the CEC to use the MCE notifier
  x86/mce: Rename "first" function as "early"
  x86/mce/amd, edac: Remove report_gart_errors
  x86/mce/amd: Make threshold bank setting hotplug robust
  x86/mce/amd: Cleanup threshold device remove path
  x86/mce/amd: Straighten CPU hotplug path
  x86/mce/amd: Sanitize thresholding device creation hotplug path
  ...
2 parents 076f14b + 7ccddc4 commit a942908

File tree

28 files changed

+306
-320
lines changed

28 files changed

+306
-320
lines changed

arch/x86/include/asm/amd_nb.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ struct threshold_bank {
5757

5858
/* initialized to the number of CPUs on the node sharing this bank */
5959
refcount_t cpus;
60+
unsigned int shared;
6061
};
6162

6263
struct amd_northbridge {

arch/x86/include/asm/mce.h

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,17 @@
127127
#define MSR_AMD64_SMCA_MCx_DEADDR(x) (MSR_AMD64_SMCA_MC0_DEADDR + 0x10*(x))
128128
#define MSR_AMD64_SMCA_MCx_MISCy(x, y) ((MSR_AMD64_SMCA_MC0_MISC1 + y) + (0x10*(x)))
129129

130+
#define XEC(x, mask) (((x) >> 16) & mask)
131+
132+
/* mce.kflags flag bits for logging etc. */
133+
#define MCE_HANDLED_CEC BIT_ULL(0)
134+
#define MCE_HANDLED_UC BIT_ULL(1)
135+
#define MCE_HANDLED_EXTLOG BIT_ULL(2)
136+
#define MCE_HANDLED_NFIT BIT_ULL(3)
137+
#define MCE_HANDLED_EDAC BIT_ULL(4)
138+
#define MCE_HANDLED_MCELOG BIT_ULL(5)
139+
#define MCE_IN_KERNEL_RECOV BIT_ULL(6)
140+
130141
/*
131142
* This structure contains all data related to the MCE log. Also
132143
* carries a signature to make it easier to find from external
@@ -142,14 +153,16 @@ struct mce_log_buffer {
142153
struct mce entry[];
143154
};
144155

156+
/* Highest last */
145157
enum mce_notifier_prios {
146-
MCE_PRIO_FIRST = INT_MAX,
147-
MCE_PRIO_UC = INT_MAX - 1,
148-
MCE_PRIO_EXTLOG = INT_MAX - 2,
149-
MCE_PRIO_NFIT = INT_MAX - 3,
150-
MCE_PRIO_EDAC = INT_MAX - 4,
151-
MCE_PRIO_MCELOG = 1,
152-
MCE_PRIO_LOWEST = 0,
158+
MCE_PRIO_LOWEST,
159+
MCE_PRIO_MCELOG,
160+
MCE_PRIO_EDAC,
161+
MCE_PRIO_NFIT,
162+
MCE_PRIO_EXTLOG,
163+
MCE_PRIO_UC,
164+
MCE_PRIO_EARLY,
165+
MCE_PRIO_CEC
153166
};
154167

155168
struct notifier_block;
@@ -347,5 +360,4 @@ umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) { return
347360
#endif
348361

349362
static inline void mce_hygon_feature_init(struct cpuinfo_x86 *c) { return mce_amd_feature_init(c); }
350-
351363
#endif /* _ASM_X86_MCE_H */

arch/x86/include/asm/set_memory.h

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -86,28 +86,35 @@ int set_direct_map_default_noflush(struct page *page);
8686
extern int kernel_set_to_readonly;
8787

8888
#ifdef CONFIG_X86_64
89-
static inline int set_mce_nospec(unsigned long pfn)
89+
/*
90+
* Prevent speculative access to the page by either unmapping
91+
* it (if we do not require access to any part of the page) or
92+
* marking it uncacheable (if we want to try to retrieve data
93+
* from non-poisoned lines in the page).
94+
*/
95+
static inline int set_mce_nospec(unsigned long pfn, bool unmap)
9096
{
9197
unsigned long decoy_addr;
9298
int rc;
9399

94100
/*
95-
* Mark the linear address as UC to make sure we don't log more
96-
* errors because of speculative access to the page.
97101
* We would like to just call:
98-
* set_memory_uc((unsigned long)pfn_to_kaddr(pfn), 1);
102+
* set_memory_XX((unsigned long)pfn_to_kaddr(pfn), 1);
99103
* but doing that would radically increase the odds of a
100104
* speculative access to the poison page because we'd have
101105
* the virtual address of the kernel 1:1 mapping sitting
102106
* around in registers.
103107
* Instead we get tricky. We create a non-canonical address
104108
* that looks just like the one we want, but has bit 63 flipped.
105-
* This relies on set_memory_uc() properly sanitizing any __pa()
109+
* This relies on set_memory_XX() properly sanitizing any __pa()
106110
* results with __PHYSICAL_MASK or PTE_PFN_MASK.
107111
*/
108112
decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63));
109113

110-
rc = set_memory_uc(decoy_addr, 1);
114+
if (unmap)
115+
rc = set_memory_np(decoy_addr, 1);
116+
else
117+
rc = set_memory_uc(decoy_addr, 1);
111118
if (rc)
112119
pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn);
113120
return rc;

arch/x86/include/uapi/asm/mce.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ struct mce {
3535
__u64 ipid; /* MCA_IPID MSR: only valid on SMCA systems */
3636
__u64 ppin; /* Protected Processor Inventory Number */
3737
__u32 microcode; /* Microcode revision */
38+
__u64 kflags; /* Internal kernel use */
3839
};
3940

4041
#define MCE_GET_RECORD_LEN _IOR('M', 1, int)

arch/x86/kernel/amd_nb.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,11 @@
1818
#define PCI_DEVICE_ID_AMD_17H_ROOT 0x1450
1919
#define PCI_DEVICE_ID_AMD_17H_M10H_ROOT 0x15d0
2020
#define PCI_DEVICE_ID_AMD_17H_M30H_ROOT 0x1480
21+
#define PCI_DEVICE_ID_AMD_17H_M60H_ROOT 0x1630
2122
#define PCI_DEVICE_ID_AMD_17H_DF_F4 0x1464
2223
#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F4 0x15ec
2324
#define PCI_DEVICE_ID_AMD_17H_M30H_DF_F4 0x1494
25+
#define PCI_DEVICE_ID_AMD_17H_M60H_DF_F4 0x144c
2426
#define PCI_DEVICE_ID_AMD_17H_M70H_DF_F4 0x1444
2527
#define PCI_DEVICE_ID_AMD_19H_DF_F4 0x1654
2628

@@ -33,6 +35,7 @@ static const struct pci_device_id amd_root_ids[] = {
3335
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_ROOT) },
3436
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_ROOT) },
3537
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_ROOT) },
38+
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M60H_ROOT) },
3639
{}
3740
};
3841

@@ -50,6 +53,7 @@ static const struct pci_device_id amd_nb_misc_ids[] = {
5053
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F3) },
5154
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F3) },
5255
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_DF_F3) },
56+
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M60H_DF_F3) },
5357
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F3) },
5458
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F3) },
5559
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_DF_F3) },
@@ -65,6 +69,7 @@ static const struct pci_device_id amd_nb_link_ids[] = {
6569
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F4) },
6670
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F4) },
6771
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_DF_F4) },
72+
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M60H_DF_F4) },
6873
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F4) },
6974
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_DF_F4) },
7075
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) },

0 commit comments

Comments
 (0)