Skip to content

Commit fbb3bdf

Browse files
hca authored and Alexander Gordeev committed
s390/nmi: Print additional information
In case of an unrecoverable machine check only the machine check interrupt code is printed to the console before the machine is stopped. This makes root cause analysis sometimes hard.

Print additional machine check information to make analysis easier. The output now looks like this:

Unrecoverable machine check, code: 00400F5F4C3B0000
6.16.0-rc2-11605-g987a9431e53a-dirty
HW: IBM 3931 A01 704 (z/VM 7.4.0)
PSW: 0706C00180000000 000003FFE0F0462E PFX: 0000000000070000
LBA: 000003FFE0F0462A EDC: 0000000000000000 FSA: 0000000000000000
CRS:
0080000014966A12 0000000087CB41C7 0000000000BFF140 0000000000000000
000000000000FFFF 0000000000BFF140 0000000071000000 0000000087CB41C7
0000000000008000 0000000000000000 0000000000000000 0000000000000000
0000000000000000 00000000024C0007 00000000DB000000 0000000000BFF000
GPRS:
FFFFFFFF00000000 000003FFE0F0462E E10EA4F489F897A6 0000000000000000
7FFFFFF2C0413C4C 000003FFE19B7010 0000000000000000 0000000000000000
0000000000000000 00000001F76B3380 000003FFE15D4050 0000000000000005
0000000000000000 0000000000070000 000003FFE0F0586C 0000037FE00B7DA0
System stopped

Reviewed-by: Alexander Gordeev <[email protected]>
Signed-off-by: Heiko Carstens <[email protected]>
Signed-off-by: Alexander Gordeev <[email protected]>
1 parent 819275e commit fbb3bdf

File tree

3 files changed

+75
-5
lines changed

3 files changed

+75
-5
lines changed

arch/s390/include/asm/setup.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ struct parmarea {
4141
char command_line[COMMAND_LINE_SIZE]; /* 0x10480 */
4242
};
4343

44+
extern char arch_hw_string[128];
45+
4446
extern struct parmarea parmarea;
4547

4648
extern unsigned int zlib_dfltcc_support;

arch/s390/kernel/early.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,8 @@ static inline void strim_all(char *str)
105105
}
106106
}
107107

108+
char arch_hw_string[128];
109+
108110
static noinline __init void setup_arch_string(void)
109111
{
110112
struct sysinfo_1_1_1 *mach = (struct sysinfo_1_1_1 *)&sysinfo_page;
@@ -131,6 +133,7 @@ static noinline __init void setup_arch_string(void)
131133
machine_is_vm() ? "z/VM" :
132134
machine_is_kvm() ? "KVM" : "unknown");
133135
}
136+
sprintf(arch_hw_string, "HW: %s (%s)", mstr, hvstr);
134137
dump_stack_set_arch_desc("%s (%s)", mstr, hvstr);
135138
}
136139

arch/s390/kernel/nmi.c

Lines changed: 70 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
*/
1010

1111
#include <linux/kernel_stat.h>
12+
#include <linux/utsname.h>
1213
#include <linux/cpufeature.h>
1314
#include <linux/init.h>
1415
#include <linux/errno.h>
@@ -115,18 +116,82 @@ static __always_inline char *u64_to_hex(char *dest, u64 val)
115116
return dest;
116117
}
117118

119+
/*
 * Print diagnostic information about an unrecoverable machine check.
 *
 * Every output line is assembled in a local buffer with nmi_puts() and
 * u64_to_hex() and then handed to sclp_emergency_printk(). In order, the
 * lines contain: the machine check interruption code, the kernel release
 * string, arch_hw_string, the machine check old PSW together with the
 * lowcore address (labelled "PFX"), the last break address / external
 * damage code / failing storage address, the 16 saved control registers,
 * the 16 saved general purpose registers, and a final "System stopped".
 *
 * Called from s390_handle_damage().
 *
 * NOTE(review): nmi_puts() and u64_to_hex() are defined outside this view;
 * the code below relies on both returning a pointer to where the next piece
 * of the line should be written - confirm against their definitions.
 */
static notrace void nmi_print_info(void)
120+
{
121+
struct lowcore *lc = get_lowcore();
122+
/*
 * Scratch buffer reused for every output line.
 * NOTE(review): no bounds checking is visible here; arch_hw_string is
 * declared as char[128], so confirm its contents plus "\n" always fit
 * into these 100 bytes.
 */
char message[100];
123+
char *ptr;
124+
int i;
125+
126+
/* Line 1: machine check interruption code. */
ptr = nmi_puts(message, "Unrecoverable machine check, code: ");
127+
ptr = u64_to_hex(ptr, lc->mcck_interruption_code);
128+
ptr = nmi_puts(ptr, "\n");
129+
sclp_emergency_printk(message);
130+
131+
/* Line 2: kernel release string taken from init_utsname(). */
ptr = nmi_puts(message, init_utsname()->release);
132+
ptr = nmi_puts(ptr, "\n");
133+
sclp_emergency_printk(message);
134+
135+
/* Line 3: the "HW: ..." string filled in by setup_arch_string(). */
ptr = nmi_puts(message, arch_hw_string);
136+
ptr = nmi_puts(ptr, "\n");
137+
sclp_emergency_printk(message);
138+
139+
/* Line 4: machine check old PSW (mask, address) and the lowcore address. */
ptr = nmi_puts(message, "PSW: ");
140+
ptr = u64_to_hex(ptr, lc->mcck_old_psw.mask);
141+
ptr = nmi_puts(ptr, " ");
142+
ptr = u64_to_hex(ptr, lc->mcck_old_psw.addr);
143+
ptr = nmi_puts(ptr, " PFX: ");
144+
ptr = u64_to_hex(ptr, (u64)get_lowcore());
145+
ptr = nmi_puts(ptr, "\n");
146+
sclp_emergency_printk(message);
147+
148+
/*
 * Line 5: last_break_save_area, external_damage_code and
 * failing_storage_address from the lowcore.
 */
ptr = nmi_puts(message, "LBA: ");
149+
ptr = u64_to_hex(ptr, lc->last_break_save_area);
150+
ptr = nmi_puts(ptr, " EDC: ");
151+
ptr = u64_to_hex(ptr, lc->external_damage_code);
152+
ptr = nmi_puts(ptr, " FSA: ");
153+
ptr = u64_to_hex(ptr, lc->failing_storage_address);
154+
ptr = nmi_puts(ptr, "\n");
155+
sclp_emergency_printk(message);
156+
157+
/* Control registers: a heading line followed by four lines of four values. */
ptr = nmi_puts(message, "CRS:\n");
158+
sclp_emergency_printk(message);
159+
ptr = message;
160+
for (i = 0; i < 16; i++) {
161+
ptr = u64_to_hex(ptr, lc->cregs_save_area[i].val);
162+
ptr = nmi_puts(ptr, " ");
163+
/* After every fourth register, emit the line and restart the buffer. */
if ((i + 1) % 4 == 0) {
164+
ptr = nmi_puts(ptr, "\n");
165+
sclp_emergency_printk(message);
166+
ptr = message;
167+
}
168+
}
169+
170+
/* General purpose registers: same layout as the control registers above. */
ptr = nmi_puts(message, "GPRS:\n");
171+
sclp_emergency_printk(message);
172+
ptr = message;
173+
for (i = 0; i < 16; i++) {
174+
ptr = u64_to_hex(ptr, lc->gpregs_save_area[i]);
175+
ptr = nmi_puts(ptr, " ");
176+
/* After every fourth register, emit the line and restart the buffer. */
if ((i + 1) % 4 == 0) {
177+
ptr = nmi_puts(ptr, "\n");
178+
sclp_emergency_printk(message);
179+
ptr = message;
180+
}
181+
}
182+
183+
/* Final line of the report. */
ptr = nmi_puts(message, "System stopped\n");
184+
sclp_emergency_printk(message);
185+
}
186+
118187
static notrace void s390_handle_damage(void)
119188
{
120189
struct lowcore *lc = get_lowcore();
121190
union ctlreg0 cr0, cr0_new;
122-
char message[100];
123191
psw_t psw_save;
124-
char *ptr;
125192

126193
smp_emergency_stop();
127194
diag_amode31_ops.diag308_reset();
128-
ptr = nmi_puts(message, "System stopped due to unrecoverable machine check, code: 0x");
129-
u64_to_hex(ptr, lc->mcck_interruption_code);
130195

131196
/*
132197
* Disable low address protection and make machine check new PSW a
@@ -140,7 +205,7 @@ static notrace void s390_handle_damage(void)
140205
psw_bits(lc->mcck_new_psw).io = 0;
141206
psw_bits(lc->mcck_new_psw).ext = 0;
142207
psw_bits(lc->mcck_new_psw).wait = 1;
143-
sclp_emergency_printk(message);
208+
nmi_print_info();
144209

145210
/*
146211
* Restore machine check new PSW and control register 0 to original

0 commit comments

Comments
 (0)