25
25
26
26
#define MCA_MAX_REGS_COUNT (16)
27
27
28
/*
 * MCA_REG_FIELD - extract bits [h:l] from a register value.
 * @x: register value to decode
 * @h: index of the highest bit of the field (inclusive)
 * @l: index of the lowest bit of the field (inclusive)
 *
 * The value is masked with GENMASK_ULL(h, l) and shifted right by @l, so the
 * result holds the field starting at bit 0. @l is expanded twice; do not pass
 * an expression with side effects.
 */
#define MCA_REG_FIELD(x, h, l)			(((x) & GENMASK_ULL(h, l)) >> (l))

/*
 * Field accessors for an MCA status register value. Each one extracts the bit
 * range given by its last two arguments (high bit, low bit).
 */
#define MCA_REG__STATUS__VAL(x)			MCA_REG_FIELD(x, 63, 63)
#define MCA_REG__STATUS__OVERFLOW(x)		MCA_REG_FIELD(x, 62, 62)
#define MCA_REG__STATUS__UC(x)			MCA_REG_FIELD(x, 61, 61)
#define MCA_REG__STATUS__EN(x)			MCA_REG_FIELD(x, 60, 60)
#define MCA_REG__STATUS__MISCV(x)		MCA_REG_FIELD(x, 59, 59)
#define MCA_REG__STATUS__ADDRV(x)		MCA_REG_FIELD(x, 58, 58)
#define MCA_REG__STATUS__PCC(x)			MCA_REG_FIELD(x, 57, 57)
#define MCA_REG__STATUS__ERRCOREIDVAL(x)	MCA_REG_FIELD(x, 56, 56)
#define MCA_REG__STATUS__TCC(x)			MCA_REG_FIELD(x, 55, 55)
#define MCA_REG__STATUS__SYNDV(x)		MCA_REG_FIELD(x, 53, 53)
#define MCA_REG__STATUS__CECC(x)		MCA_REG_FIELD(x, 46, 46)
#define MCA_REG__STATUS__UECC(x)		MCA_REG_FIELD(x, 45, 45)
#define MCA_REG__STATUS__DEFERRED(x)		MCA_REG_FIELD(x, 44, 44)
#define MCA_REG__STATUS__POISON(x)		MCA_REG_FIELD(x, 43, 43)
#define MCA_REG__STATUS__SCRUB(x)		MCA_REG_FIELD(x, 40, 40)
#define MCA_REG__STATUS__ERRCOREID(x)		MCA_REG_FIELD(x, 37, 32)
#define MCA_REG__STATUS__ADDRLSB(x)		MCA_REG_FIELD(x, 29, 24)
#define MCA_REG__STATUS__ERRORCODEEXT(x)	MCA_REG_FIELD(x, 21, 16)
#define MCA_REG__STATUS__ERRORCODE(x)		MCA_REG_FIELD(x, 15, 0)
48
+
28
49
enum amdgpu_mca_ip {
29
50
AMDGPU_MCA_IP_UNKNOW = -1 ,
30
51
AMDGPU_MCA_IP_PSP = 0 ,
@@ -57,6 +78,17 @@ struct amdgpu_mca {
57
78
const struct amdgpu_mca_smu_funcs * mca_funcs ;
58
79
};
59
80
81
/*
 * Named indices for the registers of one MCA bank.
 *
 * Only indices 0-6 are named. MCA_REG_IDX_COUNT is pinned to 16, the same
 * value as MCA_MAX_REGS_COUNT, rather than following the last named index.
 * NOTE(review): presumably these index mca_bank_entry.regs[]; confirm
 * against the users of this enum.
 */
enum mca_reg_idx {
	MCA_REG_IDX_CONTROL		= 0,
	MCA_REG_IDX_STATUS		= 1,
	MCA_REG_IDX_ADDR		= 2,
	MCA_REG_IDX_MISC0		= 3,
	MCA_REG_IDX_CONFIG		= 4,
	MCA_REG_IDX_IPID		= 5,
	MCA_REG_IDX_SYND		= 6,
	MCA_REG_IDX_COUNT		= 16,
};
91
+
60
92
struct mca_bank_info {
61
93
int socket_id ;
62
94
int aid ;
@@ -72,18 +104,28 @@ struct mca_bank_entry {
72
104
uint64_t regs [MCA_MAX_REGS_COUNT ];
73
105
};
74
106
107
+ struct mca_bank_node {
108
+ struct mca_bank_entry entry ;
109
+ struct list_head node ;
110
+ };
111
+
112
+ struct mca_bank_set {
113
+ int nr_entries ;
114
+ struct list_head list ;
115
+ };
116
+
75
117
/*
 * Table of MCA operations: two integer limits (max_ue_count, max_ce_count)
 * followed by function pointers that each take the amdgpu_device first and
 * return int. A pointer to this table is what amdgpu_mca_smu_init_funcs()
 * accepts.
 *
 * NOTE(review): this text still carries diff markers. The "-" lines remove
 * mca_get_error_count and mca_get_ras_mca_idx_array; the "+" lines add
 * mca_get_ras_mca_set (takes a struct mca_bank_set *) and
 * mca_parse_mca_error_count (takes a struct mca_bank_entry * and a
 * uint32_t *count). Return-value conventions are not visible here.
 */
struct amdgpu_mca_smu_funcs {
76
118
int max_ue_count ;
77
119
int max_ce_count ;
78
120
int (* mca_set_debug_mode )(struct amdgpu_device * adev , bool enable );
79
- int (* mca_get_error_count )(struct amdgpu_device * adev , enum amdgpu_ras_block blk ,
80
- enum amdgpu_mca_error_type type , uint32_t * count );
121
+ int (* mca_get_ras_mca_set )(struct amdgpu_device * adev , enum amdgpu_ras_block blk , enum amdgpu_mca_error_type type ,
122
+ struct mca_bank_set * mca_set );
123
+ int (* mca_parse_mca_error_count )(struct amdgpu_device * adev , enum amdgpu_ras_block blk , enum amdgpu_mca_error_type type ,
124
+ struct mca_bank_entry * entry , uint32_t * count );
81
125
int (* mca_get_valid_mca_count )(struct amdgpu_device * adev , enum amdgpu_mca_error_type type ,
82
126
uint32_t * count );
83
127
int (* mca_get_mca_entry )(struct amdgpu_device * adev , enum amdgpu_mca_error_type type ,
84
128
int idx , struct mca_bank_entry * entry );
85
- int (* mca_get_ras_mca_idx_array )(struct amdgpu_device * adev , enum amdgpu_ras_block blk ,
86
- enum amdgpu_mca_error_type type , int * idx_array , int * idx_array_size );
87
129
};
88
130
89
131
void amdgpu_mca_query_correctable_error_count (struct amdgpu_device * adev ,
@@ -107,11 +149,22 @@ int amdgpu_mca_mpio_ras_sw_init(struct amdgpu_device *adev);
107
149
/*
 * SMU-backed MCA entry points. Their definitions are not part of this
 * header; the names largely mirror the members of struct
 * amdgpu_mca_smu_funcs.
 * NOTE(review): presumably each forwards to the matching callback; confirm
 * in the implementation file.
 */
void amdgpu_mca_smu_init_funcs(struct amdgpu_device *adev, const struct amdgpu_mca_smu_funcs *mca_funcs);
int amdgpu_mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable);
int amdgpu_mca_smu_get_valid_mca_count(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, uint32_t *count);
int amdgpu_mca_smu_get_mca_set_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk,
					   enum amdgpu_mca_error_type type, uint32_t *total);
int amdgpu_mca_smu_get_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk,
				   enum amdgpu_mca_error_type type, uint32_t *count);
int amdgpu_mca_smu_parse_mca_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk,
					 enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count);
int amdgpu_mca_smu_get_mca_set(struct amdgpu_device *adev, enum amdgpu_ras_block blk,
			       enum amdgpu_mca_error_type type, struct mca_bank_set *mca_set);
int amdgpu_mca_smu_get_mca_entry(struct amdgpu_device *adev, enum amdgpu_mca_error_type type,
				 int idx, struct mca_bank_entry *entry);

void amdgpu_mca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root);

/*
 * Helpers operating on a struct mca_bank_set.
 * NOTE(review): ownership and allocation behaviour of add_entry/release are
 * not visible from this header; check the definitions before relying on
 * them.
 */
void amdgpu_mca_bank_set_init(struct mca_bank_set *mca_set);
int amdgpu_mca_bank_set_add_entry(struct mca_bank_set *mca_set, struct mca_bank_entry *entry);
void amdgpu_mca_bank_set_release(struct mca_bank_set *mca_set);
int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, struct ras_err_data *err_data);
169
+
117
170
#endif
0 commit comments