28
28
/*
 * Region sizes and offsets, all expressed in KB.
 * The data-exchange area starts immediately after the VBIOS image.
 */
#define AMD_SRIOV_MSG_VBIOS_SIZE_KB            64
#define AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB   AMD_SRIOV_MSG_VBIOS_SIZE_KB
#define AMD_SRIOV_MSG_DATAEXCHANGE_SIZE_KB     4
#define AMD_SRIOV_MSG_TMR_OFFSET_KB            2048
#define AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB         2
#define AMD_SRIOV_RAS_TELEMETRY_SIZE_KB        64

/*
 * layout
 * 0           64KB        65KB        66KB           68KB                   132KB
 * |   VBIOS   |   PF2VF   |   VF2PF   |   Bad Page   | RAS Telemetry Region | ...
 * |   64KB    |   1KB     |   1KB     |   2KB        | 64KB                 | ...
 */

/* Size of one PF2VF / VF2PF message slot. */
#define AMD_SRIOV_MSG_SIZE_KB                  1

/* Each region begins where the previous one ends. */
#define AMD_SRIOV_MSG_PF2VF_OFFSET_KB          AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB
#define AMD_SRIOV_MSG_VF2PF_OFFSET_KB          (AMD_SRIOV_MSG_PF2VF_OFFSET_KB + AMD_SRIOV_MSG_SIZE_KB)
#define AMD_SRIOV_MSG_BAD_PAGE_OFFSET_KB       (AMD_SRIOV_MSG_VF2PF_OFFSET_KB + AMD_SRIOV_MSG_SIZE_KB)
#define AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB  (AMD_SRIOV_MSG_BAD_PAGE_OFFSET_KB + AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB)
42
46
43
47
/*
44
48
* PF2VF history log:
@@ -86,30 +90,59 @@ enum amd_sriov_ucode_engine_id {
86
90
87
91
/*
 * Feature flag word, accessible either as individual one-bit fields
 * (flags) or as a single 32-bit value (all).
 * The named bits plus the reserved field add up to exactly 32 bits.
 */
union amd_sriov_msg_feature_flags {
	struct {
		uint32_t error_log_collect	: 1;
		uint32_t host_load_ucodes	: 1;
		uint32_t host_flr_vramlost	: 1;
		uint32_t mm_bw_management	: 1;
		uint32_t pp_one_vf_mode		: 1;
		uint32_t reg_indirect_acc	: 1;
		uint32_t av1_support		: 1;
		uint32_t vcn_rb_decouple	: 1;
		uint32_t mes_info_dump_enable	: 1;
		uint32_t ras_caps		: 1;
		uint32_t ras_telemetry		: 1;
		uint32_t reserved		: 21;
	} flags;
	uint32_t all;
};
102
108
103
109
/*
 * Register-access flag word: three one-bit vf_reg_access_* fields
 * (ih, mmhub, gc) followed by 29 reserved bits, overlaid on a single
 * 32-bit value (all).
 */
union amd_sriov_reg_access_flags {
	struct {
		uint32_t vf_reg_access_ih	: 1;
		uint32_t vf_reg_access_mmhub	: 1;
		uint32_t vf_reg_access_gc	: 1;
		uint32_t reserved		: 29;
	} flags;
	uint32_t all;
};
112
118
119
/*
 * RAS capability mask: one bit per block_* entry plus a poison mode bit,
 * overlaid on a single 64-bit value (all).
 * 20 named bits + 44 reserved bits = 64 bits.
 */
union amd_sriov_ras_caps {
	struct {
		uint64_t block_umc			: 1;
		uint64_t block_sdma			: 1;
		uint64_t block_gfx			: 1;
		uint64_t block_mmhub			: 1;
		uint64_t block_athub			: 1;
		uint64_t block_pcie_bif			: 1;
		uint64_t block_hdp			: 1;
		uint64_t block_xgmi_wafl		: 1;
		uint64_t block_df			: 1;
		uint64_t block_smn			: 1;
		uint64_t block_sem			: 1;
		uint64_t block_mp0			: 1;
		uint64_t block_mp1			: 1;
		uint64_t block_fuse			: 1;
		uint64_t block_mca			: 1;
		uint64_t block_vcn			: 1;
		uint64_t block_jpeg			: 1;
		uint64_t block_ih			: 1;
		uint64_t block_mpio			: 1;
		/* "propogation" (sic): spelling kept so existing users still compile */
		uint64_t poison_propogation_mode	: 1;
		uint64_t reserved			: 44;
	} bits;
	uint64_t all;
};
145
+
113
146
union amd_sriov_msg_os_info {
114
147
struct {
115
148
uint32_t windows : 1 ;
@@ -158,7 +191,7 @@ struct amd_sriov_msg_pf2vf_info_header {
158
191
uint32_t reserved [2 ];
159
192
};
160
193
161
/*
 * Count of uint32_t slots subtracted from 256 to size the trailing
 * reserved[] array of struct amd_sriov_msg_pf2vf_info.
 */
#define AMD_SRIOV_MSG_PF2VF_INFO_FILLED_SIZE (55)
162
195
struct amd_sriov_msg_pf2vf_info {
163
196
/* header contains size and version */
164
197
struct amd_sriov_msg_pf2vf_info_header header ;
@@ -211,6 +244,12 @@ struct amd_sriov_msg_pf2vf_info {
211
244
uint32_t pcie_atomic_ops_support_flags ;
212
245
/* Portion of GPU memory occupied by VF. MAX value is 65535, but set to uint32_t to maintain alignment with reserved size */
213
246
uint32_t gpu_capacity ;
247
+ /* vf bdf on host pci tree for debug only */
248
+ uint32_t bdf_on_host ;
249
+ uint32_t more_bp ; //Reserved for future use.
250
+ union amd_sriov_ras_caps ras_en_caps ;
251
+ union amd_sriov_ras_caps ras_telemetry_en_caps ;
252
+
214
253
/* reserved */
215
254
uint32_t reserved [256 - AMD_SRIOV_MSG_PF2VF_INFO_FILLED_SIZE ];
216
255
} __packed ;
@@ -283,8 +322,12 @@ enum amd_sriov_mailbox_request_message {
283
322
MB_REQ_MSG_REL_GPU_FINI_ACCESS ,
284
323
MB_REQ_MSG_REQ_GPU_RESET_ACCESS ,
285
324
MB_REQ_MSG_REQ_GPU_INIT_DATA ,
325
+ MB_REQ_MSG_PSP_VF_CMD_RELAY ,
286
326
287
327
MB_REQ_MSG_LOG_VF_ERROR = 200 ,
328
+ MB_REQ_MSG_READY_TO_RESET = 201 ,
329
+ MB_REQ_MSG_RAS_POISON = 202 ,
330
+ MB_REQ_RAS_ERROR_COUNT = 203 ,
288
331
};
289
332
290
333
/* mailbox message send from host to guest */
@@ -297,10 +340,60 @@ enum amd_sriov_mailbox_response_message {
297
340
MB_RES_MSG_FAIL ,
298
341
MB_RES_MSG_QUERY_ALIVE ,
299
342
MB_RES_MSG_GPU_INIT_DATA_READY ,
343
+ MB_RES_MSG_RAS_ERROR_COUNT_READY = 11 ,
300
344
301
345
MB_RES_MSG_TEXT_MESSAGE = 255
302
346
};
303
347
348
/*
 * GPU block identifiers for RAS telemetry.
 * Values are contiguous from 0; RAS_TELEMETRY_GPU_BLOCK_COUNT is the number
 * of blocks and must stay last.  The ordering mirrors the block_* bits of
 * union amd_sriov_ras_caps.
 */
enum amd_sriov_ras_telemetry_gpu_block {
	RAS_TELEMETRY_GPU_BLOCK_UMC		= 0,
	RAS_TELEMETRY_GPU_BLOCK_SDMA		= 1,
	RAS_TELEMETRY_GPU_BLOCK_GFX		= 2,
	RAS_TELEMETRY_GPU_BLOCK_MMHUB		= 3,
	RAS_TELEMETRY_GPU_BLOCK_ATHUB		= 4,
	RAS_TELEMETRY_GPU_BLOCK_PCIE_BIF	= 5,
	RAS_TELEMETRY_GPU_BLOCK_HDP		= 6,
	RAS_TELEMETRY_GPU_BLOCK_XGMI_WAFL	= 7,
	RAS_TELEMETRY_GPU_BLOCK_DF		= 8,
	RAS_TELEMETRY_GPU_BLOCK_SMN		= 9,
	RAS_TELEMETRY_GPU_BLOCK_SEM		= 10,
	RAS_TELEMETRY_GPU_BLOCK_MP0		= 11,
	RAS_TELEMETRY_GPU_BLOCK_MP1		= 12,
	RAS_TELEMETRY_GPU_BLOCK_FUSE		= 13,
	RAS_TELEMETRY_GPU_BLOCK_MCA		= 14,
	RAS_TELEMETRY_GPU_BLOCK_VCN		= 15,
	RAS_TELEMETRY_GPU_BLOCK_JPEG		= 16,
	RAS_TELEMETRY_GPU_BLOCK_IH		= 17,
	RAS_TELEMETRY_GPU_BLOCK_MPIO		= 18,
	RAS_TELEMETRY_GPU_BLOCK_COUNT		= 19,
};
370
+
371
/*
 * Header placed at the start of struct amdsriov_ras_telemetry.
 * Four 32-bit words: checksum, used_size and two reserved words.
 * NOTE(review): checksum algorithm and the unit of used_size are not
 * visible here -- confirm against the producer.
 */
struct amd_sriov_ras_telemetry_header {
	uint32_t checksum;
	uint32_t used_size;
	uint32_t reserved[2];
};
376
+
377
+ struct amd_sriov_ras_telemetry_error_count {
378
+ struct {
379
+ uint32_t ce_count ;
380
+ uint32_t ue_count ;
381
+ uint32_t de_count ;
382
+ uint32_t ce_overflow_count ;
383
+ uint32_t ue_overflow_count ;
384
+ uint32_t de_overflow_count ;
385
+ uint32_t reserved [6 ];
386
+ } block [RAS_TELEMETRY_GPU_BLOCK_COUNT ];
387
+ };
388
+
389
+ struct amdsriov_ras_telemetry {
390
+ struct amd_sriov_ras_telemetry_header header ;
391
+
392
+ union {
393
+ struct amd_sriov_ras_telemetry_error_count error_count ;
394
+ } body ;
395
+ };
396
+
304
397
/* version data stored in MAILBOX_MSGBUF_RCV_DW1 for future expansion */
305
398
enum amd_sriov_gpu_init_data_version {
306
399
GPU_INIT_DATA_READY_V1 = 1 ,
0 commit comments