Skip to content

Commit 0cac183

Browse files
jokim-amd authored and alexdeucher committed
drm/amdkfd: range check cp bad op exception interrupts
Due to a CP interrupt bug, bad packet garbage exception codes are raised. Do a range check so that the debugger and runtime do not receive garbage codes. Update the user api to guard exception code type checking as well. Signed-off-by: Jonathan Kim <[email protected]> Tested-by: Jesse Zhang <[email protected]> Reviewed-by: Felix Kuehling <[email protected]> Signed-off-by: Alex Deucher <[email protected]>
1 parent 78aca9e commit 0cac183

File tree

4 files changed

+20
-6
lines changed

4 files changed

+20
-6
lines changed

drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -339,7 +339,8 @@ static void event_interrupt_wq_v10(struct kfd_node *dev,
339339
break;
340340
}
341341
kfd_signal_event_interrupt(pasid, context_id0 & 0x7fffff, 23);
342-
} else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) {
342+
} else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE &&
343+
KFD_DBG_EC_TYPE_IS_PACKET(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0))) {
343344
kfd_set_dbg_ev_from_interrupt(dev, pasid,
344345
KFD_DEBUG_DOORBELL_ID(context_id0),
345346
KFD_EC_MASK(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0)),

drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -328,7 +328,8 @@ static void event_interrupt_wq_v11(struct kfd_node *dev,
328328
/* CP */
329329
if (source_id == SOC15_INTSRC_CP_END_OF_PIPE)
330330
kfd_signal_event_interrupt(pasid, context_id0, 32);
331-
else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE)
331+
else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE &&
332+
KFD_DBG_EC_TYPE_IS_PACKET(KFD_CTXID0_CP_BAD_OP_ECODE(context_id0)))
332333
kfd_set_dbg_ev_from_interrupt(dev, pasid,
333334
KFD_CTXID0_DOORBELL_ID(context_id0),
334335
KFD_EC_MASK(KFD_CTXID0_CP_BAD_OP_ECODE(context_id0)),

drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -388,7 +388,8 @@ static void event_interrupt_wq_v9(struct kfd_node *dev,
388388
break;
389389
}
390390
kfd_signal_event_interrupt(pasid, sq_int_data, 24);
391-
} else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) {
391+
} else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE &&
392+
KFD_DBG_EC_TYPE_IS_PACKET(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0))) {
392393
kfd_set_dbg_ev_from_interrupt(dev, pasid,
393394
KFD_DEBUG_DOORBELL_ID(context_id0),
394395
KFD_EC_MASK(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0)),

include/uapi/linux/kfd_ioctl.h

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -913,14 +913,25 @@ enum kfd_dbg_trap_exception_code {
913913
KFD_EC_MASK(EC_DEVICE_NEW))
914914
#define KFD_EC_MASK_PROCESS (KFD_EC_MASK(EC_PROCESS_RUNTIME) | \
915915
KFD_EC_MASK(EC_PROCESS_DEVICE_REMOVE))
916+
#define KFD_EC_MASK_PACKET (KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_DIM_INVALID) | \
917+
KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_GROUP_SEGMENT_SIZE_INVALID) | \
918+
KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_CODE_INVALID) | \
919+
KFD_EC_MASK(EC_QUEUE_PACKET_RESERVED) | \
920+
KFD_EC_MASK(EC_QUEUE_PACKET_UNSUPPORTED) | \
921+
KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_WORK_GROUP_SIZE_INVALID) | \
922+
KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_REGISTER_INVALID) | \
923+
KFD_EC_MASK(EC_QUEUE_PACKET_VENDOR_UNSUPPORTED))
916924

917925
/* Checks for exception code types for KFD search */
926+
/*
 * Evaluates to non-zero when ecode lies strictly between EC_NONE and EC_MAX.
 * Each use of the argument is parenthesized so that an expression argument
 * (e.g. "a | b") is compared as a whole instead of being split by operator
 * precedence. Note that ecode is evaluated twice, so avoid arguments with
 * side effects.
 */
#define KFD_DBG_EC_IS_VALID(ecode) ((ecode) > EC_NONE && (ecode) < EC_MAX)
918927
#define KFD_DBG_EC_TYPE_IS_QUEUE(ecode) \
919-
(!!(KFD_EC_MASK(ecode) & KFD_EC_MASK_QUEUE))
928+
(KFD_DBG_EC_IS_VALID(ecode) && !!(KFD_EC_MASK(ecode) & KFD_EC_MASK_QUEUE))
920929
#define KFD_DBG_EC_TYPE_IS_DEVICE(ecode) \
921-
(!!(KFD_EC_MASK(ecode) & KFD_EC_MASK_DEVICE))
930+
(KFD_DBG_EC_IS_VALID(ecode) && !!(KFD_EC_MASK(ecode) & KFD_EC_MASK_DEVICE))
922931
#define KFD_DBG_EC_TYPE_IS_PROCESS(ecode) \
923-
(!!(KFD_EC_MASK(ecode) & KFD_EC_MASK_PROCESS))
932+
(KFD_DBG_EC_IS_VALID(ecode) && !!(KFD_EC_MASK(ecode) & KFD_EC_MASK_PROCESS))
933+
#define KFD_DBG_EC_TYPE_IS_PACKET(ecode) \
934+
(KFD_DBG_EC_IS_VALID(ecode) && !!(KFD_EC_MASK(ecode) & KFD_EC_MASK_PACKET))
924935

925936

926937
/* Runtime enable states */

0 commit comments

Comments
 (0)