Skip to content

Commit 8bdbd8b

Browse files
committed
drm/panthor: Make sure we handle 'unknown group state' case properly
When we check for state values returned by the FW, we only cover part of the 0:7 range. Make sure we catch FW inconsistencies by adding a default to the switch statement, and flagging the group state as unknown in that case.

When an unknown state is detected, we trigger a reset, and consider the group as unusable after that point, to prevent the potential corruption from creeping in other places if we continue executing stuff on this context.

v2:
- Add Steve's R-b
- Fix commit message

Reported-by: Dan Carpenter <[email protected]>
Closes: https://lore.kernel.org/dri-devel/[email protected]/T/#u
Suggested-by: Steven Price <[email protected]>
Signed-off-by: Boris Brezillon <[email protected]>
Reviewed-by: Steven Price <[email protected]>
Reviewed-by: Liviu Dudau <[email protected]>
Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
1 parent 08f4413 commit 8bdbd8b

File tree

1 file changed

+35
-2
lines changed

1 file changed

+35
-2
lines changed

drivers/gpu/drm/panthor/panthor_sched.c

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -490,6 +490,18 @@ enum panthor_group_state {
490490
* Can no longer be scheduled. The only allowed action is a destruction.
491491
*/
492492
PANTHOR_CS_GROUP_TERMINATED,
493+
494+
/**
495+
* @PANTHOR_CS_GROUP_UNKNOWN_STATE: Group is in an unknown state.
496+
*
497+
* The FW returned an inconsistent state. The group is flagged unusable
498+
* and can no longer be scheduled. The only allowed action is a
499+
* destruction.
500+
*
501+
* When that happens, we also schedule a FW reset, to start from a fresh
502+
* state.
503+
*/
504+
PANTHOR_CS_GROUP_UNKNOWN_STATE,
493505
};
494506

495507
/**
@@ -1127,6 +1139,7 @@ csg_slot_sync_state_locked(struct panthor_device *ptdev, u32 csg_id)
11271139
struct panthor_fw_csg_iface *csg_iface;
11281140
struct panthor_group *group;
11291141
enum panthor_group_state new_state, old_state;
1142+
u32 csg_state;
11301143

11311144
lockdep_assert_held(&ptdev->scheduler->lock);
11321145

@@ -1137,7 +1150,8 @@ csg_slot_sync_state_locked(struct panthor_device *ptdev, u32 csg_id)
11371150
return;
11381151

11391152
old_state = group->state;
1140-
switch (csg_iface->output->ack & CSG_STATE_MASK) {
1153+
csg_state = csg_iface->output->ack & CSG_STATE_MASK;
1154+
switch (csg_state) {
11411155
case CSG_STATE_START:
11421156
case CSG_STATE_RESUME:
11431157
new_state = PANTHOR_CS_GROUP_ACTIVE;
@@ -1148,11 +1162,28 @@ csg_slot_sync_state_locked(struct panthor_device *ptdev, u32 csg_id)
11481162
case CSG_STATE_SUSPEND:
11491163
new_state = PANTHOR_CS_GROUP_SUSPENDED;
11501164
break;
1165+
default:
1166+
/* The unknown state might be caused by a FW state corruption,
1167+
* which means the group metadata can't be trusted anymore, and
1168+
* the SUSPEND operation might propagate the corruption to the
1169+
* suspend buffers. Flag the group state as unknown to make
1170+
* sure it's unusable after that point.
1171+
*/
1172+
drm_err(&ptdev->base, "Invalid state on CSG %d (state=%d)",
1173+
csg_id, csg_state);
1174+
new_state = PANTHOR_CS_GROUP_UNKNOWN_STATE;
1175+
break;
11511176
}
11521177

11531178
if (old_state == new_state)
11541179
return;
11551180

1181+
/* The unknown state might be caused by a FW issue, reset the FW to
1182+
* take a fresh start.
1183+
*/
1184+
if (new_state == PANTHOR_CS_GROUP_UNKNOWN_STATE)
1185+
panthor_device_schedule_reset(ptdev);
1186+
11561187
if (new_state == PANTHOR_CS_GROUP_SUSPENDED)
11571188
csg_slot_sync_queues_state_locked(ptdev, csg_id);
11581189

@@ -1783,6 +1814,7 @@ static bool
17831814
group_can_run(struct panthor_group *group)
17841815
{
17851816
return group->state != PANTHOR_CS_GROUP_TERMINATED &&
1817+
group->state != PANTHOR_CS_GROUP_UNKNOWN_STATE &&
17861818
!group->destroyed && group->fatal_queues == 0 &&
17871819
!group->timedout;
17881820
}
@@ -2557,7 +2589,8 @@ void panthor_sched_suspend(struct panthor_device *ptdev)
25572589

25582590
if (csg_slot->group) {
25592591
csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, i,
2560-
CSG_STATE_SUSPEND,
2592+
group_can_run(csg_slot->group) ?
2593+
CSG_STATE_SUSPEND : CSG_STATE_TERMINATE,
25612594
CSG_STATE_MASK);
25622595
}
25632596
}

0 commit comments

Comments
 (0)