Skip to content

Commit 4e81417

Browse files
committed
thermal: core: Fix thermal zone suspend-resume synchronization
There are 3 synchronization issues with thermal zone suspend-resume during system-wide transitions:

1. The resume code runs in a PM notifier which is invoked after user space has been thawed, so it can run concurrently with user space which can trigger a thermal zone device removal. If that happens, the thermal zone resume code may use a stale pointer to the next list element and crash, because it does not hold thermal_list_lock while walking thermal_tz_list.

2. The thermal zone resume code calls thermal_zone_device_init() outside the zone lock, so user space or an update triggered by the platform firmware may see an inconsistent state of a thermal zone leading to unexpected behavior.

3. Clearing the in_suspend global variable in thermal_pm_notify() allows __thermal_zone_device_update() to continue for all thermal zones and it may as well run before the thermal_tz_list walk (or at any point during the list walk for that matter) and attempt to operate on a thermal zone that has not been resumed yet. It may also race destructively with thermal_zone_device_init().

To address these issues, add thermal_list_lock locking to thermal_pm_notify(), especially around the thermal_tz_list walks, make it call thermal_zone_device_init() back-to-back with __thermal_zone_device_update() under the zone lock and replace in_suspend with per-zone bool "suspended" indicators set and unset under the given zone's lock.

Link: https://lore.kernel.org/linux-pm/[email protected]/
Reported-by: Bo Ye <[email protected]>
Signed-off-by: Rafael J. Wysocki <[email protected]>
1 parent 5f70413 commit 4e81417

File tree

2 files changed

+25
-7
lines changed

2 files changed

+25
-7
lines changed

drivers/thermal/thermal_core.c

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,6 @@ static LIST_HEAD(thermal_governor_list);
3737
static DEFINE_MUTEX(thermal_list_lock);
3838
static DEFINE_MUTEX(thermal_governor_lock);
3939

40-
static atomic_t in_suspend;
41-
4240
static struct thermal_governor *def_governor;
4341

4442
/*
@@ -431,7 +429,7 @@ void __thermal_zone_device_update(struct thermal_zone_device *tz,
431429
{
432430
struct thermal_trip *trip;
433431

434-
if (atomic_read(&in_suspend))
432+
if (tz->suspended)
435433
return;
436434

437435
if (!thermal_zone_device_is_enabled(tz))
@@ -1542,17 +1540,35 @@ static int thermal_pm_notify(struct notifier_block *nb,
15421540
case PM_HIBERNATION_PREPARE:
15431541
case PM_RESTORE_PREPARE:
15441542
case PM_SUSPEND_PREPARE:
1545-
atomic_set(&in_suspend, 1);
1543+
mutex_lock(&thermal_list_lock);
1544+
1545+
list_for_each_entry(tz, &thermal_tz_list, node) {
1546+
mutex_lock(&tz->lock);
1547+
1548+
tz->suspended = true;
1549+
1550+
mutex_unlock(&tz->lock);
1551+
}
1552+
1553+
mutex_unlock(&thermal_list_lock);
15461554
break;
15471555
case PM_POST_HIBERNATION:
15481556
case PM_POST_RESTORE:
15491557
case PM_POST_SUSPEND:
1550-
atomic_set(&in_suspend, 0);
1558+
mutex_lock(&thermal_list_lock);
1559+
15511560
list_for_each_entry(tz, &thermal_tz_list, node) {
1561+
mutex_lock(&tz->lock);
1562+
1563+
tz->suspended = false;
1564+
15521565
thermal_zone_device_init(tz);
1553-
thermal_zone_device_update(tz,
1554-
THERMAL_EVENT_UNSPECIFIED);
1566+
__thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED);
1567+
1568+
mutex_unlock(&tz->lock);
15551569
}
1570+
1571+
mutex_unlock(&thermal_list_lock);
15561572
break;
15571573
default:
15581574
break;

include/linux/thermal.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,7 @@ struct thermal_cooling_device {
152152
* @node: node in thermal_tz_list (in thermal_core.c)
153153
* @poll_queue: delayed work for polling
154154
* @notify_event: Last notification event
155+
* @suspended: thermal zone suspend indicator
155156
*/
156157
struct thermal_zone_device {
157158
int id;
@@ -185,6 +186,7 @@ struct thermal_zone_device {
185186
struct list_head node;
186187
struct delayed_work poll_queue;
187188
enum thermal_notify_event notify_event;
189+
bool suspended;
188190
};
189191

190192
/**

0 commit comments

Comments
 (0)