Skip to content

Commit 011d949

Browse files
[thermalctld] Change thermal monitor from sub-process to thread (#692)
Description:
Change the thermal monitor from a sub-process to a thread to reduce memory consumption.

Motivation and Context:
Reduce memory consumption to gain better performance.

How Has This Been Tested?
- Existing unit tests
- Manual test to check the performance

Additional Information (Optional):

# before the change (two thermalctld processes)
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
8461 root 20 0 125.4m 36.3m 17.3m S 0.0 0.5 0:00.64 python3 /usr/local/bin/thermalctld
8462 root 20 0 52.4m 25.8m 7.9m S 0.0 0.3 0:00.15 python3 /usr/local/bin/thermalctld

# after the change (one thermalctld process)
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
8397 root 20 0 197.3m 36.5m 17.3m S 0.0 0.5 0:02.62 python3 /usr/local/bin/thermalctld
1 parent ef9eb35 commit 011d949

File tree

2 files changed

+32
-33
lines changed

2 files changed

+32
-33
lines changed

sonic-thermalctld/scripts/thermalctld

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ from datetime import datetime
1515

1616
import sonic_platform
1717
from sonic_py_common import daemon_base, logger
18-
from sonic_py_common.task_base import ProcessTaskBase
18+
from sonic_py_common.task_base import ThreadTaskBase
1919
from swsscommon import swsscommon
2020

2121

@@ -743,8 +743,7 @@ class TemperatureUpdater(logger.Logger):
743743
self.chassis_table._del(name)
744744

745745

746-
class ThermalMonitor(ProcessTaskBase):
747-
746+
class ThermalMonitor(ThreadTaskBase):
748747
def __init__(
749748
self, chassis, initial_interval, update_interval, update_elapsed_threshold
750749
):

sonic-thermalctld/tests/test_thermalctld.py

Lines changed: 30 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import os
22
import sys
3-
import multiprocessing
3+
import threading
44
from imp import load_source # TODO: Replace with importlib once we no longer need to support Python 2
55

66
# TODO: Clean this up once we no longer need to support Python 2
@@ -131,7 +131,7 @@ class TestFanUpdater(object):
131131
@mock.patch('thermalctld.update_entity_info', mock.MagicMock())
132132
def test_refresh_fan_drawer_status_fan_drawer_get_name_not_impl(self):
133133
# Test case where fan_drawer.get_name is not implemented
134-
fan_updater = thermalctld.FanUpdater(MockChassis(), multiprocessing.Event())
134+
fan_updater = thermalctld.FanUpdater(MockChassis(), threading.Event())
135135
mock_fan_drawer = mock.MagicMock()
136136
fan_updater._refresh_fan_drawer_status(mock_fan_drawer, 1)
137137
assert thermalctld.update_entity_info.call_count == 0
@@ -145,7 +145,7 @@ def test_update_fan_with_exception(self):
145145
fan.make_over_speed()
146146
chassis.get_all_fans().append(fan)
147147

148-
fan_updater = thermalctld.FanUpdater(chassis, multiprocessing.Event())
148+
fan_updater = thermalctld.FanUpdater(chassis, threading.Event())
149149
fan_updater.update()
150150
assert fan.get_status_led() == MockFan.STATUS_LED_COLOR_RED
151151
assert fan_updater.log_warning.call_count == 1
@@ -162,15 +162,15 @@ def test_set_fan_led_exception(self):
162162
mock_fan = MockFan()
163163
mock_fan.set_status_led = mock.MagicMock(side_effect=NotImplementedError)
164164

165-
fan_updater = thermalctld.FanUpdater(MockChassis(), multiprocessing.Event())
165+
fan_updater = thermalctld.FanUpdater(MockChassis(), threading.Event())
166166
fan_updater._set_fan_led(mock_fan_drawer, mock_fan, 'Test Fan', fan_status)
167167
assert fan_updater.log_warning.call_count == 1
168168
fan_updater.log_warning.assert_called_with('Failed to set status LED for fan Test Fan, set_status_led not implemented')
169169

170170
def test_fan_absent(self):
171171
chassis = MockChassis()
172172
chassis.make_absent_fan()
173-
fan_updater = thermalctld.FanUpdater(chassis, multiprocessing.Event())
173+
fan_updater = thermalctld.FanUpdater(chassis, threading.Event())
174174
fan_updater.update()
175175
fan_list = chassis.get_all_fans()
176176
assert fan_list[0].get_status_led() == MockFan.STATUS_LED_COLOR_RED
@@ -194,7 +194,7 @@ def test_fan_absent(self):
194194
def test_fan_faulty(self):
195195
chassis = MockChassis()
196196
chassis.make_faulty_fan()
197-
fan_updater = thermalctld.FanUpdater(chassis, multiprocessing.Event())
197+
fan_updater = thermalctld.FanUpdater(chassis, threading.Event())
198198
fan_updater.update()
199199
fan_list = chassis.get_all_fans()
200200
assert fan_list[0].get_status_led() == MockFan.STATUS_LED_COLOR_RED
@@ -218,7 +218,7 @@ def test_fan_faulty(self):
218218
def test_fan_under_speed(self):
219219
chassis = MockChassis()
220220
chassis.make_under_speed_fan()
221-
fan_updater = thermalctld.FanUpdater(chassis, multiprocessing.Event())
221+
fan_updater = thermalctld.FanUpdater(chassis, threading.Event())
222222
fan_updater.update()
223223
fan_list = chassis.get_all_fans()
224224
assert fan_list[0].get_status_led() == MockFan.STATUS_LED_COLOR_RED
@@ -234,7 +234,7 @@ def test_fan_under_speed(self):
234234
def test_fan_over_speed(self):
235235
chassis = MockChassis()
236236
chassis.make_over_speed_fan()
237-
fan_updater = thermalctld.FanUpdater(chassis, multiprocessing.Event())
237+
fan_updater = thermalctld.FanUpdater(chassis, threading.Event())
238238
fan_updater.update()
239239
fan_list = chassis.get_all_fans()
240240
assert fan_list[0].get_status_led() == MockFan.STATUS_LED_COLOR_RED
@@ -253,7 +253,7 @@ def test_update_psu_fans(self):
253253
mock_fan = MockFan()
254254
psu._fan_list.append(mock_fan)
255255
chassis._psu_list.append(psu)
256-
fan_updater = thermalctld.FanUpdater(chassis, multiprocessing.Event())
256+
fan_updater = thermalctld.FanUpdater(chassis, threading.Event())
257257
fan_updater.update()
258258
assert fan_updater.log_warning.call_count == 0
259259

@@ -274,7 +274,7 @@ def test_update_module_fans(self):
274274
chassis.set_modular_chassis(True)
275275
module._fan_list.append(mock_fan)
276276
chassis._module_list.append(module)
277-
fan_updater = thermalctld.FanUpdater(chassis, multiprocessing.Event())
277+
fan_updater = thermalctld.FanUpdater(chassis, threading.Event())
278278
fan_updater.update()
279279
assert fan_updater.log_warning.call_count == 0
280280

@@ -321,7 +321,7 @@ def test_insufficient_fan_number():
321321
chassis = MockChassis()
322322
chassis.make_absent_fan()
323323
chassis.make_faulty_fan()
324-
fan_updater = thermalctld.FanUpdater(chassis, multiprocessing.Event())
324+
fan_updater = thermalctld.FanUpdater(chassis, threading.Event())
325325
fan_updater.update()
326326
assert fan_updater.log_warning.call_count == 3
327327
expected_calls = [
@@ -405,7 +405,7 @@ class TestTemperatureUpdater(object):
405405
"""
406406
def test_deinit(self):
407407
chassis = MockChassis()
408-
temp_updater = thermalctld.TemperatureUpdater(chassis, multiprocessing.Event())
408+
temp_updater = thermalctld.TemperatureUpdater(chassis, threading.Event())
409409
temp_updater.temperature_status_dict = {'key1': 'value1', 'key2': 'value2'}
410410
temp_updater.table = Table("STATE_DB", "xtable")
411411
temp_updater.table._del = mock.MagicMock()
@@ -427,7 +427,7 @@ def test_deinit(self):
427427

428428
def test_deinit_exception(self):
429429
chassis = MockChassis()
430-
temp_updater = thermalctld.TemperatureUpdater(chassis, multiprocessing.Event())
430+
temp_updater = thermalctld.TemperatureUpdater(chassis, threading.Event())
431431
temp_updater.temperature_status_dict = {'key1': 'value1', 'key2': 'value2'}
432432
temp_updater.table = Table("STATE_DB", "xtable")
433433
temp_updater.table._del = mock.MagicMock()
@@ -451,7 +451,7 @@ def test_deinit_exception(self):
451451
def test_over_temper(self):
452452
chassis = MockChassis()
453453
chassis.make_over_temper_thermal()
454-
temperature_updater = thermalctld.TemperatureUpdater(chassis, multiprocessing.Event())
454+
temperature_updater = thermalctld.TemperatureUpdater(chassis, threading.Event())
455455
temperature_updater.update()
456456
thermal_list = chassis.get_all_thermals()
457457
assert temperature_updater.log_warning.call_count == 1
@@ -465,7 +465,7 @@ def test_over_temper(self):
465465
def test_under_temper(self):
466466
chassis = MockChassis()
467467
chassis.make_under_temper_thermal()
468-
temperature_updater = thermalctld.TemperatureUpdater(chassis, multiprocessing.Event())
468+
temperature_updater = thermalctld.TemperatureUpdater(chassis, threading.Event())
469469
temperature_updater.update()
470470
thermal_list = chassis.get_all_thermals()
471471
assert temperature_updater.log_warning.call_count == 1
@@ -482,7 +482,7 @@ def test_update_psu_thermals(self):
482482
mock_thermal = MockThermal()
483483
psu._thermal_list.append(mock_thermal)
484484
chassis._psu_list.append(psu)
485-
temperature_updater = thermalctld.TemperatureUpdater(chassis, multiprocessing.Event())
485+
temperature_updater = thermalctld.TemperatureUpdater(chassis, threading.Event())
486486
temperature_updater.update()
487487
assert temperature_updater.log_warning.call_count == 0
488488

@@ -502,7 +502,7 @@ def test_update_sfp_thermals(self):
502502
mock_thermal = MockThermal()
503503
sfp._thermal_list.append(mock_thermal)
504504
chassis._sfp_list.append(sfp)
505-
temperature_updater = thermalctld.TemperatureUpdater(chassis, multiprocessing.Event())
505+
temperature_updater = thermalctld.TemperatureUpdater(chassis, threading.Event())
506506
temperature_updater.update()
507507
assert temperature_updater.log_warning.call_count == 0
508508

@@ -523,7 +523,7 @@ def test_update_thermal_with_exception(self):
523523
thermal.make_over_temper()
524524
chassis.get_all_thermals().append(thermal)
525525

526-
temperature_updater = thermalctld.TemperatureUpdater(chassis, multiprocessing.Event())
526+
temperature_updater = thermalctld.TemperatureUpdater(chassis, threading.Event())
527527
temperature_updater.update()
528528
assert temperature_updater.log_warning.call_count == 2
529529

@@ -544,7 +544,7 @@ def test_update_module_thermals(self):
544544
chassis = MockChassis()
545545
chassis.make_module_thermal()
546546
chassis.set_modular_chassis(True)
547-
temperature_updater = thermalctld.TemperatureUpdater(chassis, multiprocessing.Event())
547+
temperature_updater = thermalctld.TemperatureUpdater(chassis, threading.Event())
548548
temperature_updater.update()
549549
assert len(temperature_updater.all_thermals) == 3
550550

@@ -559,21 +559,21 @@ def test_dpu_chassis_thermals():
559559
# Modular chassis (Not a dpu chassis) No Change in TemperatureUpdater Behaviour
560560
chassis.set_modular_chassis(True)
561561
chassis.set_my_slot(1)
562-
temperature_updater = thermalctld.TemperatureUpdater(chassis, multiprocessing.Event())
562+
temperature_updater = thermalctld.TemperatureUpdater(chassis, threading.Event())
563563
assert temperature_updater.chassis_table
564564
# DPU chassis TemperatureUpdater without is_smartswitch False return - No update to CHASSIS_STATE_DB
565565
chassis.set_modular_chassis(False)
566566
chassis.set_dpu(True)
567-
temperature_updater = thermalctld.TemperatureUpdater(chassis, multiprocessing.Event())
567+
temperature_updater = thermalctld.TemperatureUpdater(chassis, threading.Event())
568568
assert not temperature_updater.chassis_table
569569
# DPU chassis TemperatureUpdater without get_dpu_id implmenetation- No update to CHASSIS_STATE_DB
570570
chassis.set_smartswitch(True)
571-
temperature_updater = thermalctld.TemperatureUpdater(chassis, multiprocessing.Event())
571+
temperature_updater = thermalctld.TemperatureUpdater(chassis, threading.Event())
572572
assert not temperature_updater.chassis_table
573573
# DPU chassis TemperatureUpdater with get_dpu_id implemented - Update data to CHASSIS_STATE_DB
574574
dpu_id = 1
575575
chassis.set_dpu_id(dpu_id)
576-
temperature_updater = thermalctld.TemperatureUpdater(chassis, multiprocessing.Event())
576+
temperature_updater = thermalctld.TemperatureUpdater(chassis, threading.Event())
577577
assert temperature_updater.chassis_table
578578
# Table name in chassis state db = TEMPERATURE_INFO_0 for dpu_id 0
579579
assert temperature_updater.chassis_table.table_name == f"{TEMPER_INFO_TABLE_NAME}_{dpu_id}"
@@ -588,7 +588,7 @@ def test_dpu_chassis_state_deinit():
588588
chassis.set_modular_chassis(False)
589589
chassis.set_dpu(True)
590590
chassis.set_dpu_id(1)
591-
temperature_updater = thermalctld.TemperatureUpdater(chassis, multiprocessing.Event())
591+
temperature_updater = thermalctld.TemperatureUpdater(chassis, threading.Event())
592592
assert temperature_updater.chassis_table
593593
temperature_updater.table = Table("STATE_DB", "xtable")
594594
temperature_updater.phy_entity_table = None
@@ -611,7 +611,7 @@ def test_updater_dpu_thermal_check_chassis_table():
611611
chassis.set_dpu(True)
612612
chassis.set_smartswitch(True)
613613
chassis.set_dpu_id(1)
614-
temperature_updater = thermalctld.TemperatureUpdater(chassis, multiprocessing.Event())
614+
temperature_updater = thermalctld.TemperatureUpdater(chassis, threading.Event())
615615
temperature_updater.update()
616616
assert temperature_updater.chassis_table.get_size() == chassis.get_num_thermals()
617617

@@ -628,17 +628,17 @@ def test_updater_thermal_check_modular_chassis():
628628
chassis = MockChassis()
629629
assert chassis.is_modular_chassis() == False
630630

631-
temperature_updater = thermalctld.TemperatureUpdater(chassis, multiprocessing.Event())
631+
temperature_updater = thermalctld.TemperatureUpdater(chassis, threading.Event())
632632
assert temperature_updater.chassis_table == None
633633

634634
chassis.set_modular_chassis(True)
635635
chassis.set_my_slot(-1)
636-
temperature_updater = thermalctld.TemperatureUpdater(chassis, multiprocessing.Event())
636+
temperature_updater = thermalctld.TemperatureUpdater(chassis, threading.Event())
637637
assert temperature_updater.chassis_table == None
638638

639639
my_slot = 1
640640
chassis.set_my_slot(my_slot)
641-
temperature_updater = thermalctld.TemperatureUpdater(chassis, multiprocessing.Event())
641+
temperature_updater = thermalctld.TemperatureUpdater(chassis, threading.Event())
642642
assert temperature_updater.chassis_table != None
643643
assert temperature_updater.chassis_table.table_name == '{}_{}'.format(TEMPER_INFO_TABLE_NAME, str(my_slot))
644644

@@ -651,7 +651,7 @@ def test_updater_thermal_check_chassis_table():
651651

652652
chassis.set_modular_chassis(True)
653653
chassis.set_my_slot(1)
654-
temperature_updater = thermalctld.TemperatureUpdater(chassis, multiprocessing.Event())
654+
temperature_updater = thermalctld.TemperatureUpdater(chassis, threading.Event())
655655

656656
temperature_updater.update()
657657
assert temperature_updater.chassis_table.get_size() == chassis.get_num_thermals()
@@ -670,7 +670,7 @@ def test_updater_thermal_check_min_max():
670670

671671
chassis.set_modular_chassis(True)
672672
chassis.set_my_slot(1)
673-
temperature_updater = thermalctld.TemperatureUpdater(chassis, multiprocessing.Event())
673+
temperature_updater = thermalctld.TemperatureUpdater(chassis, threading.Event())
674674

675675
temperature_updater.update()
676676
slot_dict = temperature_updater.chassis_table.get(thermal.get_name())

0 commit comments

Comments
 (0)