Skip to content

Commit 98b00cd

Browse files
rkavitha-hcl authored and VSuryaprasad-HCL committed
sonic-host-services changes for gNOI Cold Reboot
1 parent d5a250e commit 98b00cd

File tree

5 files changed

+411
-2
lines changed

5 files changed

+411
-2
lines changed

host_modules/reboot.py

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
"""reboot module which performs reboot"""
2+
3+
import json
4+
import logging
5+
import threading
6+
import time
7+
from host_modules import host_service
8+
from utils.run_cmd import _run_command
9+
10+
# Module name; used to build the DBus bus name and to tag log messages
MOD_NAME = 'reboot'

# Accepted values for the "method" field of a reboot request.
# Every method may be given either as its enum number or as its string name.
REBOOTMETHOD_COLD_BOOT_VALUES = {1, "COLD"}
REBOOTMETHOD_WARM_BOOT_VALUES = {4, "WARM"}
REBOOTMETHOD_NSF_VALUES = {5, "NSF"}

# Seconds to wait for SONiC Host Service to be killed during reboot
REBOOT_TIMEOUT = 260

# Shell commands that trigger the supported reboot methods
EXECUTE_COLD_REBOOT_COMMAND = "sudo reboot"
EXECUTE_NSF_REBOOT_COMMAND = "/etc/init.d/gpins-nsf-boot nsf-reboot"

logger = logging.getLogger(__name__)
24+
25+
26+
class Reboot(host_service.HostModule):
    """DBus endpoint that executes the reboot and returns the reboot status.

    The status is kept in ``reboot_status_flag``, a dict with the keys
    ``active``, ``when`` and ``reason``. It is written by the thread that
    runs the reboot command and read by the DBus methods, so every access
    goes through ``lock``.
    """

    def __init__(self, mod_name):
        """Initialise the status flag to "no active reboot" and register the module.

        Args:
            mod_name: module name forwarded to ``host_service.HostModule``.
        """
        # reboot_status_flag can be read/written by multiple threads
        self.lock = threading.Lock()
        # reboot_status_flag is used to keep track of reboot status on host
        self.reboot_status_flag = {}
        # Populating with default value i.e., no active reboot
        self.populate_reboot_status_flag()
        super(Reboot, self).__init__(mod_name)

    def populate_reboot_status_flag(self, active=False, when=0, reason=""):
        """Populate reboot_status_flag with the given values under the lock.

        Called without arguments it resets the flag to "no active reboot".

        Args:
            active: whether a reboot is currently in progress.
            when: value stored under "when"; issue_reboot passes
                int(time.time()).
            reason: value stored under "reason"; issue_reboot passes the
                request's message.
        """
        # `with` guarantees the lock is released even if an assignment raises
        with self.lock:
            self.reboot_status_flag["active"] = active
            self.reboot_status_flag["when"] = when
            self.reboot_status_flag["reason"] = reason

    def validate_reboot_request(self, reboot_request):
        """Check that a decoded reboot request can be executed.

        Args:
            reboot_request: the value decoded from the json reboot request.

        Returns:
            (0, "") when the request is valid, otherwise (1, error message).
        """
        # Check whether reboot method is present. Any json value that is not
        # an object (number, string, list, null) cannot carry a method.
        if not isinstance(reboot_request, dict) or "method" not in reboot_request:
            return 1, "Reboot request must contain a reboot method"

        # Check whether reboot method is valid.
        rebootmethod = reboot_request["method"]
        try:
            valid_method = any(
                rebootmethod in values
                for values in (REBOOTMETHOD_COLD_BOOT_VALUES, REBOOTMETHOD_NSF_VALUES)
            )
        except TypeError:
            # Unhashable json values (list/object) cannot be looked up in a set
            valid_method = False
        if not valid_method:
            return 1, "Invalid reboot method: " + str(rebootmethod)

        # Check whether delay is non-zero. delay key will not exist in
        # reboot_request if it is zero
        if "delay" in reboot_request and reboot_request["delay"] != 0:
            return 1, "Delayed reboot is not supported"
        return 0, ""

    def execute_reboot(self, rebootmethod):
        """Execute reboot and reset reboot_status_flag when reboot fails.

        Intended to run in its own thread: after the command has been issued
        successfully it blocks for REBOOT_TIMEOUT seconds.

        Args:
            rebootmethod: reboot method taken from the validated request.
        """
        if rebootmethod in REBOOTMETHOD_COLD_BOOT_VALUES:
            command = EXECUTE_COLD_REBOOT_COMMAND
            logger.warning("%s: Issuing cold reboot", MOD_NAME)
        elif rebootmethod in REBOOTMETHOD_NSF_VALUES:
            command = EXECUTE_NSF_REBOOT_COMMAND
            logger.warning("%s: Issuing NSF reboot", MOD_NAME)
        else:
            # %s, not %d: the method may be given as a string such as "WARM"
            logger.error("%s: Invalid reboot method: %s", MOD_NAME, rebootmethod)
            # No reboot was issued, so do not leave the status stuck on active
            self.populate_reboot_status_flag()
            return

        rc, stdout, stderr = _run_command(command)
        if rc:
            self.populate_reboot_status_flag()
            logger.error("%s: Reboot failed execution with stdout: %s, "
                         "stderr: %s", MOD_NAME, stdout, stderr)
            return

        # Wait for 260 seconds for the reboot to complete. Here, we expect
        # that SONiC Host Service will be killed during this waiting period if
        # the reboot is successful. If this module is still alive after the
        # below waiting period, we can conclude that the reboot has failed.
        # Each container can take up to 20 seconds to get killed. In total,
        # there are 10 containers, and adding a buffer of 1 minute brings up
        # the delay value to be 260 seconds.
        time.sleep(REBOOT_TIMEOUT)
        # Conclude that the reboot has failed if we reach this point
        self.populate_reboot_status_flag()

    @host_service.method(host_service.bus_name(MOD_NAME), in_signature='as', out_signature='is')
    def issue_reboot(self, options):
        """Issue a reboot after performing the following steps sequentially:

          1. Checks that reboot_status_flag is not set
          2. Validates the reboot request
          3. Sets the reboot_status_flag
          4. Issues the reboot in a separate thread

        Args:
            options: list of strings; options[0] is the json formatted
                reboot request.

        Returns:
            (0, success message) when the reboot thread was started,
            otherwise (1, error message).
        """
        logger.warning("%s: issue_reboot rpc called", MOD_NAME)
        with self.lock:
            is_reboot_ongoing = self.reboot_status_flag["active"]
        # Return without issuing the reboot if the previous reboot is ongoing
        if is_reboot_ongoing:
            return 1, "Previous reboot is ongoing"

        # Convert input json formatted reboot request into python dict.
        # reboot_request is a python dict with the following keys:
        #   method  - specifies the method of reboot
        #   delay   - delay to issue reboot, key exists only if it is non-zero
        #   message - reason for reboot
        #   force   - either true/false, key exists only if it is true
        try:
            reboot_request = json.loads(options[0])
        except (IndexError, TypeError, ValueError):
            # IndexError: empty options; TypeError: options[0] is not text
            return 1, "Failed to parse json formatted reboot request into python dict"

        # Validate reboot request
        err, errstr = self.validate_reboot_request(reboot_request)
        if err:
            return err, errstr

        # Sets reboot_status_flag to be in active state. The message is not
        # checked by validate_reboot_request, so fall back to an empty reason
        # instead of raising KeyError when it is absent.
        self.populate_reboot_status_flag(
            True, int(time.time()), reboot_request.get("message", ""))

        # Issue reboot in a new thread and reset the reboot_status_flag if the
        # reboot fails
        try:
            t = threading.Thread(target=self.execute_reboot, args=(reboot_request["method"],))
            t.start()
        except RuntimeError as error:
            # The reboot never started; clear the active flag set above
            self.populate_reboot_status_flag()
            return 1, "Failed to start thread to execute reboot with error: " + str(error)
        return 0, "Successfully issued reboot"

    @host_service.method(host_service.bus_name(MOD_NAME), in_signature='', out_signature='is')
    def get_reboot_status(self):
        """Return the current reboot status on host in json format.

        Returns:
            (0, json string of reboot_status_flag).
        """
        with self.lock:
            response_data = json.dumps(self.reboot_status_flag)
        return 0, response_data
150+
def register():
    """Return the Reboot class together with its module name."""
    registration = (Reboot, MOD_NAME)
    return registration

scripts/sonic-host-server

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ import dbus.service
1212
import dbus.mainloop.glib
1313

1414
from gi.repository import GObject
15-
from host_modules import config_engine, gcu, host_service, showtech, systemd_service, file_service
15+
from host_modules import config_engine, gcu, host_service, showtech, systemd_service, file_service, reboot
1616

1717

1818
def register_dbus():
@@ -21,6 +21,7 @@ def register_dbus():
2121
'config': config_engine.Config('config'),
2222
'gcu': gcu.GCU('gcu'),
2323
'host_service': host_service.HostService('host_service'),
24+
'reboot': reboot.Reboot('reboot'),
2425
'showtech': showtech.Showtech('showtech'),
2526
'systemd': systemd_service.SystemdService('systemd'),
2627
'file_stat': file_service.FileService('file')

setup.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,8 @@
3030
maintainer = 'Joe LeVeque',
3131
maintainer_email = '[email protected]',
3232
packages = [
33-
'host_modules'
33+
'host_modules',
34+
'utils',
3435
],
3536
scripts = [
3637
'scripts/caclmgrd',

0 commit comments

Comments
 (0)