Skip to content

Commit efb38de

Browse files
committed
sonic-host-services changes for gNOI Cold Reboot
1 parent 13a5419 commit efb38de

File tree

5 files changed

+414
-2
lines changed

5 files changed

+414
-2
lines changed

host_modules/gnoi_reboot.py

Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
"""gNOI reboot module which performs reboot"""
2+
3+
import json
4+
import logging
5+
import threading
6+
import time
7+
from host_modules import host_service
8+
from utils.run_cmd import _run_command
9+
10+
# Name under which this host module is exposed on the bus
MOD_NAME = 'gnoi_reboot'

# Reboot method in reboot request.
# Both enum and string representations are supported, so each set holds the
# numeric value and the string name of one method.
REBOOTMETHOD_COLD_BOOT_VALUES = {1, "COLD"}
# NOTE(review): WARM is declared here but validate_reboot_request only accepts
# the COLD and NSF sets - confirm whether warm reboot is meant to be supported.
REBOOTMETHOD_WARM_BOOT_VALUES = {4, "WARM"}
REBOOTMETHOD_NSF_VALUES = {5, "NSF"}

# Timeout (in seconds, passed to time.sleep) for SONiC Host Service to be
# killed during reboot
REBOOT_TIMEOUT = 260

# Commands handed to _run_command for each supported reboot method
EXECUTE_COLD_REBOOT_COMMAND = "sudo reboot"
EXECUTE_NSF_REBOOT_COMMAND = "/etc/init.d/gpins-nsf-boot nsf-reboot"

logger = logging.getLogger(__name__)
24+
25+
26+
class GnoiReboot(host_service.HostModule):
    """DBus endpoint that executes the reboot and returns the reboot status.

    The status lives in ``reboot_status_flag`` (keys ``active``, ``when`` and
    ``reason``). It is written by the reboot worker thread and read by the
    DBus methods, so every access goes through ``lock``.
    """

    def __init__(self, mod_name):
        """Set the status to "no active reboot" and initialise the host module.

        Args:
            mod_name: module name forwarded to ``host_service.HostModule``.
        """
        # Guards reboot_status_flag, which is read/written by multiple threads
        self.lock = threading.Lock()
        # reboot_status_flag is used to keep track of reboot status on host
        self.reboot_status_flag = {}
        # Populating with default value i.e., no active reboot
        self.populate_reboot_status_flag()
        super(GnoiReboot, self).__init__(mod_name)

    @staticmethod
    def _method_matches(rebootmethod, values):
        """Return True if ``rebootmethod`` is a member of the set ``values``.

        A request decoded from JSON may carry an unhashable method (a list or
        an object); a set lookup raises TypeError for those, which is treated
        here as "no match" instead of propagating.
        """
        try:
            return rebootmethod in values
        except TypeError:
            return False

    def populate_reboot_status_flag(self, active=False, when=0, reason=""):
        """Overwrite all three fields of reboot_status_flag under the lock.

        Called with no arguments it resets the status to "no active reboot".

        Args:
            active: whether a reboot is currently in progress.
            when: time the reboot was issued (issue_reboot passes epoch seconds).
            reason: message supplied with the reboot request.
        """
        # "with" releases the lock even if an assignment raises
        with self.lock:
            self.reboot_status_flag["active"] = active
            self.reboot_status_flag["when"] = when
            self.reboot_status_flag["reason"] = reason

    def validate_reboot_request(self, reboot_request):
        """Check that a decoded reboot request can be executed.

        Args:
            reboot_request: the value decoded from the JSON request; expected
                to be a dict with a "method" key and an optional "delay" key.

        Returns:
            ``(0, "")`` if the request is valid, otherwise ``(1, message)``
            describing the first problem found.
        """
        # json.loads can return any JSON value; only an object can carry a
        # method, so anything else is reported as a missing method.
        if not isinstance(reboot_request, dict) or "method" not in reboot_request:
            return 1, "Reboot request must contain a reboot method"

        # Check whether reboot method is valid (only cold and NSF are accepted).
        rebootmethod = reboot_request["method"]
        accepted_sets = (REBOOTMETHOD_COLD_BOOT_VALUES, REBOOTMETHOD_NSF_VALUES)
        if not any(self._method_matches(rebootmethod, values) for values in accepted_sets):
            return 1, "Invalid reboot method: " + str(rebootmethod)

        # Check whether delay is non-zero. delay key will not exist in
        # reboot_request if it is zero.
        if reboot_request.get("delay", 0) != 0:
            return 1, "Delayed reboot is not supported"
        return 0, ""

    def execute_reboot(self, rebootmethod):
        """Execute reboot and reset reboot_status_flag when reboot fails.

        Runs in a worker thread started by issue_reboot. Every path that
        returns from this method means the reboot did not happen, so each one
        resets the status to "no active reboot"; otherwise the flag would stay
        active and block every later request.

        Args:
            rebootmethod: reboot method from the request (enum value or name).
        """
        if self._method_matches(rebootmethod, REBOOTMETHOD_COLD_BOOT_VALUES):
            command = EXECUTE_COLD_REBOOT_COMMAND
            # Best-effort marker file: failing to write it must not prevent
            # the reboot or kill this thread with the status still active.
            try:
                with open("/tmp/hostlog.txt", "w") as marker:
                    marker.write("Received reboot command ! ")
            except (IOError, OSError) as error:
                logger.warning("%s: Could not write /tmp/hostlog.txt: %s", MOD_NAME, error)
            logger.warning("%s: Issuing cold reboot", MOD_NAME)
        elif self._method_matches(rebootmethod, REBOOTMETHOD_NSF_VALUES):
            command = EXECUTE_NSF_REBOOT_COMMAND
            logger.warning("%s: Issuing NSF reboot", MOD_NAME)
        else:
            # %s, not %d: the method may be a string such as "WARM"
            logger.error("%s: Invalid reboot method: %s", MOD_NAME, rebootmethod)
            self.populate_reboot_status_flag()
            return

        try:
            rc, stdout, stderr = _run_command(command)
        except Exception:
            # Thread boundary: nothing above us can handle this, so log it and
            # clear the status rather than leaving a phantom active reboot.
            logger.exception("%s: Reboot command raised an exception", MOD_NAME)
            self.populate_reboot_status_flag()
            return
        if rc:
            self.populate_reboot_status_flag()
            logger.error("%s: Reboot failed execution with stdout: %s, "
                         "stderr: %s", MOD_NAME, stdout, stderr)
            return

        # Wait REBOOT_TIMEOUT seconds for the reboot to complete. Here, we
        # expect that SONiC Host Service will be killed during this waiting
        # period if the reboot is successful. If this module is still alive
        # after the waiting period, we can conclude that the reboot has failed.
        # Each container can take up to 20 seconds to get killed. In total,
        # there are 10 containers, and adding a buffer of 1 minute brings the
        # delay value to 260 seconds.
        time.sleep(REBOOT_TIMEOUT)
        # Conclude that the reboot has failed if we reach this point
        logger.error("%s: Reboot did not complete within %s seconds",
                     MOD_NAME, REBOOT_TIMEOUT)
        self.populate_reboot_status_flag()

    @host_service.method(host_service.bus_name(MOD_NAME), in_signature='as', out_signature='is')
    def issue_reboot(self, options):
        """Issues reboot after performing the following steps sequentially:
          1. Checks that reboot_status_flag is not set
          2. Validates the reboot request
          3. Sets the reboot_status_flag
          4. Issues the reboot in a separate thread

        Args:
            options: array of strings; the first element is the JSON formatted
                reboot request.

        Returns:
            ``(0, message)`` once the reboot thread has been started, or
            ``(1, message)`` describing why the reboot was not issued.
        """
        logger.warning("%s: issue_reboot rpc called", MOD_NAME)
        with self.lock:
            is_reboot_ongoing = self.reboot_status_flag["active"]
        # Return without issuing the reboot if the previous reboot is ongoing
        if is_reboot_ongoing:
            return 1, "Previous reboot is ongoing"

        # Convert input json formatted reboot request into python dict.
        # reboot_request is a python dict with the following keys:
        #     method - specifies the method of reboot
        #     delay - delay to issue reboot, key exists only if it is non-zero
        #     message - reason for reboot
        #     force - either true/false, key exists only if it is true
        try:
            reboot_request = json.loads(options[0])
        except (ValueError, TypeError, IndexError):
            # ValueError: malformed JSON; TypeError: element is not text;
            # IndexError: options array is empty.
            return 1, "Failed to parse json formatted reboot request into python dict"

        # Validate reboot request
        err, errstr = self.validate_reboot_request(reboot_request)
        if err:
            return err, errstr

        # Sets reboot_status_flag to be in active state. The message is not
        # checked by validation, so fall back to an empty reason if absent.
        self.populate_reboot_status_flag(
            True, int(time.time()), reboot_request.get("message", ""))

        # Issue reboot in a new thread and reset the reboot_status_flag if the reboot fails
        try:
            worker = threading.Thread(target=self.execute_reboot,
                                      args=(reboot_request["method"],))
            worker.start()
        except RuntimeError as error:
            # No thread is running to clear the status later, so clear it now
            self.populate_reboot_status_flag()
            return 1, "Failed to start thread to execute reboot with error: " + str(error)
        return 0, "Successfully issued reboot"

    @host_service.method(host_service.bus_name(MOD_NAME), in_signature='', out_signature='is')
    def get_reboot_status(self):
        """Returns current reboot status on host in json format.

        Returns:
            ``(0, status)`` where ``status`` is reboot_status_flag serialised
            with json.dumps.
        """
        with self.lock:
            response_data = json.dumps(self.reboot_status_flag)
        return 0, response_data
152+
153+
def register():
    """Return the host-module class together with its module name."""
    registration = (GnoiReboot, MOD_NAME)
    return registration

scripts/sonic-host-server

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ import dbus.service
1212
import dbus.mainloop.glib
1313

1414
from gi.repository import GObject
15-
from host_modules import config_engine, gcu, host_service, showtech, systemd_service, file_service
15+
from host_modules import config_engine, gcu, host_service, showtech, systemd_service, file_service, gnoi_reboot
1616

1717

1818
def register_dbus():
@@ -21,6 +21,7 @@ def register_dbus():
2121
'config': config_engine.Config('config'),
2222
'gcu': gcu.GCU('gcu'),
2323
'host_service': host_service.HostService('host_service'),
24+
'gnoi_reboot': gnoi_reboot.GnoiReboot('gnoi_reboot'),
2425
'showtech': showtech.Showtech('showtech'),
2526
'systemd': systemd_service.SystemdService('systemd'),
2627
'file_stat': file_service.FileService('file')

setup.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,8 @@
3030
maintainer = 'Joe LeVeque',
3131
maintainer_email = '[email protected]',
3232
packages = [
33-
'host_modules'
33+
'host_modules',
34+
'utils',
3435
],
3536
scripts = [
3637
'scripts/caclmgrd',

0 commit comments

Comments
 (0)