Skip to content

Commit 74d9390

Browse files
authored
Merge pull request ceph#55537 from guits/node-proxy-handle-fetch-oob-error
node-proxy: improve http error handling in fetch_oob_details
2 parents 5e5ae5c + 056d4f7 commit 74d9390

File tree

6 files changed: 49 additions (+49) and 12 deletions (-12)

6 files changed: 49 additions (+49) and 12 deletions (-12)

src/ceph-node-proxy/ceph_node_proxy/main.py

Lines changed: 19 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22
from ceph_node_proxy.api import NodeProxyApi
33
from ceph_node_proxy.reporter import Reporter
44
from ceph_node_proxy.util import Config, get_logger, http_req, write_tmp_file, CONFIG
5+
from urllib.error import HTTPError
56
from typing import Dict, Any, Optional
67

78
import argparse
@@ -33,6 +34,8 @@ def __init__(self, **kw: Any) -> None:
3334
self.cephx = {'cephx': {'name': self.cephx_name,
3435
'secret': self.cephx_secret}}
3536
self.config = Config('/etc/ceph/node-proxy.yml', config=CONFIG)
37+
self.username: str = ''
38+
self.password: str = ''
3639

3740
def run(self) -> None:
3841
self.init()
@@ -44,15 +47,16 @@ def init(self) -> None:
4447
self.init_api()
4548

4649
def fetch_oob_details(self) -> Dict[str, str]:
47-
headers, result, status = http_req(hostname=self.mgr_host,
48-
port=self.mgr_agent_port,
49-
data=json.dumps(self.cephx),
50-
endpoint='/node-proxy/oob',
51-
ssl_ctx=self.ssl_ctx)
52-
if status != 200:
53-
msg = f'No out of band tool details could be loaded: {status}, {result}'
50+
try:
51+
headers, result, status = http_req(hostname=self.mgr_host,
52+
port=self.mgr_agent_port,
53+
data=json.dumps(self.cephx),
54+
endpoint='/node-proxy/oob',
55+
ssl_ctx=self.ssl_ctx)
56+
except HTTPError as e:
57+
msg = f'No out of band tool details could be loaded: {e.code}, {e.reason}'
5458
self.log.debug(msg)
55-
raise RuntimeError(msg)
59+
raise
5660

5761
result_json = json.loads(result)
5862
oob_details: Dict[str, str] = {
@@ -64,9 +68,13 @@ def fetch_oob_details(self) -> Dict[str, str]:
6468
return oob_details
6569

6670
def init_system(self) -> None:
67-
oob_details = self.fetch_oob_details()
68-
self.username: str = oob_details['username']
69-
self.password: str = oob_details['password']
71+
try:
72+
oob_details = self.fetch_oob_details()
73+
self.username = oob_details['username']
74+
self.password = oob_details['password']
75+
except HTTPError:
76+
self.log.warning('No oob details could be loaded, exiting...')
77+
raise SystemExit(1)
7078
try:
7179
self.system = RedfishDellSystem(host=oob_details['host'],
7280
port=oob_details['port'],

src/ceph-node-proxy/ceph_node_proxy/reporter.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ def main(self) -> None:
6363
self.system.previous_data = self.system.get_system()
6464
else:
6565
self.log.debug('no diff, not sending data to the mgr.')
66-
time.sleep(5)
6766
self.log.debug('lock released in reporter loop.')
67+
time.sleep(5)
6868
self.log.debug('exiting reporter loop.')
6969
raise SystemExit(0)

src/pybind/mgr/cephadm/module.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1716,6 +1716,10 @@ def hardware_powercycle(self, hostname: str, yes_i_really_mean_it: bool = False)
17161716
raise OrchestratorValidationError(f"Can't perform powercycle on node {hostname}: {e}")
17171717
return f'Powercycle scheduled on {hostname}'
17181718

1719+
@handle_orch_error
1720+
def node_proxy_fullreport(self, hostname: Optional[str] = None) -> Dict[str, Any]:
1721+
return self.node_proxy_cache.fullreport(hostname=hostname)
1722+
17191723
@handle_orch_error
17201724
def node_proxy_summary(self, hostname: Optional[str] = None) -> Dict[str, Any]:
17211725
return self.node_proxy_cache.summary(hostname=hostname)

src/pybind/mgr/cephadm/serve.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -888,6 +888,13 @@ def update_progress() -> None:
888888
hosts_altered.add(d.hostname)
889889
break
890890

891+
# do not attempt to deploy node-proxy agent when oob details are not provided.
892+
if slot.daemon_type == 'node-proxy' and slot.hostname not in self.mgr.node_proxy_cache.oob.keys():
893+
self.log.debug(
894+
f'Not deploying node-proxy agent on {slot.hostname} as oob details are not present.'
895+
)
896+
continue
897+
891898
# deploy new daemon
892899
daemon_id = slot.name
893900

src/pybind/mgr/orchestrator/_interface.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -403,6 +403,14 @@ def node_proxy_summary(self, hostname: Optional[str] = None) -> OrchResult[Dict[
403403
"""
404404
raise NotImplementedError()
405405

406+
def node_proxy_fullreport(self, hostname: Optional[str] = None) -> OrchResult[Dict[str, Any]]:
407+
"""
408+
Return node-proxy full report
409+
410+
:param hostname: hostname
411+
"""
412+
raise NotImplementedError()
413+
406414
def node_proxy_firmwares(self, hostname: Optional[str] = None) -> OrchResult[Dict[str, Any]]:
407415
"""
408416
Return node-proxy firmwares report

src/pybind/mgr/orchestrator/module.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -497,6 +497,7 @@ def _hardware_status(self, hostname: Optional[str] = None, _end_positional_: int
497497
"""
498498
table_heading_mapping = {
499499
'summary': ['HOST', 'STORAGE', 'CPU', 'NET', 'MEMORY', 'POWER', 'FANS'],
500+
'fullreport': [],
500501
'firmwares': ['HOST', 'COMPONENT', 'NAME', 'DATE', 'VERSION', 'STATUS'],
501502
'criticals': ['HOST', 'COMPONENT', 'NAME', 'STATUS', 'STATE'],
502503
'memory': ['HOST', 'NAME', 'STATUS', 'STATE'],
@@ -525,6 +526,15 @@ def _hardware_status(self, hostname: Optional[str] = None, _end_positional_: int
525526
row.extend([v['status'][key] for key in ['storage', 'processors', 'network', 'memory', 'power', 'fans']])
526527
table.add_row(row)
527528
output = table.get_string()
529+
elif category == 'fullreport':
530+
if hostname is None:
531+
output = "Missing host name"
532+
elif format != Format.json:
533+
output = "fullreport only supports json output"
534+
else:
535+
completion = self.node_proxy_fullreport(hostname=hostname)
536+
fullreport: Dict[str, Any] = raise_if_exception(completion)
537+
output = json.dumps(fullreport)
528538
elif category == 'firmwares':
529539
output = "Missing host name" if hostname is None else self._firmwares_table(hostname, table, format)
530540
elif category == 'criticals':

0 commit comments

Comments
 (0)