Skip to content

Commit 9bb1928

Browse files
committed
Check if host is already off if redfish api call returns 409
If a Dell server is already off when we try to fence it, the iDRAC returns "HTTP/1.1 409 Conflict". Handle this condition gracefully by performing one more API call to check the server's power state instead of failing immediately. If the iDRAC returns 409 and the reported power state is "Off", we can assume the server is in the proper state and can be considered successfully fenced, and thus safe to evacuate. This commit also removes leftover exception debug lines that would expose secrets in the logs if IPMI is used. Jira: https://issues.redhat.com/browse/OSPRH-20661 Closes: #434
1 parent 442e0bf commit 9bb1928

File tree

1 file changed

+22
-2
lines changed

1 file changed

+22
-2
lines changed

templates/instanceha/bin/instanceha.py

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -541,6 +541,17 @@ def _redfish_reset(url, user, passwd, timeout, action):
541541
r = requests.post(url, data=json.dumps(payload), headers=headers, auth=(user, passwd), verify=False, timeout=timeout)
542542
return r
543543

544+
def _redfish_get_power_state(url, user, passwd, timeout):
545+
"""Get the power state from Redfish API"""
546+
try:
547+
r = requests.get(url, auth=(user, passwd), verify=False, timeout=timeout)
548+
if r.status_code == 200:
549+
data = r.json()
550+
return data.get('PowerState', '').upper()
551+
except Exception as e:
552+
logging.error('Failed to get power state: %s' % str(e))
553+
return None
554+
544555
def _bmh_fence(token, namespace, host, action):
545556

546557
url = "https://kubernetes.default.svc/apis/metal3.io/v1alpha1/namespaces/%s/baremetalhosts/%s?fieldManager=kubectl-patch" % (namespace, host)
@@ -604,7 +615,7 @@ def _host_fence(host, action):
604615
logging.error('Timeout expired while sending IPMI command for power off of %s' % host)
605616
return False
606617
except subprocess.CalledProcessError as e:
607-
logging.error('Error while sending IPMI command for power off of %s: %s' % (host, e))
618+
logging.error('Error while sending IPMI command for power off of %s' % host)
608619
return False
609620

610621
logging.info('Successfully powered off %s' % host)
@@ -617,7 +628,7 @@ def _host_fence(host, action):
617628
logging.error('Timeout expired while sending IPMI command for power on of %s' % host)
618629
return False
619630
except subprocess.CalledProcessError as e:
620-
logging.error('Error while sending IPMI command for power on of %s: %s' % (host, e))
631+
logging.error('Error while sending IPMI command for power on of %s' % host)
621632
return False
622633

623634
logging.info('Successfully powered on %s' % host)
@@ -648,6 +659,15 @@ def _host_fence(host, action):
648659
if r.status_code in [200, 204]:
649660
logging.info('Power off of %s ok' % host)
650661
return True
662+
elif r.status_code == 409:
663+
# Check if server is already powered off
664+
power_state = _redfish_get_power_state(url, user, passwd, timeout)
665+
if power_state == 'OFF':
666+
logging.info('Power off of %s ok (already off)' % host)
667+
return True
668+
else:
669+
logging.error('Could not power off %s (409 but not OFF: %s)' % (host, power_state))
670+
return False
651671
else:
652672
logging.error('Could not power off %s' % host)
653673
return False

0 commit comments

Comments
 (0)