1414# License for the specific language governing permissions and limitations
1515# under the License.
1616
17- # Trivial HTTP server to check health of scheduler, backup and volume services.
18- # Cinder-API has its own health check endpoint and does not need this.
19- #
20- # The only check this server currently does is using the heartbeat in the
21- # database service table, accessing the DB directly here using cinder's
22- # configuration options.
23- #
24- # The benefit of accessing the DB directly is that it doesn't depend on the
25- # Cinder-API service being up and we can also differentiate between the
26- # container not having a connection to the DB and the cinder service not doing
27- # the heartbeats.
28- #
29- # For volume services all enabled backends must be up to return 200, so it is
30- # recommended to use a different pod for each backend to avoid one backend
31- # affecting others.
32- #
33- # Requires the name of the service as the first argument (volume, backup,
34- # scheduler) and optionally a second argument with the location of the
35- # configuration directory (defaults to /etc/cinder/cinder.conf.d)
3617
3718from http import server
3819import signal
3920import socket
4021import sys
4122import time
4223import threading
24+ import requests
4325
4426from oslo_config import cfg
4527
28+
4629SERVER_PORT = 8080
4730CONF = cfg .CONF
4831
32+
class HTTPServerV6(server.HTTPServer):
    """``HTTPServer`` variant whose listening socket uses the IPv6 family.

    The only difference from the base class is ``address_family``; the
    script instantiates this class with the ``"::"`` wildcard address.
    """

    address_family = socket.AF_INET6

5136
5237class HeartbeatServer (server .BaseHTTPRequestHandler ):
38+
@staticmethod
def check_services():
    """Probe the Keystone and Prometheus collector endpoints over HTTP.

    Both URLs are read from the global ``CONF`` object
    (``keystone_authtoken.auth_url`` and
    ``collector_prometheus.prometheus_url``).  Each endpoint is fetched
    with a 5 second timeout and must answer with a non-error status.

    Returns:
        dict: check name -> ``'OK'`` or ``'WARN'`` for the checks that
        passed.  A failing check never appears here because it raises.

    Raises:
        Exception: when a request fails.  ``args[0]`` is a short summary
        and ``args[1]`` is the *string* form of the underlying
        ``requests`` error.  ``do_GET`` forwards both to ``send_error``,
        which HTML-escapes them, so they must be plain strings.  The
        original error stays available as ``__cause__``.
    """
    print("Starting health checks")
    results = {}

    # TODO: Database endpoint reachability is not checked yet.

    # Keystone endpoint reachability
    try:
        keystone_uri = CONF.keystone_authtoken.auth_url
        response = requests.get(keystone_uri, timeout=5)
        response.raise_for_status()
        # NOTE(review): identifying Keystone by its ``Server`` header only
        # works if the deployment exposes that header unchanged -- confirm.
        # A mismatch is reported as a warning, not as a failure.
        server_header = response.headers.get('Server', '').lower()
        if 'keystone' in server_header:
            results['keystone_endpoint'] = 'OK'
            print("Keystone endpoint reachable and responsive.")
        else:
            results['keystone_endpoint'] = 'WARN'
            print(f"Keystone endpoint reachable, but not a valid Keystone service: {keystone_uri}")
    except requests.exceptions.RequestException as e:
        print(f"ERROR: Keystone endpoint check failed: {e}")
        # Pass str(e), not e: the caller hands args[1] to send_error().
        raise Exception('ERROR: Keystone check failed', str(e)) from e

    # Prometheus collector endpoint reachability
    try:
        prometheus_url = CONF.collector_prometheus.prometheus_url
        insecure = CONF.collector_prometheus.insecure
        cafile = CONF.collector_prometheus.cafile
        # requests' ``verify`` accepts either a bool or a CA bundle path:
        # insecure -> False, otherwise the CA file if set, else True.
        if insecure:
            verify_ssl = False
        else:
            verify_ssl = cafile or True

        response = requests.get(prometheus_url, timeout=5, verify=verify_ssl)
        response.raise_for_status()
        results['collector_endpoint'] = 'OK'
        print("Prometheus collector endpoint reachable.")
    except requests.exceptions.RequestException as e:
        print(f"ERROR: Prometheus collector check failed: {e}")
        # Pass str(e), not e: the caller hands args[1] to send_error().
        raise Exception('ERROR: Prometheus collector check failed', str(e)) from e

    return results

77+
5378 def do_GET (self ):
79+ try :
80+ self .check_services ()
81+ except Exception as exc :
82+ self .send_error (500 , exc .args [0 ], exc .args [1 ])
83+ return
84+
5485 self .send_response (200 )
5586 self .send_header ("Content-type" , "text/html" )
5687 self .end_headers ()
@@ -68,12 +99,41 @@ def stopper(signal_number=None, frame=None):
6899
69100
70101if __name__ == "__main__" :
102+ # Register config options
103+ cfg .CONF .register_group (cfg .OptGroup (name = 'database' , title = 'Database connection options' ))
104+ cfg .CONF .register_opt (cfg .StrOpt ('connection' , default = None ), group = 'database' )
105+
106+ cfg .CONF .register_group (cfg .OptGroup (name = 'keystone_authtoken' , title = 'Keystone Auth Token Options' ))
107+ cfg .CONF .register_opt (cfg .StrOpt ('auth_url' ,
108+ default = 'https://keystone-internal.openstack.svc:5000' ),
109+ group = 'keystone_authtoken' )
110+
111+ cfg .CONF .register_group (cfg .OptGroup (name = 'collector_prometheus' , title = 'Prometheus Collector Options' ))
112+ cfg .CONF .register_opt (cfg .StrOpt ('prometheus_url' ,
113+ default = 'http://metric-storage-prometheus.openstack.svc:9090' ),
114+ group = 'collector_prometheus' )
115+ cfg .CONF .register_opt (cfg .BoolOpt ('insecure' , default = False ), group = 'collector_prometheus' )
116+ cfg .CONF .register_opt (cfg .StrOpt ('cafile' , default = None ), group = 'collector_prometheus' )
117+
118+ # Load configuration from file
119+ try :
120+ cfg .CONF (sys .argv [1 :], default_config_files = ['/etc/cloudkitty/cloudkitty.conf.d/cloudkitty.conf' ])
121+ except cfg .ConfigFilesNotFoundError as e :
122+ print (f"Health check failed: { e } " , file = sys .stderr )
123+ sys .exit (1 )
124+
125+ # Detect IPv6 support for binding
71126 hostname = socket .gethostname ()
72- ipv6_address = socket .getaddrinfo (hostname , None , socket .AF_INET6 )
127+ try :
128+ ipv6_address = socket .getaddrinfo (hostname , None , socket .AF_INET6 )
129+ except socket .gaierror :
130+ ipv6_address = None
131+
73132 if ipv6_address :
74- webServer = HTTPServerV6 (("::" ,SERVER_PORT ), HeartbeatServer )
133+ webServer = HTTPServerV6 (("::" , SERVER_PORT ), HeartbeatServer )
75134 else :
76135 webServer = server .HTTPServer (("0.0.0.0" , SERVER_PORT ), HeartbeatServer )
136+
77137 stop = get_stopper (webServer )
78138
79139 # Need to run the server on a different thread because its shutdown method
@@ -91,4 +151,4 @@ def stopper(signal_number=None, frame=None):
91151 except KeyboardInterrupt :
92152 pass
93153 finally :
94- stop ()
154+ stop ()
0 commit comments