11
11
import re
12
12
import shutil
13
13
import subprocess
14
+ from asyncio import as_completed , create_task , run , wait
15
+ from contextlib import suppress
14
16
from pathlib import Path
17
+ from ssl import CERT_NONE , create_default_context
15
18
from typing import TYPE_CHECKING , Any , TypedDict
16
19
17
20
import charm_refresh
18
21
import psutil
19
22
import requests
20
23
from charms .operator_libs_linux .v2 import snap
24
+ from httpx import AsyncClient , BasicAuth , HTTPError
21
25
from jinja2 import Template
22
26
from ops import BlockedStatus
23
27
from pysyncobj .utility import TcpUtility , UtilityException
24
28
from tenacity import (
25
- AttemptManager ,
26
29
RetryError ,
27
30
Retrying ,
28
31
retry ,
@@ -172,6 +175,10 @@ def __init__(
172
175
def _patroni_auth (self ) -> requests .auth .HTTPBasicAuth :
173
176
return requests .auth .HTTPBasicAuth ("patroni" , self .patroni_password )
174
177
178
@property
def _patroni_async_auth(self) -> BasicAuth:
    """Credentials of the "patroni" user as an httpx ``BasicAuth`` object."""
    username = "patroni"
    return BasicAuth(username, password=self.patroni_password)
181
+
175
182
@property
176
183
def _patroni_url (self ) -> str :
177
184
"""Patroni REST API URL."""
@@ -249,28 +256,14 @@ def get_postgresql_version(self) -> str:
249
256
if snp ["name" ] == charm_refresh .snap_name ():
250
257
return snp ["version" ]
251
258
252
def cluster_status(self, alternative_endpoints: list | None = None) -> list[ClusterMember]:
    """Query the cluster status.

    Args:
        alternative_endpoints: addresses to query instead of this cluster's
            own units; handed unchanged to ``parallel_patroni_get_request``.

    Returns:
        The ``"members"`` list taken from the cluster endpoint response.

    Raises:
        RetryError: if no endpoint produced a response, or the response does
            not carry a ``"members"`` entry.
    """
    # Request info from cluster endpoint (which returns all members of the cluster).
    response = self.parallel_patroni_get_request(
        f"/{PATRONI_CLUSTER_STATUS_ENDPOINT}", alternative_endpoints
    )
    # Check the shape before indexing: a response without "members" must surface
    # as RetryError rather than as KeyError/TypeError.
    if isinstance(response, dict) and "members" in response:
        return response["members"]
    # Nothing is retried here; RetryError is raised as the failure signal and
    # wraps a plain Exception rather than a real attempt.
    raise RetryError(last_attempt=Exception("Unable to reach any units"))
274
267
275
268
def get_member_ip (self , member_name : str ) -> str | None :
276
269
"""Get cluster member IP address.
@@ -281,13 +274,14 @@ def get_member_ip(self, member_name: str) -> str | None:
281
274
Returns:
282
275
IP address of the cluster member.
283
276
"""
284
- cluster_status = self .cluster_status ()
285
- if not cluster_status :
286
- return
277
+ try :
278
+ cluster_status = self .cluster_status ()
287
279
288
- for member in cluster_status :
289
- if member ["name" ] == member_name :
290
- return member ["host" ]
280
+ for member in cluster_status :
281
+ if member ["name" ] == member_name :
282
+ return member ["host" ]
283
+ except RetryError :
284
+ logger .debug ("Unable to get IP. Cluster status unreachable" )
291
285
292
286
def get_member_status (self , member_name : str ) -> str :
293
287
"""Get cluster member status.
@@ -307,6 +301,44 @@ def get_member_status(self, member_name: str) -> str:
307
301
return member ["state" ]
308
302
return ""
309
303
304
async def _httpx_get_request(self, url: str, verify: bool = True):
    """GET ``url`` with the Patroni credentials and decode the JSON body.

    Args:
        url: full URL to request.
        verify: when true, the CA bundle under the Patroni configuration path
            is added to the default SSL context (a missing bundle file is
            ignored); when false, hostname checking and certificate
            verification are switched off.

    Returns:
        The decoded JSON body, or None if the request raised ``HTTPError`` or
        the body could not be decoded (``ValueError``).
    """
    context = create_default_context()
    if not verify:
        context.check_hostname = False
        context.verify_mode = CERT_NONE
    else:
        with suppress(FileNotFoundError):
            context.load_verify_locations(cafile=f"{PATRONI_CONF_PATH}/{TLS_CA_BUNDLE_FILE}")

    client = AsyncClient(
        auth=self._patroni_async_auth, timeout=API_REQUEST_TIMEOUT, verify=context
    )
    async with client:
        try:
            reply = await client.get(url)
            return reply.json()
        except (HTTPError, ValueError):
            return None
319
+
320
async def _async_get_request(self, uri: str, endpoints: list[str], verify: bool = True):
    """Race a GET for ``uri`` against every endpoint and keep the first answer.

    Args:
        uri: path (including its leading slash) appended to each base URL.
        endpoints: member addresses; each one is queried at
            ``https://<address>:8008``.
        verify: forwarded to ``_httpx_get_request``.

    Returns:
        The first truthy result produced by any request, or None when every
        request finished without one (including when ``endpoints`` is empty).
    """
    tasks = [
        create_task(self._httpx_get_request(f"https://{ip}:8008{uri}", verify))
        for ip in endpoints
    ]
    try:
        for finished in as_completed(tasks):
            if result := await finished:
                return result
        return None
    finally:
        # Runs on success, on an unexpected error and on cancellation alike,
        # so no request keeps running after this coroutine is done.
        pending = [task for task in tasks if not task.done()]
        for task in pending:
            task.cancel()
        if pending:
            # Let the cancelled requests unwind before handing control back.
            await wait(pending)
331
+
332
+ def parallel_patroni_get_request (self , uri : str , endpoints : list [str ] | None = None ) -> dict :
333
+ """Call all possible patroni endpoints in parallel."""
334
+ if not endpoints :
335
+ endpoints = (self .unit_ip , * self .peers_ips )
336
+ verify = True
337
+ else :
338
+ # TODO we don't know the other cluster's ca
339
+ verify = False
340
+ return run (self ._async_get_request (uri , endpoints , verify ))
341
+
310
342
def get_primary (
311
343
self , unit_name_pattern = False , alternative_endpoints : list [str ] | None = None
312
344
) -> str | None :
@@ -320,14 +352,17 @@ def get_primary(
320
352
primary pod or unit name.
321
353
"""
322
354
# Request info from cluster endpoint (which returns all members of the cluster).
323
- if cluster_status := self .cluster_status (alternative_endpoints ):
355
+ try :
356
+ cluster_status = self .cluster_status (alternative_endpoints )
324
357
for member in cluster_status :
325
358
if member ["role" ] == "leader" :
326
359
primary = member ["name" ]
327
360
if unit_name_pattern :
328
361
# Change the last dash to / in order to match unit name pattern.
329
362
primary = label2name (primary )
330
363
return primary
364
+ except RetryError :
365
+ logger .debug ("Unable to get primary. Cluster status unreachable" )
331
366
332
367
def get_standby_leader (
333
368
self , unit_name_pattern = False , check_whether_is_running : bool = False
@@ -366,31 +401,6 @@ def get_sync_standby_names(self) -> list[str]:
366
401
sync_standbys .append (label2name (member ["name" ]))
367
402
return sync_standbys
368
403
369
- def _get_alternative_patroni_url (
370
- self , attempt : AttemptManager , alternative_endpoints : list [str ] | None = None
371
- ) -> str :
372
- """Get an alternative REST API URL from another member each time.
373
-
374
- When the Patroni process is not running in the current unit it's needed
375
- to use a URL from another cluster member REST API to do some operations.
376
- """
377
- if alternative_endpoints is not None :
378
- return self ._patroni_url .replace (
379
- self .unit_ip , alternative_endpoints [attempt .retry_state .attempt_number - 1 ]
380
- )
381
- attempt_number = attempt .retry_state .attempt_number
382
- if attempt_number > 1 :
383
- url = self ._patroni_url
384
- if (attempt_number - 1 ) <= len (self .peers_ips ):
385
- unit_number = attempt_number - 2
386
- else :
387
- unit_number = attempt_number - 2 - len (self .peers_ips )
388
- other_unit_ip = list (self .peers_ips )[unit_number ]
389
- url = url .replace (self .unit_ip , other_unit_ip )
390
- else :
391
- url = self ._patroni_url
392
- return url
393
-
394
404
def are_all_members_ready (self ) -> bool :
395
405
"""Check if all members are correctly running Patroni and PostgreSQL.
396
406
0 commit comments