Skip to content

Commit 6e2e239

Browse files
authored
patronictl demote/promote-cluster (patroni#3405)
Implement patronictl commands for cluster demotion and promotion that handle the config editing and check the resulting status.
1 parent 9225336 commit 6e2e239

File tree

4 files changed

+193
-6
lines changed

4 files changed

+193
-6
lines changed

patroni/ctl.py

Lines changed: 117 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@
6363

6464
from . import global_config
6565
from .config import Config
66-
from .dcs import AbstractDCS, Cluster, get_dcs as _get_dcs, Member
66+
from .dcs import AbstractDCS, Cluster, get_dcs as _get_dcs, Leader, Member
6767
from .exceptions import PatroniException
6868
from .postgresql.misc import postgres_version_to_int, PostgresqlRole, PostgresqlState
6969
from .postgresql.mpp import get_mpp
@@ -512,11 +512,14 @@ def watching(w: bool, watch: Optional[int], max_count: Optional[int] = None, cle
512512
return
513513

514514
counter = 1
515+
yield_time = time.time()
515516
while watch and counter <= (max_count or counter):
516-
time.sleep(watch)
517+
elapsed = time.time() - yield_time
518+
time.sleep(max(0, watch - elapsed))
517519
counter += 1
518520
if clear:
519521
click.clear()
522+
yield_time = time.time()
520523
yield 0
521524

522525

@@ -2314,3 +2317,115 @@ def format_pg_version(version: int) -> str:
23142317
return "{0}.{1}.{2}".format(version // 10000, version // 100 % 100, version % 100)
23152318
else:
23162319
return "{0}.{1}".format(version // 10000, version % 100)
2320+
2321+
2322+
def change_cluster_role(cluster_name: str, force: bool, standby_config: Optional[Dict[str, Any]]) -> None:
    """Demote or promote cluster.

    Patch the ``standby_cluster`` section of the dynamic configuration through the REST API of the current leader
    and then poll DCS, printing the cluster state, until the leader reports the target role.

    :param cluster_name: name of the Patroni cluster.
    :param force: if ``True`` run cluster demotion/promotion without asking for confirmation.
    :param standby_config: standby cluster configuration to be applied if demotion is requested.
                           A falsy value (``None`` or empty) requests promotion.

    :raises:
        :class:`PatroniCtlException`: if:
            * cluster has no leader; or
            * cluster is already in the required state; or
            * operation is aborted; or
            * the ``PATCH /config`` request fails or returns a status code other than ``200``.
    """
    demote = bool(standby_config)
    # Word stem: completed with "e", "ion" or "ed" in the messages below.
    action_name = 'demot' if demote else 'promot'
    target_role = PostgresqlRole.STANDBY_LEADER if demote else PostgresqlRole.PRIMARY

    dcs = get_dcs(cluster_name, None)
    cluster = dcs.get_cluster()
    leader_name = cluster.leader and cluster.leader.name
    if not leader_name:
        raise PatroniCtlException(f'Cluster has no leader, {action_name}ion is not possible')
    if cluster.leader and cluster.leader.data.get('role') == target_role:
        raise PatroniCtlException('Cluster is already in the required state')

    click.echo('Current cluster topology')
    output_members(cluster, cluster_name)
    if not force:
        confirm = click.confirm(f'Are you sure you want to {action_name}e {cluster_name} cluster?')
        if not confirm:
            raise PatroniCtlException(f'Aborted cluster {action_name}ion')

    try:
        if TYPE_CHECKING:  # pragma: no cover
            assert isinstance(cluster.leader, Leader)
        r = request_patroni(cluster.leader.member, 'patch', 'config', {'standby_cluster': standby_config})

        if r.status != 200:
            raise PatroniCtlException(
                f'Failed to {action_name}e {cluster_name} cluster: '
                f'/config PATCH status code={r.status}, ({r.data.decode("utf-8")})')
    except PatroniCtlException:
        # Already carries the full "Failed to ..." message; wrapping it again would duplicate the prefix.
        raise
    except Exception as err:
        raise PatroniCtlException(f'Failed to {action_name}e {cluster_name} cluster: {err}') from err

    # No iteration limit is passed to ``watching()``: the loop ends only via the ``break`` below.
    for _ in watching(True, 1, clear=False):
        cluster = dcs.get_cluster()
        is_unlocked = cluster.is_unlocked()
        leader_role = cluster.leader and cluster.leader.data.get('role')
        leader_state = cluster.leader and cluster.leader.data.get('state')
        # Member that held the leader key when the command was started (may no longer be the leader).
        old_leader = cluster.get_member(leader_name, False)
        old_leader_state = old_leader and old_leader.data.get('state')

        if not is_unlocked and leader_role == target_role and leader_state == PostgresqlState.RUNNING:
            # For demotion additionally wait until the former leader reports the running state again.
            if not demote or old_leader_state == PostgresqlState.RUNNING:
                click.echo(
                    f'{datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")} cluster is successfully {action_name}ed')
                break

        state_prts = [f'{datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")} cluster is unlocked: {is_unlocked}',
                      f'leader role: {leader_role}',
                      f'leader state: {leader_state}']
        # Report the former leader separately only when leadership has moved to a different member.
        if demote and cluster.leader and leader_name != cluster.leader.name and old_leader_state:
            state_prts.append(f'previous leader state: {repr(old_leader_state)}')
        click.echo(", ".join(state_prts))
        output_members(cluster, cluster_name)
2381+
2382+
2383+
@ctl.command('demote-cluster', help="Demote cluster to a standby cluster")
@arg_cluster_name
@option_force
@click.option('--host', help='Address of the remote node', required=False)
@click.option('--port', help='Port of the remote node', type=int, required=False)
@click.option('--restore-command', help='Command to restore WAL records from the remote primary', required=False)
@click.option('--primary-slot-name', help='Name of the slot on the remote node to use for replication', required=False)
def demote_cluster(cluster_name: str, force: bool, host: Optional[str], port: Optional[int],
                   restore_command: Optional[str], primary_slot_name: Optional[str]) -> None:
    """Process ``demote-cluster`` command of ``patronictl`` utility.

    Build the standby cluster configuration from the given options and hand it over to
    :func:`change_cluster_role`, which turns the cluster into a standby cluster.

    :param cluster_name: name of the Patroni cluster.
    :param force: if ``True`` run cluster demotion without asking for confirmation.
    :param host: address of the remote node.
    :param port: port of the remote node.
    :param restore_command: command to restore WAL records from the remote primary.
    :param primary_slot_name: name of the slot on the remote node to use for replication.

    :raises:
        :class:`PatroniCtlException`: if:
            * neither ``host`` nor ``port`` nor ``restore_command`` is provided; or
            * cluster has no leader; or
            * cluster is already in the required state; or
            * operation is aborted.
    """
    # ``primary_slot_name`` alone does not describe a replication source.
    if not (host or port or restore_command):
        raise PatroniCtlException('At least --host, --port or --restore-command should be specified')

    options = {
        'host': host,
        'port': port,
        'primary_slot_name': primary_slot_name,
        'restore_command': restore_command,
    }
    # Keep only the options that were actually provided.
    standby_config = {name: value for name, value in options.items() if value}
    change_cluster_role(cluster_name, force, standby_config)
2418+
2419+
2420+
@ctl.command('promote-cluster', help="Promote cluster, make it run standalone")
@arg_cluster_name
@option_force
def promote_cluster(cluster_name: str, force: bool) -> None:
    """Process ``promote-cluster`` command of ``patronictl`` utility.

    Promote cluster, make it run standalone.

    :param cluster_name: name of the Patroni cluster.
    :param force: if ``True`` run cluster promotion without asking for confirmation.

    :raises:
        :class:`PatroniCtlException`: if:
            * cluster has no leader; or
            * cluster is already in the required state; or
            * operation is aborted.
    """
    # ``None`` as standby configuration requests promotion.
    change_cluster_role(cluster_name, force, None)

patroni/ha.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1660,7 +1660,7 @@ def before_shutdown() -> None:
16601660
else:
16611661
if self._rewind.rewind_or_reinitialize_needed_and_possible(leader):
16621662
return False # do not start postgres, but run pg_rewind on the next iteration
1663-
self.state_handler.follow(node_to_follow, role)
1663+
return self.state_handler.follow(node_to_follow, role)
16641664

16651665
def should_run_scheduled_action(self, action_name: str, scheduled_at: Optional[datetime.datetime],
16661666
cleanup_fn: Callable[..., Any]) -> bool:

tests/test_ctl.py

Lines changed: 72 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@
1212
from prettytable import PrettyTable
1313

1414
from patroni.ctl import CtlPostgresqlRole
15-
from patroni.postgresql.misc import PostgresqlState
15+
from patroni.dcs import ClusterConfig, Leader, Member, SyncState
16+
from patroni.postgresql.misc import PostgresqlRole, PostgresqlState
1617

1718
try:
1819
from prettytable import HRuleStyle
@@ -35,7 +36,8 @@
3536
from . import MockConnect, MockCursor, MockResponse, psycopg_connect
3637
from .test_etcd import etcd_read, socket_getaddrinfo
3738
from .test_ha import get_cluster, get_cluster_initialized_with_leader, get_cluster_initialized_with_only_leader, \
38-
get_cluster_initialized_without_leader, get_cluster_not_initialized_without_leader, Member
39+
get_cluster_initialized_without_leader, get_cluster_not_initialized_without_leader, \
40+
get_standby_cluster_initialized_with_only_leader
3941

4042

4143
def get_default_config(*args):
@@ -784,6 +786,74 @@ def test_reinit_wait(self):
784786
self.assertIn("Waiting for reinitialize to complete on: other", result.output)
785787
self.assertIn("Reinitialize is completed on: other", result.output)
786788

789+
@patch('patroni.ctl.watching', Mock(return_value=[0, 0]))
@patch('patroni.ctl.request_patroni')
def test_cluster_demote(self, mock_patch):
    """Check ``patronictl demote-cluster``: option validation, refusals, abort and the success path."""
    # NOTE(review): the credentials/host part of both ``conn_url`` values reads ``[email protected]``, which looks
    # like an e-mail obfuscation artifact of the page this diff was captured from -- confirm against the repository.
    m1 = Member(0, 'new_leader', 28, {'conn_url': 'postgres://replicator:[email protected]:5435/postgres',
                                      'role': PostgresqlRole.STANDBY_LEADER, 'state': 'running'})
    m2 = Member(0, 'leader', 28, {'conn_url': 'postgres://replicator:[email protected]:5435/postgres',
                                  'role': PostgresqlRole.PRIMARY, 'state': 'stopping'})
    standby_leader = Leader(0, 0, m1)
    leader = Leader(0, 0, m2)
    original_cluster = get_cluster('12345678901', leader, [m1, m2], None, SyncState.empty(), None, 1)
    standby_cluster = get_cluster(
        '12345678901', standby_leader, [m1, m2], None, SyncState.empty(),
        ClusterConfig(1, {"standby_cluster": {"host": "localhost", "port": 5432, "primary_slot_name": ""}}, 1))

    # no option provided
    result = self.runner.invoke(ctl, ['demote-cluster', 'dummy'])
    assert 'At least --host, --port or --restore-command should be specified' in result.output
    # no leader
    with patch('patroni.dcs.AbstractDCS.get_cluster', Mock(return_value=get_cluster_initialized_without_leader())):
        result = self.runner.invoke(ctl, ['demote-cluster', 'dummy', '--restore-command', 'foo'])
        assert 'Cluster has no leader, demotion is not possible' in result.output
    # aborted
    with patch('patroni.dcs.AbstractDCS.get_cluster', Mock(return_value=original_cluster)):
        result = self.runner.invoke(ctl, ['demote-cluster', 'dummy', '--restore-command', 'foo'], input='N')
        assert 'Aborted' in result.output
    # already required state
    with patch('patroni.dcs.AbstractDCS.get_cluster', Mock(return_value=standby_cluster)):
        result = self.runner.invoke(ctl, ['demote-cluster', 'dummy', '--restore-command', 'foo'])
        assert 'Cluster is already in the required state' in result.output

    mock_patch.return_value.status = 200
    # success: the initial read and the first poll see the old topology, the second poll sees the standby cluster
    with patch('patroni.dcs.AbstractDCS.get_cluster', Mock(side_effect=[original_cluster, original_cluster,
                                                                        standby_cluster])):
        result = self.runner.invoke(ctl, ['demote-cluster', 'dummy', '--restore-command', 'foo', '--force'])
        assert result.exit_code == 0
824+
825+
@patch('patroni.ctl.watching', Mock(return_value=[0, 0]))
@patch('patroni.ctl.request_patroni')
def test_cluster_promote(self, mock_patch):
    """Check ``patronictl promote-cluster``: refusals, abort, PATCH failures and the success path."""
    # ``change_cluster_role`` iterates ``patroni.ctl.watching``, so that is the name to patch in order to keep
    # the success path from going through the real one-second polling loop.
    only_leader_cluster = get_cluster_initialized_with_only_leader()
    standby_cluster = get_standby_cluster_initialized_with_only_leader()
    # no leader
    with patch('patroni.dcs.AbstractDCS.get_cluster', Mock(return_value=get_cluster_initialized_without_leader())):
        result = self.runner.invoke(ctl, ['promote-cluster', 'dummy'])
        assert 'Cluster has no leader, promotion is not possible' in result.output
    # aborted: answer the confirmation prompt explicitly so that the "not confirmed" branch is exercised
    with patch('patroni.dcs.AbstractDCS.get_cluster', Mock(return_value=standby_cluster)):
        result = self.runner.invoke(ctl, ['promote-cluster', 'dummy'], input='N')
        assert 'Aborted' in result.output
    # already required state
    with patch('patroni.dcs.AbstractDCS.get_cluster', Mock(return_value=only_leader_cluster)):
        result = self.runner.invoke(ctl, ['promote-cluster', 'dummy'])
        assert 'Cluster is already in the required state' in result.output
    # PATCH error
    mock_patch.return_value.status = 500
    result = self.runner.invoke(ctl, ['demote-cluster', 'dummy', '--restore-command', 'foo', '--force'])
    assert 'Failed to demote' in result.output
    # Exception
    with patch('patroni.ctl.request_patroni', Mock(side_effect=Exception)):
        result = self.runner.invoke(ctl, ['demote-cluster', 'dummy', '--restore-command', 'foo', '--force'])
        assert 'Failed to demote' in result.output
    # success
    mock_patch.return_value.status = 200
    with patch('patroni.dcs.AbstractDCS.get_cluster', Mock(side_effect=[standby_cluster, standby_cluster,
                                                                        only_leader_cluster])):
        result = self.runner.invoke(ctl, ['promote-cluster', 'dummy', '--force'])
        assert result.exit_code == 0
856+
787857

788858
class TestPatronictlPrettyTable(unittest.TestCase):
789859

tests/test_ha.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,14 +85,16 @@ def get_cluster_initialized_with_only_leader(failover=None, cluster_config=None)
8585

8686

8787
def get_standby_cluster_initialized_with_only_leader(failover=None, sync=None):
88-
return get_cluster_initialized_with_only_leader(
88+
cluster = get_cluster_initialized_with_only_leader(
8989
cluster_config=ClusterConfig(1, {
9090
"standby_cluster": {
9191
"host": "localhost",
9292
"port": 5432,
9393
"primary_slot_name": "",
9494
}}, 1)
9595
)
96+
cluster.leader.data['role'] = PostgresqlRole.STANDBY_LEADER
97+
return cluster
9698

9799

98100
def get_cluster_initialized_with_leader_and_failsafe():

0 commit comments

Comments
 (0)