Commit 00b4569

Add the ability to choose a control instance among the leaders (#400)

Closes #396. Before this patch, the control instance was chosen among all alive instances. This patch adds the ability to select the control instance only among the leaders via the `cartridge_force_leader_control_instance` flag.
1 parent e8771dc commit 00b4569

10 files changed: +95 −17 lines
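The new flag is an ordinary role variable, so it can be enabled the same way as the other `cartridge_*` settings. A minimal, illustrative sketch (the `group_vars/all.yml` location and the `myapp` name are assumptions for the example, not part of this commit):

# group_vars/all.yml (illustrative)
cartridge_app_name: myapp                        # application name, assumed for the example
cartridge_force_leader_control_instance: true    # default is false, i.e. the previous behaviour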

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
@@ -13,6 +13,8 @@ README.md to use the newest tag with new release
 ### Added
 
 - Add `cartridge_log_dir_parent` to configure directory of logs
+- Add `cartridge_force_leader_control_instance` variable to choose a control
+  instance among the leaders
 
 ### Fixed
 
defaults/main.yml

Lines changed: 1 addition & 0 deletions
@@ -8,6 +8,7 @@ cartridge_not_save_cookie_in_app_config: false
 cartridge_remove_temporary_files: false
 cartridge_ignore_split_brain: false
 cartridge_paths_to_keep_on_cleanup: []
+cartridge_force_leader_control_instance: false
 
 # Role scenario configuration
 
doc/steps.md

Lines changed: 3 additions & 1 deletion
@@ -236,7 +236,8 @@ This is instance that is:
   otherwise, any instance that should be joined during current play is chosen;
 * control instance should have minimal Cartridge version across all suitable
   instances (because Cartridge two-phase commit should be called by instance
-  that has lowest version).
+  that has lowest version);
+* control instance is a leader if `cartridge_force_leader_control_instance` is set.
 
 Steps that require control instance (such as [`edit_topology`](#step-edit_topology))
 call `set_control_instance` implicitly if `control_instance` variable isn't set.
@@ -251,6 +252,7 @@ Input variables from config:
 - `expelled` - indicates if instance must be expelled from topology;
 - `stateboard` - indicates that the instance is a stateboard;
 - `replicaset_alias` - replicaset alias, will be displayed in Web UI;
+- `cartridge_force_leader_control_instance` - indicates that only a leader can be selected as a control instance.
 
 Output variables:
 
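As the documentation above notes, steps that need a control instance (for example `edit_topology`) call `set_control_instance` implicitly; with the new flag set, the implicitly chosen instance must also be a leader. A hedged playbook sketch, assuming the role is installed from Ansible Galaxy under the name `tarantool.cartridge`:

# playbook.yml (illustrative)
- hosts: all
  vars:
    cartridge_force_leader_control_instance: true  # restrict the control instance choice to leaders
  roles:
    - tarantool.cartridge  # assumed Galaxy role name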

doc/variables.md

Lines changed: 3 additions & 1 deletion
@@ -17,7 +17,9 @@ failover.
 - `cartridge_paths_to_keep_on_cleanup` (`list-of-strings`, default: `[]`): list of paths that are
   absolute or relative to `work/memtx/vinyl/wal` directory that should be kept on instance
   cleanup (`config` and `.tarantool.cookie` will be kept independently of this variable); it's
-  possible to use bash patterns, e.g. `*.control`.
+  possible to use bash patterns, e.g. `*.control`;
+- `cartridge_force_leader_control_instance` (`boolean`, default: `false`): flag indicates that only a leader
+  can be selected as a control instance.
 
 ## Role scenario configuration
 
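For illustration only, the two variables documented in this hunk with non-default values (the values themselves are examples, not recommendations):

cartridge_paths_to_keep_on_cleanup:
  - '*.control'                                  # bash patterns are allowed, as stated above
cartridge_force_leader_control_instance: true    # only a leader may be chosen as the control instance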

library/cartridge_failover_promote.py

Lines changed: 2 additions & 10 deletions
@@ -34,14 +34,6 @@ def check_leaders_promotion_is_possible(control_console):
     return None
 
 
-def get_active_leaders(control_console):
-    active_leaders, _ = control_console.eval_res_err('''
-        return require('cartridge.failover').get_active_leaders()
-    ''')
-
-    return active_leaders
-
-
 def call_failover_promote(control_console, replicaset_leaders, force_inconsistency):
     opts = {
         'force_inconsistency': force_inconsistency,
@@ -184,7 +176,7 @@ def failover_promote(params):
     # set two-phase commit opts
     helpers.set_twophase_options_from_params(control_console, params)
 
-    active_leaders = get_active_leaders(control_console)
+    active_leaders, _ = helpers.get_active_leaders(control_console)
 
     _, err = call_failover_promote(control_console, replicaset_leaders, force_inconsistency)
     if err is not None:
@@ -194,7 +186,7 @@ def failover_promote(params):
             warnings=critical_warnings,
         )
 
-    new_active_leaders = get_active_leaders(control_console)
+    new_active_leaders, _ = helpers.get_active_leaders(control_console)
 
     if critical_warnings:
         return helpers.ModuleRes(

library/cartridge_get_control_instance.py

Lines changed: 28 additions & 4 deletions
@@ -8,6 +8,7 @@
     'play_hosts': {'required': True, 'type': 'list'},
     'console_sock': {'required': True, 'type': 'str'},
     'app_name': {'required': True, 'type': 'str'},
+    'leader_only': {'required': False, 'type': 'bool', 'default': False},
 }
 
 GET_TWOPHASE_COMMIT_VERSION_TIMEOUT = 60
@@ -94,14 +95,26 @@ def candidate_is_ok(uri, names_by_uris, module_hostvars, cluster_disabled_instances):
     return helpers.is_enabled(instance_vars)
 
 
-def get_control_instance_name(module_hostvars, cluster_disabled_instances, play_hosts, control_console):
+def get_control_instance_name(
+    module_hostvars,
+    cluster_disabled_instances,
+    play_hosts,
+    control_console,
+    leader_only=False,
+):
     members, err = get_membership_members(control_console)
+    if err is not None:
+        return None, err
+    leaders, err = helpers.get_active_leaders(control_console)
     if err is not None:
         return None, err
 
+    leaders_uuid = set(leaders.values())
+
     alien_members_uris = []
     members_without_uuid = []
     members_by_uuid = {}
+    leaders_uris = []
 
     for uri, member in sorted(members.items()):
         if not member:
@@ -130,6 +143,8 @@ def get_control_instance_name(module_hostvars, cluster_disabled_instances, play_hosts, control_console):
             continue
 
         members_by_uuid[member_uuid] = member
+        if member_uuid in leaders_uuid:
+            leaders_uris.append(uri)
 
     if alien_members_uris:
         helpers.warn('Incorrect members with the following URIs ignored: %s' % ', '.join(alien_members_uris))
@@ -206,9 +221,17 @@ def get_control_instance_name(module_hostvars, cluster_disabled_instances, play_hosts, control_console):
     if err is not None:
         return None, "Failed to check instances two-phase commit version: %s" % err
 
-    idx = twophase_commit_versions.index(min(twophase_commit_versions))
-    control_instance_uri = candidates_uris[idx]
+    min_version = min(twophase_commit_versions)
+    min_version_candidates = filter(lambda c: twophase_commit_versions[c[0]] == min_version, enumerate(candidates_uris))
+    candidates_uris = list(map(lambda c: c[1], min_version_candidates))
+
+    if leader_only:
+        leader_candidates_uris = list(filter(lambda uri: uri in leaders_uris, candidates_uris))
+        if len(leader_candidates_uris) == 0:
+            return None, "Not found any leader instance between the candidates: %s" % ', '.join(candidates_uris)
+        candidates_uris = leader_candidates_uris
 
+    control_instance_uri = candidates_uris[0]
     control_instance_name = names_by_uris[control_instance_uri]
 
     return control_instance_name, None
@@ -220,11 +243,12 @@ def get_control_instance(params):
     play_hosts = params['play_hosts']
    console_sock = params['console_sock']
     app_name = params['app_name']
+    leader_only = params.get('leader_only', False)
 
     control_console = helpers.get_control_console(console_sock)
 
     control_instance_name, err = get_control_instance_name(
-        module_hostvars, cluster_disabled_instances, play_hosts, control_console
+        module_hostvars, cluster_disabled_instances, play_hosts, control_console, leader_only
    )
     if err is not None:
         return helpers.ModuleRes(failed=True, msg=err)

module_utils/helpers.py

Lines changed: 9 additions & 1 deletion
@@ -647,10 +647,17 @@ def patch_clusterwide_config(control_console, new_sections):
     return True, None
 
 
-def enrich_replicasets_with_leaders(control_console, replicasets):
+def get_active_leaders(control_console):
     leaders, err = control_console.eval_res_err('''
         return require('cartridge.failover').get_active_leaders()
     ''')
+    if err is None and not leaders:
+        leaders = {}
+    return leaders, err
+
+
+def enrich_replicasets_with_leaders(control_console, replicasets):
+    leaders, err = get_active_leaders(control_console)
     if err:
         return err
 
@@ -754,6 +761,7 @@ class Helpers:
     read_yaml_file = staticmethod(read_yaml_file)
     get_clusterwide_config = staticmethod(get_clusterwide_config)
     patch_clusterwide_config = staticmethod(patch_clusterwide_config)
+    get_active_leaders = staticmethod(get_active_leaders)
     enrich_replicasets_with_leaders = staticmethod(enrich_replicasets_with_leaders)
     get_disabled_instances = staticmethod(get_disabled_instances)
     get_topology_checksum = staticmethod(get_topology_checksum)

tasks/set_instance_facts.yml

Lines changed: 1 addition & 0 deletions
@@ -150,6 +150,7 @@
 
     # Edit topology check
 
+    cartridge_force_leader_control_instance: '{{ cartridge_force_leader_control_instance }}'
     cartridge_force_advertise_uris_change: '{{ cartridge_force_advertise_uris_change }}'
     cartridge_ignore_extra_cluster_instances: '{{ cartridge_ignore_extra_cluster_instances }}'
     cartridge_ignore_extra_cluster_replicasets: '{{ cartridge_ignore_extra_cluster_replicasets }}'

tasks/steps/blocks/set_control_instance.yml

Lines changed: 1 addition & 0 deletions
@@ -10,6 +10,7 @@
     play_hosts: '{{ play_hosts }}'
     console_sock: '{{ alive_not_expelled_instance.console_sock }}'
     app_name: '{{ cartridge_app_name }}'
+    leader_only: '{{ cartridge_force_leader_control_instance }}'
   run_once: true
   delegate_to: '{{ alive_not_expelled_instance.name }}'
   register: control_instance_res

unit/test_get_control_instance.py

Lines changed: 45 additions & 0 deletions
@@ -28,6 +28,7 @@ def call_get_control_instance(
     module_hostvars=None,
     play_hosts=None,
     cluster_disabled_instances='default',
+    leader_only=False,
 ):
     if module_hostvars is None:
         module_hostvars = {}
@@ -41,6 +42,7 @@ def call_get_control_instance(
         'play_hosts': play_hosts,
         'console_sock': console_sock,
         'app_name': app_name,
+        'leader_only': leader_only,
     })
 
 
@@ -458,6 +460,49 @@ def test_dead_instances(self):
             'http_port': 8083,
         })
 
+    def test_leaders_only(self):
+        self.instance.add_replicaset(alias='r1', instances=['instance-1', 'instance-2'])
+        self.instance.add_replicaset(alias='r2', instances=['instance-3', 'instance-4'])
+
+        self.instance.set_failover_params(mode='stateful', state_provider='stateboard')
+        self.instance.set_variable('active_leaders', {
+            'r1-uuid': 'instance-2-uuid',
+            'r2-uuid': 'instance-4-uuid',
+        })
+
+        hostvars = {}
+        hostvars.update(get_instance_hostvars('instance-1', run_dir='run-dir-1', http_port=8081))
+        hostvars.update(get_instance_hostvars('instance-2', run_dir='run-dir-2', http_port=8082))
+        hostvars.update(get_instance_hostvars('instance-3', run_dir='run-dir-3', http_port=8083))
+        hostvars.update(get_instance_hostvars('instance-4', run_dir='run-dir-4', http_port=8084))
+
+        # instance-2 is selected since it's URI is
+        # first lexicographically
+        self.instance.set_membership_members([
+            utils.get_member('instance-1', with_uuid=True),
+            utils.get_member('instance-2', with_uuid=True),
+            utils.get_member('instance-3', with_uuid=True),
+            utils.get_member('instance-4', with_uuid=True),
+        ])
+        res = call_get_control_instance('myapp', self.console_sock, hostvars, leader_only=True)
+        self.assertFalse(res.failed, msg=res.msg)
+        self.assertEqual(res.fact, {
+            'name': 'instance-2',
+            'console_sock': 'run-dir-2/myapp.instance-2.control',
+            'http_port': 8082,
+        })
+
+        # both leaders without UUID and dead
+        self.instance.set_membership_members([
+            utils.get_member('instance-1', with_uuid=True),
+            utils.get_member('instance-2', with_uuid=False, status='dead'),
+            utils.get_member('instance-3', with_uuid=True),
+            utils.get_member('instance-4', with_uuid=False, status='dead'),
+        ])
+        res = call_get_control_instance('myapp', self.console_sock, hostvars, leader_only=True)
+        self.assertTrue(res.failed)
+        self.assertIn("Not found any leader instance between the candidates: instance-1-uri, instance-3-uri", res.msg)
+
     def tearDown(self):
         self.instance.stop()
         del self.instance
