Skip to content

Commit 8e46086

Browse files
authored
Recheck annotations when reading leader object on 409 (patroni#3174)
There are cases when we may send the same PATCH request more than one time to K8s API server and it could happen that the first request actually successfully updated the target and we cancelled while waiting for a response. The second PATCH request in this case will fail due to resource_version mismatch. So far our strategy for update_leader() method was - re-read the object and repeat the request with the new resource_version. However, we can avoid the update by comparing annotations on the read object with annotations that we wanted to set.
1 parent e91e6b5 commit 8e46086

File tree

2 files changed

+15
-6
lines changed

2 files changed

+15
-6
lines changed

patroni/dcs/kubernetes.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1215,16 +1215,21 @@ def _retry(*args: Any, **kwargs: Any) -> Any:
12151215

12161216
self._kinds.set(self.leader_path, kind)
12171217

1218-
if not retry.ensure_deadline(0.5):
1219-
return False
1220-
12211218
kind_annotations = kind and kind.metadata.annotations or EMPTY_DICT
12221219
kind_resource_version = kind and kind.metadata.resource_version
12231220

12241221
# There is different leader or resource_version in cache didn't change
12251222
if kind and (kind_annotations.get(self._LEADER) != self._name or kind_resource_version == resource_version):
12261223
return False
12271224

1225+
# We can get 409 because we do at least one retry, and the first update might have succeeded,
1226+
# therefore we will check if annotations on the read object match expectations.
1227+
if all(kind_annotations.get(k) == v for k, v in annotations.items()):
1228+
return True
1229+
1230+
if not retry.ensure_deadline(0.5):
1231+
return False
1232+
12281233
return bool(_run_and_handle_exceptions(self._patch_or_create, self.leader_path, annotations,
12291234
kind_resource_version, ips=ips, retry=_retry))
12301235

tests/test_kubernetes.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -422,14 +422,18 @@ def test__update_leader_with_retry(self, mock_patch, mock_read):
422422
self.assertFalse(self.k.update_leader(cluster, '123'))
423423
mock_patch.side_effect = RetryFailedError('')
424424
self.assertRaises(KubernetesError, self.k.update_leader, cluster, '123')
425-
mock_patch.side_effect = k8s_client.rest.ApiException(409, '')
425+
mock_patch.side_effect = [k8s_client.rest.ApiException(409, ''),
426+
k8s_client.rest.ApiException(409, ''), mock_namespaced_kind()]
427+
mock_read.return_value.metadata.resource_version = '2'
426428
with patch('time.time', Mock(side_effect=[0, 0, 100, 200, 0, 0, 0, 0, 0, 100, 200])):
427429
self.assertFalse(self.k.update_leader(cluster, '123'))
428430
self.assertFalse(self.k.update_leader(cluster, '123'))
431+
mock_patch.side_effect = k8s_client.rest.ApiException(409, '')
429432
self.assertFalse(self.k.update_leader(cluster, '123'))
430433
mock_patch.side_effect = [k8s_client.rest.ApiException(409, ''), mock_namespaced_kind()]
431-
mock_read.return_value.metadata.resource_version = '2'
432-
self.assertIsNotNone(self.k._update_leader_with_retry({}, '1', []))
434+
self.assertTrue(self.k._update_leader_with_retry({}, '1', []))
435+
mock_patch.side_effect = [k8s_client.rest.ApiException(409, ''), mock_namespaced_kind()]
436+
self.assertIsNotNone(self.k._update_leader_with_retry({'foo': 'bar'}, '1', []))
433437
mock_patch.side_effect = k8s_client.rest.ApiException(409, '')
434438
mock_read.side_effect = RetryFailedError('')
435439
self.assertRaises(KubernetesError, self.k.update_leader, cluster, '123')

0 commit comments

Comments
 (0)