Skip to content

Commit 9273964

Browse files
authored
[DPE-8462] Port tests (#1149)
* Upgrade tests WIP
* Missed helper and async tweaks
* Fix name and channel
* Copy conf test
* Leader resumes refresh
* K8s scale
* Wait for units to decrease
* More forceful force refresh
* Checking for blocked unit, not app
* Wait for unit status blocks
* Factor out refresh
* Retry resume action
* Wire switchover in async cluster upgrade
* Check for standby leader in replication health
1 parent ee672b2 commit 9273964

File tree

24 files changed

+1482
-984
lines changed

24 files changed

+1482
-984
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ __pycache__/
88
*.py[cod]
99
./requirements.txt
1010
requirements-last-build.txt
11+
.last_refresh_unit_status.json
1112

1213
# PyCharm project folder.
1314
.idea/

poetry.lock

Lines changed: 5 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ psycopg2-binary = "^2.9.10"
7474
boto3 = "*"
7575
tenacity = "^9.1.2"
7676
allure-pytest = "^2.15.0"
77-
jubilant = "^1.4.0"
77+
jubilant = "^1.5.0"
7878
tomli-w = "^1.2.0"
7979
tomli = "^2.3.0"
8080

@@ -103,6 +103,7 @@ minversion = "6.0"
103103
log_cli_level = "INFO"
104104
asyncio_mode = "auto"
105105
markers = ["juju3", "juju_secrets"]
106+
addopts = "--exitfirst"
106107

107108
# Linting tools configuration
108109
[tool.ruff]

src/charm.py

Lines changed: 21 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2264,6 +2264,26 @@ def _can_connect_to_postgresql(self) -> bool:
22642264
return False
22652265
return True
22662266

2267+
def _api_update_config(self, available_cpu_cores: int) -> None:
2268+
# Use config value if set, calculate otherwise
2269+
if self.config.experimental_max_connections:
2270+
max_connections = self.config.experimental_max_connections
2271+
else:
2272+
max_connections = max(4 * available_cpu_cores, 100)
2273+
2274+
cfg_patch = {
2275+
"max_connections": max_connections,
2276+
"max_prepared_transactions": self.config.memory_max_prepared_transactions,
2277+
"max_replication_slots": 25,
2278+
"max_wal_senders": 25,
2279+
"shared_buffers": self.config.memory_shared_buffers,
2280+
"wal_keep_size": self.config.durability_wal_keep_size,
2281+
}
2282+
base_patch = {}
2283+
if primary_endpoint := self.async_replication.get_primary_cluster_endpoint():
2284+
base_patch["standby_cluster"] = {"host": primary_endpoint}
2285+
self._patroni.bulk_update_parameters_controller_by_patroni(cfg_patch, base_patch)
2286+
22672287
def update_config(self, is_creating_backup: bool = False) -> bool:
22682288
"""Updates Patroni config file based on the existence of the TLS files."""
22692289
# Retrieve PostgreSQL parameters.
@@ -2328,20 +2348,7 @@ def update_config(self, is_creating_backup: bool = False) -> bool:
23282348
logger.debug("Early exit update_config: Patroni not started yet")
23292349
return False
23302350

2331-
# Use config value if set, calculate otherwise
2332-
if self.config.experimental_max_connections:
2333-
max_connections = self.config.experimental_max_connections
2334-
else:
2335-
max_connections = max(4 * available_cpu_cores, 100)
2336-
2337-
self._patroni.bulk_update_parameters_controller_by_patroni({
2338-
"max_connections": max_connections,
2339-
"max_prepared_transactions": self.config.memory_max_prepared_transactions,
2340-
"max_replication_slots": 25,
2341-
"max_wal_senders": 25,
2342-
"shared_buffers": self.config.memory_shared_buffers,
2343-
"wal_keep_size": self.config.durability_wal_keep_size,
2344-
})
2351+
self._api_update_config(available_cpu_cores)
23452352

23462353
self._patroni.ensure_slots_controller_by_patroni(replication_slots)
23472354

src/patroni.py

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -389,7 +389,12 @@ def is_replication_healthy(self) -> bool:
389389
try:
390390
for attempt in Retrying(stop=stop_after_delay(60), wait=wait_fixed(3)):
391391
with attempt:
392-
if not (primary := self.get_primary()):
392+
primary = (
393+
self.get_standby_leader()
394+
if self._charm.async_replication.get_primary_cluster_endpoint()
395+
else self.get_primary()
396+
)
397+
if not primary:
393398
logger.debug("Failed replication check no primary reported")
394399
raise Exception
395400

@@ -494,15 +499,26 @@ def member_streaming(self) -> bool:
494499
return response.get("replication_state") == "streaming"
495500

496501
@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=10))
497-
def bulk_update_parameters_controller_by_patroni(self, parameters: dict[str, Any]) -> None:
502+
def bulk_update_parameters_controller_by_patroni(
503+
self, parameters: dict[str, Any], base_parameters: dict[str, Any] | None
504+
) -> None:
498505
"""Update the value of a parameter controller by Patroni.
499506
500507
For more information, check https://patroni.readthedocs.io/en/latest/patroni_configuration.html#postgresql-parameters-controlled-by-patroni.
501508
"""
509+
if not base_parameters:
510+
base_parameters = {}
502511
requests.patch(
503512
f"{self._patroni_url}/config",
504513
verify=self._verify,
505-
json={"postgresql": {"parameters": parameters}},
514+
json={
515+
"postgresql": {
516+
"remove_data_directory_on_rewind_failure": False,
517+
"remove_data_directory_on_diverged_timelines": False,
518+
"parameters": parameters,
519+
},
520+
**base_parameters,
521+
},
506522
auth=self._patroni_auth,
507523
timeout=API_REQUEST_TIMEOUT,
508524
)
@@ -727,15 +743,20 @@ def restart_postgresql(self) -> None:
727743
timeout=API_REQUEST_TIMEOUT,
728744
)
729745

730-
def switchover(self, candidate: str | None = None, wait: bool = True) -> None:
746+
def switchover(
747+
self, candidate: str | None = None, wait: bool = True, async_cluster: bool = False
748+
) -> None:
731749
"""Trigger a switchover."""
732750
# Try to trigger the switchover.
733751
if candidate is not None:
734752
candidate = candidate.replace("/", "-")
735753

736754
for attempt in Retrying(stop=stop_after_delay(60), wait=wait_fixed(3)):
737755
with attempt:
738-
primary = self.get_primary()
756+
primary = self.get_primary() if not async_cluster else self.get_standby_leader()
757+
if primary == candidate:
758+
logger.info("Candidate and leader are the same")
759+
return
739760
r = requests.post(
740761
f"{self._patroni_url}/switchover",
741762
json={"leader": primary, "candidate": candidate},

src/refresh.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,12 @@ def run_pre_refresh_checks_after_1_unit_refreshed(self) -> None:
6565
)
6666
else:
6767
try:
68-
self._charm._patroni.switchover(candidate=last_unit_to_refresh)
68+
self._charm._patroni.switchover(
69+
candidate=last_unit_to_refresh,
70+
async_cluster=bool(
71+
self._charm.async_replication.get_primary_cluster_endpoint()
72+
),
73+
)
6974
except SwitchoverFailedError as e:
7075
logger.warning(f"switchover failed with reason: {e}")
7176
raise charm_refresh.PrecheckFailed("Unable to switch primary") from None

tests/integration/conftest.py

Lines changed: 2 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -104,28 +104,15 @@ def juju(request: pytest.FixtureRequest):
104104
105105
This adds command line parameter ``--keep-models`` (see help for details).
106106
"""
107-
controller = request.config.getoption("--controller")
108107
model = request.config.getoption("--model")
109-
controller_and_model = None
110-
if controller and model:
111-
controller_and_model = f"{controller}:{model}"
112-
elif controller:
113-
controller_and_model = controller
114-
elif model:
115-
controller_and_model = model
116108
keep_models = bool(request.config.getoption("--keep-models"))
117109

118-
if controller_and_model:
119-
juju = jubilant.Juju(model=controller_and_model) # type: ignore
110+
if model:
111+
juju = jubilant.Juju(model=model)
120112
yield juju
121-
log = juju.debug_log(limit=1000)
122113
else:
123114
with jubilant.temp_model(keep=keep_models) as juju:
124115
yield juju
125-
log = juju.debug_log(limit=1000)
126-
127-
if request.session.testsfailed:
128-
print(log, end="")
129116

130117

131118
@pytest.fixture(scope="module")

0 commit comments

Comments
 (0)