Skip to content

Commit 1e9c267

Browse files
committed
Restore gc admin password after snapshot restore
1 parent 6261725 commit 1e9c267

File tree

10 files changed

+149
-23
lines changed

10 files changed

+149
-23
lines changed

CHANGES.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ Unreleased
99

1010
* Remove grand-central tables when restoring a full snapshot or grand-central tables.
1111

12+
* Restore GC admin user password after snapshot restore.
13+
1214
2.53.0 (2025-09-25)
1315
-------------------
1416

crate/operator/bootstrap.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,12 @@
2929
from kubernetes_asyncio.stream import WsApiClient
3030

3131
from crate.operator.config import config
32-
from crate.operator.constants import CONNECT_TIMEOUT, GC_USERNAME, SYSTEM_USERNAME
32+
from crate.operator.constants import (
33+
CONNECT_TIMEOUT,
34+
GC_USER_SECRET_NAME,
35+
GC_USERNAME,
36+
SYSTEM_USERNAME,
37+
)
3338
from crate.operator.cratedb import create_user, get_connection
3439
from crate.operator.utils import crate
3540
from crate.operator.utils.k8s_api_client import GlobalApiClient
@@ -205,7 +210,9 @@ async def bootstrap_gc_admin_user(core: CoreV1Api, namespace: str, name: str):
205210
async with get_connection(host, password, timeout=CONNECT_TIMEOUT) as conn:
206211
async with conn.cursor() as cursor:
207212
password = await resolve_secret_key_ref(
208-
core, namespace, {"key": "password", "name": f"user-gc-{name}"}
213+
core,
214+
namespace,
215+
{"key": "password", "name": GC_USER_SECRET_NAME.format(name=name)},
209216
)
210217
await create_user(cursor, namespace, name, GC_USERNAME, password)
211218

crate/operator/constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434

3535
SYSTEM_USERNAME = "system"
3636
GC_USERNAME = "gc_admin"
37+
GC_USER_SECRET_NAME = "user-gc-{name}"
3738

3839
CONNECT_TIMEOUT = 10.0
3940

crate/operator/create.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@
9595
DCUTIL_BINARY,
9696
DCUTIL_CHECKSUM,
9797
DECOMMISSION_TIMEOUT,
98+
GC_USER_SECRET_NAME,
9899
LABEL_COMPONENT,
99100
LABEL_MANAGED_BY,
100101
LABEL_NAME,
@@ -1217,7 +1218,7 @@ def get_gc_user_secret(
12171218
return V1Secret(
12181219
data={"password": b64encode(gen_password(50))},
12191220
metadata=V1ObjectMeta(
1220-
name=f"user-gc-{name}",
1221+
name=GC_USER_SECRET_NAME.format(name=name),
12211222
labels=labels,
12221223
owner_references=owner_references,
12231224
),

crate/operator/grand_central.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@
6767
from crate.operator.bootstrap import bootstrap_gc_admin_user
6868
from crate.operator.config import config
6969
from crate.operator.constants import (
70+
GC_USER_SECRET_NAME,
7071
GC_USERNAME,
7172
GRAND_CENTRAL_BACKEND_API_PORT,
7273
GRAND_CENTRAL_INIT_CONTAINER,
@@ -133,7 +134,7 @@ def get_grand_central_deployment(
133134
name="GRAND_CENTRAL_CRATEDB_PASSWORD",
134135
value_from=V1EnvVarSource(
135136
secret_key_ref=V1SecretKeySelector(
136-
key="password", name=f"user-gc-{name}"
137+
key="password", name=GC_USER_SECRET_NAME.format(name=name)
137138
),
138139
),
139140
),

crate/operator/handlers/handle_restore_backup.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242
BeforeRestoreBackupSubHandler,
4343
ResetSnapshotSubHandler,
4444
RestoreBackupSubHandler,
45-
RestoreSystemUserPasswordSubHandler,
45+
RestoreInternalUsersPasswordSubHandler,
4646
SendSuccessNotificationSubHandler,
4747
ValidateRestoreCompleteSubHandler,
4848
ensure_no_restore_in_progress,
@@ -224,7 +224,7 @@ def register_restore_handlers(
224224
depends_on.append(f"{CLUSTER_RESTORE_FIELD_ID}/restore_backup_data")
225225

226226
kopf.register(
227-
fn=RestoreSystemUserPasswordSubHandler(
227+
fn=RestoreInternalUsersPasswordSubHandler(
228228
namespace,
229229
name,
230230
change_hash,

crate/operator/restore_backup.py

Lines changed: 51 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
from crate.operator.config import config
4343
from crate.operator.constants import (
4444
API_GROUP,
45+
GC_USERNAME,
4546
RESOURCE_CRATEDB,
4647
SYSTEM_USERNAME,
4748
BackupStorageProvider,
@@ -66,6 +67,7 @@
6667
from crate.operator.utils.kopf import StateBasedSubHandler, subhandler_partial
6768
from crate.operator.utils.kubeapi import (
6869
get_cratedb_resource,
70+
get_gc_user_password,
6971
get_host,
7072
get_system_user_password,
7173
resolve_secret_key_ref,
@@ -765,7 +767,7 @@ async def _start_restore_snapshot(
765767
raise kopf.PermanentError("Snapshot could not be restored")
766768

767769

768-
class RestoreSystemUserPasswordSubHandler(StateBasedSubHandler):
770+
class RestoreInternalUsersPasswordSubHandler(StateBasedSubHandler):
769771
@crate.on.error(error_handler=crate.send_update_failed_notification)
770772
async def handle( # type: ignore
771773
self,
@@ -775,9 +777,9 @@ async def handle( # type: ignore
775777
**kwargs: Any,
776778
):
777779
"""
778-
Restore the system user password from the secret in the namespace.
779-
Use crash here because during a restore the system user password was
780-
probably set to a different value.
780+
Restore the system user and grand-central user passwords from their secrets
781+
in the namespace. Use crash here because during a restore the system user
782+
password was probably set to a different value.
781783
782784
:param namespace: The Kubernetes namespace of the CrateDB cluster.
783785
:param name: The CrateDB custom resource name defining the CrateDB cluster.
@@ -786,7 +788,6 @@ async def handle( # type: ignore
786788
async with GlobalApiClient() as api_client:
787789
core = CoreV1Api(api_client)
788790
password = await get_system_user_password(core, namespace, name)
789-
password_quoted = QuotedString(password).getquoted().decode()
790791

791792
cratedb = await get_cratedb_resource(namespace, name)
792793
pod_name = get_crash_pod_name(cratedb, name)
@@ -798,17 +799,53 @@ async def handle( # type: ignore
798799
# system user password.
799800

800801
# Reset the system user with the password from the CRD
801-
command = (
802-
f'ALTER USER "{SYSTEM_USERNAME}" SET (password={password_quoted});'
802+
await self._reset_user_password(
803+
SYSTEM_USERNAME, password, namespace, pod_name, scheme, logger
803804
)
804-
result = await run_crash_command(
805-
namespace, pod_name, scheme, command, logger
805+
806+
await self._restore_gc_admin_password(
807+
core, namespace, name, pod_name, scheme, logger
808+
)
809+
810+
async def _restore_gc_admin_password(
811+
self,
812+
core: CoreV1Api,
813+
namespace: str,
814+
name: str,
815+
pod_name: str,
816+
scheme: str,
817+
logger: logging.Logger,
818+
):
819+
try:
820+
gc_admin_password = await get_gc_user_password(core, namespace, name)
821+
await self._reset_user_password(
822+
GC_USERNAME, gc_admin_password, namespace, pod_name, scheme, logger
806823
)
807-
if "ALTER OK" in result:
808-
logger.info("... success")
809-
else:
810-
logger.info("... error. %s", result)
811-
raise kopf.TemporaryError(delay=config.BOOTSTRAP_RETRY_DELAY)
824+
except kopf.TemporaryError as e:
825+
logger.warning("GC admin password reset failed; will retry: %s", e)
826+
raise
827+
except Exception as e:
828+
logger.info(
829+
"GC admin secret not found or retrieval failed; skipping: %s", e
830+
)
831+
832+
@staticmethod
833+
async def _reset_user_password(
834+
username: str,
835+
password: str,
836+
namespace: str,
837+
pod_name: str,
838+
scheme: str,
839+
logger: logging.Logger,
840+
):
841+
password_quoted = QuotedString(password).getquoted().decode()
842+
command = f'ALTER USER "{username}" SET (password={password_quoted});'
843+
result = await run_crash_command(namespace, pod_name, scheme, command, logger)
844+
if "ALTER OK" in result:
845+
logger.info("... %s password reset success", username)
846+
else:
847+
logger.info("... %s password reset error. %s", username, result)
848+
raise kopf.TemporaryError(delay=config.BOOTSTRAP_RETRY_DELAY)
812849

813850

814851
async def update_cratedb_admin_username_in_cratedb(

crate/operator/utils/kubeapi.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,12 @@
3232
)
3333

3434
from crate.operator.config import config
35-
from crate.operator.constants import API_GROUP, LABEL_USER_PASSWORD, RESOURCE_CRATEDB
35+
from crate.operator.constants import (
36+
API_GROUP,
37+
GC_USER_SECRET_NAME,
38+
LABEL_USER_PASSWORD,
39+
RESOURCE_CRATEDB,
40+
)
3641
from crate.operator.utils.formatting import b64decode
3742
from crate.operator.utils.k8s_api_client import GlobalApiClient
3843
from crate.operator.utils.typing import K8sModel, SecretKeyRef
@@ -236,3 +241,18 @@ async def get_cratedb_resource(namespace: str, name: str) -> dict:
236241
namespace=namespace,
237242
name=name,
238243
)
244+
245+
246+
async def get_gc_user_password(core: CoreV1Api, namespace: str, name: str) -> str:
247+
"""
248+
Return the password for the grand-central user of cluster ``name`` in ``namespace``.
249+
250+
:param core: An instance of the Kubernetes Core V1 API.
251+
:param namespace: The namespace where the CrateDB cluster is deployed.
252+
:param name: The name of the CrateDB cluster.
253+
"""
254+
return await resolve_secret_key_ref(
255+
core,
256+
namespace,
257+
{"key": "password", "name": GC_USER_SECRET_NAME.format(name=name)},
258+
)

tests/test_create_grand_central.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030

3131
from crate.operator.constants import (
3232
API_GROUP,
33+
GC_USER_SECRET_NAME,
3334
GC_USERNAME,
3435
GRAND_CENTRAL_PROMETHEUS_PORT,
3536
GRAND_CENTRAL_RESOURCE_PREFIX,
@@ -200,10 +201,14 @@ async def test_create_grand_central(faker, namespace, kopf_runner, api_client):
200201
does_secret_exist,
201202
core,
202203
namespace.metadata.name,
203-
f"user-gc-{name}",
204+
GC_USER_SECRET_NAME.format(name=name),
204205
)
205206
secrets = (await core.list_namespaced_secret(namespace.metadata.name)).items
206-
secret_pw = next(filter(lambda x: x.metadata.name == f"user-gc-{name}", secrets))
207+
secret_pw = next(
208+
filter(
209+
lambda x: x.metadata.name == GC_USER_SECRET_NAME.format(name=name), secrets
210+
)
211+
)
207212

208213
gc_admin_pw = b64decode(secret_pw.data["password"])
209214

tests/test_restore_backup.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
RESTORE_MAX_BYTES_PER_SEC,
4747
RestoreBackupSubHandler,
4848
RestoreInternalTables,
49+
RestoreInternalUsersPasswordSubHandler,
4950
RestoreType,
5051
)
5152
from crate.operator.restore_backup_repository_data import (
@@ -950,6 +951,57 @@ async def test_create_backup_repository(
950951
)
951952

952953

954+
@pytest.mark.asyncio
955+
@mock.patch("crate.operator.restore_backup.get_gc_user_password")
956+
@mock.patch("crate.operator.restore_backup.run_crash_command")
957+
async def test_gc_admin_password_restore(
958+
mock_run_crash, mock_get_gc_user_password, faker
959+
):
960+
name = faker.domain_word()
961+
namespace = faker.uuid4()
962+
hash = faker.md5()
963+
pod_name = f"data-hot-{name}-0"
964+
scheme = "https"
965+
logger = mock.Mock()
966+
core = mock.AsyncMock()
967+
968+
handler = RestoreInternalUsersPasswordSubHandler(namespace, name, hash, {})
969+
970+
# secret retrieval fails - should log and continue
971+
mock_get_gc_user_password.side_effect = KeyError("missing secret")
972+
973+
await handler._restore_gc_admin_password(
974+
core, namespace, name, pod_name, scheme, logger
975+
)
976+
977+
logger.info.assert_called_with(
978+
"GC admin secret not found or retrieval failed; skipping: %s", mock.ANY
979+
)
980+
981+
# reset fails with kopf.TemporaryError - should re-raise
982+
mock_get_gc_user_password.side_effect = None
983+
mock_get_gc_user_password.return_value = "gc-secret-password"
984+
985+
mock_run_crash.return_value = "ERROR something wrong"
986+
987+
with pytest.raises(kopf.TemporaryError):
988+
await handler._restore_gc_admin_password(
989+
core, namespace, name, pod_name, scheme, logger
990+
)
991+
992+
mock_run_crash.assert_called()
993+
994+
# successful password reset
995+
mock_get_gc_user_password.return_value = "gc-secret-password"
996+
mock_run_crash.return_value = "ALTER OK"
997+
998+
await handler._restore_gc_admin_password(
999+
core, namespace, name, pod_name, scheme, logger
1000+
)
1001+
1002+
logger.info.assert_any_call("... %s password reset success", "gc_admin")
1003+
1004+
9531005
def get_azure_blob_secrets(name: str) -> dict[str, Any]:
9541006
return {
9551007
"accountKey": {

0 commit comments

Comments
 (0)