Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ Unreleased

* Remove grand-central tables when restoring a full snapshot or grand-central tables.

* Restore GC admin user password after snapshot restore.

2.53.0 (2025-09-25)
-------------------

Expand Down
11 changes: 9 additions & 2 deletions crate/operator/bootstrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,12 @@
from kubernetes_asyncio.stream import WsApiClient

from crate.operator.config import config
from crate.operator.constants import CONNECT_TIMEOUT, GC_USERNAME, SYSTEM_USERNAME
from crate.operator.constants import (
CONNECT_TIMEOUT,
GC_USER_SECRET_NAME,
GC_USERNAME,
SYSTEM_USERNAME,
)
from crate.operator.cratedb import create_user, get_connection
from crate.operator.utils import crate
from crate.operator.utils.k8s_api_client import GlobalApiClient
Expand Down Expand Up @@ -205,7 +210,9 @@ async def bootstrap_gc_admin_user(core: CoreV1Api, namespace: str, name: str):
async with get_connection(host, password, timeout=CONNECT_TIMEOUT) as conn:
async with conn.cursor() as cursor:
password = await resolve_secret_key_ref(
core, namespace, {"key": "password", "name": f"user-gc-{name}"}
core,
namespace,
{"key": "password", "name": GC_USER_SECRET_NAME.format(name=name)},
)
await create_user(cursor, namespace, name, GC_USERNAME, password)

Expand Down
1 change: 1 addition & 0 deletions crate/operator/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@

SYSTEM_USERNAME = "system"
GC_USERNAME = "gc_admin"
GC_USER_SECRET_NAME = "user-gc-{name}"

CONNECT_TIMEOUT = 10.0

Expand Down
3 changes: 2 additions & 1 deletion crate/operator/create.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@
DCUTIL_BINARY,
DCUTIL_CHECKSUM,
DECOMMISSION_TIMEOUT,
GC_USER_SECRET_NAME,
LABEL_COMPONENT,
LABEL_MANAGED_BY,
LABEL_NAME,
Expand Down Expand Up @@ -1217,7 +1218,7 @@ def get_gc_user_secret(
return V1Secret(
data={"password": b64encode(gen_password(50))},
metadata=V1ObjectMeta(
name=f"user-gc-{name}",
name=GC_USER_SECRET_NAME.format(name=name),
labels=labels,
owner_references=owner_references,
),
Expand Down
3 changes: 2 additions & 1 deletion crate/operator/grand_central.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@
from crate.operator.bootstrap import bootstrap_gc_admin_user
from crate.operator.config import config
from crate.operator.constants import (
GC_USER_SECRET_NAME,
GC_USERNAME,
GRAND_CENTRAL_BACKEND_API_PORT,
GRAND_CENTRAL_INIT_CONTAINER,
Expand Down Expand Up @@ -133,7 +134,7 @@ def get_grand_central_deployment(
name="GRAND_CENTRAL_CRATEDB_PASSWORD",
value_from=V1EnvVarSource(
secret_key_ref=V1SecretKeySelector(
key="password", name=f"user-gc-{name}"
key="password", name=GC_USER_SECRET_NAME.format(name=name)
),
),
),
Expand Down
4 changes: 2 additions & 2 deletions crate/operator/handlers/handle_restore_backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
BeforeRestoreBackupSubHandler,
ResetSnapshotSubHandler,
RestoreBackupSubHandler,
RestoreSystemUserPasswordSubHandler,
RestoreInternalUsersPasswordSubHandler,
SendSuccessNotificationSubHandler,
ValidateRestoreCompleteSubHandler,
ensure_no_restore_in_progress,
Expand Down Expand Up @@ -224,7 +224,7 @@ def register_restore_handlers(
depends_on.append(f"{CLUSTER_RESTORE_FIELD_ID}/restore_backup_data")

kopf.register(
fn=RestoreSystemUserPasswordSubHandler(
fn=RestoreInternalUsersPasswordSubHandler(
namespace,
name,
change_hash,
Expand Down
65 changes: 51 additions & 14 deletions crate/operator/restore_backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
from crate.operator.config import config
from crate.operator.constants import (
API_GROUP,
GC_USERNAME,
RESOURCE_CRATEDB,
SYSTEM_USERNAME,
BackupStorageProvider,
Expand All @@ -66,6 +67,7 @@
from crate.operator.utils.kopf import StateBasedSubHandler, subhandler_partial
from crate.operator.utils.kubeapi import (
get_cratedb_resource,
get_gc_user_password,
get_host,
get_system_user_password,
resolve_secret_key_ref,
Expand Down Expand Up @@ -765,7 +767,7 @@ async def _start_restore_snapshot(
raise kopf.PermanentError("Snapshot could not be restored")


class RestoreSystemUserPasswordSubHandler(StateBasedSubHandler):
class RestoreInternalUsersPasswordSubHandler(StateBasedSubHandler):
@crate.on.error(error_handler=crate.send_update_failed_notification)
async def handle( # type: ignore
self,
Expand All @@ -775,9 +777,9 @@ async def handle( # type: ignore
**kwargs: Any,
):
"""
Restore the system user password from the secret in the namespace.
Use crash here because during a restore the system user password was
probably set to a different value.
Restore the system user and grand-central user passwords from the secret
in the namespace. Use crash here because during a restore the system user
password was probably set to a different value.

:param namespace: The Kubernetes namespace of the CrateDB cluster.
:param name: The CrateDB custom resource name defining the CrateDB cluster.
Expand All @@ -786,7 +788,6 @@ async def handle( # type: ignore
async with GlobalApiClient() as api_client:
core = CoreV1Api(api_client)
password = await get_system_user_password(core, namespace, name)
password_quoted = QuotedString(password).getquoted().decode()

cratedb = await get_cratedb_resource(namespace, name)
pod_name = get_crash_pod_name(cratedb, name)
Expand All @@ -798,17 +799,53 @@ async def handle( # type: ignore
# system user password.

# Reset the system user with the password from the CRD
command = (
f'ALTER USER "{SYSTEM_USERNAME}" SET (password={password_quoted});'
await self._reset_user_password(
SYSTEM_USERNAME, password, namespace, pod_name, scheme, logger
)
result = await run_crash_command(
namespace, pod_name, scheme, command, logger

await self._restore_gc_admin_password(
core, namespace, name, pod_name, scheme, logger
)

async def _restore_gc_admin_password(
self,
core: CoreV1Api,
namespace: str,
name: str,
pod_name: str,
scheme: str,
logger: logging.Logger,
):
try:
gc_admin_password = await get_gc_user_password(core, namespace, name)
await self._reset_user_password(
GC_USERNAME, gc_admin_password, namespace, pod_name, scheme, logger
)
if "ALTER OK" in result:
logger.info("... success")
else:
logger.info("... error. %s", result)
raise kopf.TemporaryError(delay=config.BOOTSTRAP_RETRY_DELAY)
except kopf.TemporaryError as e:
logger.warning("GC admin password reset failed; will retry: %s", e)
raise
except Exception as e:
logger.info(
"GC admin secret not found or retrieval failed; skipping: %s", e
)

@staticmethod
async def _reset_user_password(
    username: str,
    password: str,
    namespace: str,
    pod_name: str,
    scheme: str,
    logger: logging.Logger,
):
    """
    Set ``username``'s password by running an ``ALTER USER`` statement
    through crash in the given pod.

    :param username: The CrateDB user whose password is reset.
    :param password: The new password; it is quoted before being embedded
        in the statement.
    :param namespace: The Kubernetes namespace of the CrateDB cluster.
    :param pod_name: The pod in which the crash command is executed.
    :param scheme: Passed through unchanged to ``run_crash_command``.
    :param logger: Logger used to report the outcome.
    :raises kopf.TemporaryError: If the command output does not contain
        ``ALTER OK``; the retry delay is ``config.BOOTSTRAP_RETRY_DELAY``.
    """
    password_quoted = QuotedString(password).getquoted().decode()
    command = f'ALTER USER "{username}" SET (password={password_quoted});'
    result = await run_crash_command(namespace, pod_name, scheme, command, logger)

    # Anything other than an explicit "ALTER OK" is treated as a failure.
    if "ALTER OK" not in result:
        logger.info("... %s password reset error. %s", username, result)
        raise kopf.TemporaryError(delay=config.BOOTSTRAP_RETRY_DELAY)

    logger.info("... %s password reset success", username)


async def update_cratedb_admin_username_in_cratedb(
Expand Down
22 changes: 21 additions & 1 deletion crate/operator/utils/kubeapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,12 @@
)

from crate.operator.config import config
from crate.operator.constants import API_GROUP, LABEL_USER_PASSWORD, RESOURCE_CRATEDB
from crate.operator.constants import (
API_GROUP,
GC_USER_SECRET_NAME,
LABEL_USER_PASSWORD,
RESOURCE_CRATEDB,
)
from crate.operator.utils.formatting import b64decode
from crate.operator.utils.k8s_api_client import GlobalApiClient
from crate.operator.utils.typing import K8sModel, SecretKeyRef
Expand Down Expand Up @@ -236,3 +241,18 @@ async def get_cratedb_resource(namespace: str, name: str) -> dict:
namespace=namespace,
name=name,
)


async def get_gc_user_password(core: CoreV1Api, namespace: str, name: str) -> str:
    """
    Look up the grand-central user's password for cluster ``name``.

    The value is read from the ``password`` key of the secret whose name is
    built from ``GC_USER_SECRET_NAME``.

    :param core: An instance of the Kubernetes Core V1 API.
    :param namespace: The namespace where the CrateDB cluster is deployed.
    :param name: The name of the CrateDB cluster.
    """
    secret_key_ref: SecretKeyRef = {
        "key": "password",
        "name": GC_USER_SECRET_NAME.format(name=name),
    }
    gc_password = await resolve_secret_key_ref(core, namespace, secret_key_ref)
    return gc_password
9 changes: 7 additions & 2 deletions tests/test_create_grand_central.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@

from crate.operator.constants import (
API_GROUP,
GC_USER_SECRET_NAME,
GC_USERNAME,
GRAND_CENTRAL_PROMETHEUS_PORT,
GRAND_CENTRAL_RESOURCE_PREFIX,
Expand Down Expand Up @@ -200,10 +201,14 @@ async def test_create_grand_central(faker, namespace, kopf_runner, api_client):
does_secret_exist,
core,
namespace.metadata.name,
f"user-gc-{name}",
GC_USER_SECRET_NAME.format(name=name),
)
secrets = (await core.list_namespaced_secret(namespace.metadata.name)).items
secret_pw = next(filter(lambda x: x.metadata.name == f"user-gc-{name}", secrets))
secret_pw = next(
filter(
lambda x: x.metadata.name == GC_USER_SECRET_NAME.format(name=name), secrets
)
)

gc_admin_pw = b64decode(secret_pw.data["password"])

Expand Down
52 changes: 52 additions & 0 deletions tests/test_restore_backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
RESTORE_MAX_BYTES_PER_SEC,
RestoreBackupSubHandler,
RestoreInternalTables,
RestoreInternalUsersPasswordSubHandler,
RestoreType,
)
from crate.operator.restore_backup_repository_data import (
Expand Down Expand Up @@ -950,6 +951,57 @@ async def test_create_backup_repository(
)


@pytest.mark.asyncio
@mock.patch("crate.operator.restore_backup.get_gc_user_password")
@mock.patch("crate.operator.restore_backup.run_crash_command")
async def test_gc_admin_password_restore(
    mock_run_crash, mock_get_gc_user_password, faker
):
    """
    Exercise the three outcomes of restoring the GC admin password:

    1. Reading the secret fails: the error is logged and swallowed, and no
       crash command is run.
    2. The crash command reports an error: ``kopf.TemporaryError`` is
       re-raised so the handler is retried.
    3. The crash command reports ``ALTER OK``: success is logged.
    """
    name = faker.domain_word()
    namespace = faker.uuid4()
    # Named ``change_hash`` rather than ``hash`` to avoid shadowing the builtin.
    change_hash = faker.md5()
    pod_name = f"data-hot-{name}-0"
    scheme = "https"
    logger = mock.Mock()
    core = mock.AsyncMock()

    handler = RestoreInternalUsersPasswordSubHandler(namespace, name, change_hash, {})

    # secret retrieval fails - should log and continue
    mock_get_gc_user_password.side_effect = KeyError("missing secret")

    await handler._restore_gc_admin_password(
        core, namespace, name, pod_name, scheme, logger
    )

    logger.info.assert_called_with(
        "GC admin secret not found or retrieval failed; skipping: %s", mock.ANY
    )
    # Without a password there is nothing to reset, so crash must not run.
    mock_run_crash.assert_not_called()

    # reset fails with kopf.TemporaryError - should re-raise
    mock_get_gc_user_password.side_effect = None
    mock_get_gc_user_password.return_value = "gc-secret-password"

    mock_run_crash.return_value = "ERROR something wrong"

    with pytest.raises(kopf.TemporaryError):
        await handler._restore_gc_admin_password(
            core, namespace, name, pod_name, scheme, logger
        )

    mock_run_crash.assert_called()

    # successful password reset
    mock_get_gc_user_password.return_value = "gc-secret-password"
    mock_run_crash.return_value = "ALTER OK"

    await handler._restore_gc_admin_password(
        core, namespace, name, pod_name, scheme, logger
    )

    logger.info.assert_any_call("... %s password reset success", "gc_admin")


def get_azure_blob_secrets(name: str) -> dict[str, Any]:
return {
"accountKey": {
Expand Down