Skip to content

Commit ff02253

Browse files
[DPE-6965] Storage pools (#852)
* Separate storage pools Signed-off-by: Marcelo Henrique Neppel <[email protected]> * Fix charm test Signed-off-by: Marcelo Henrique Neppel <[email protected]> * Fix async replication Signed-off-by: Marcelo Henrique Neppel <[email protected]> * Fix replica bootstrap Signed-off-by: Marcelo Henrique Neppel <[email protected]> * Fix data directory removal on restore Signed-off-by: Marcelo Henrique Neppel <[email protected]> * Fix backup do microceph test Signed-off-by: Marcelo Henrique Neppel <[email protected]> * Fix upgrade integration tests Signed-off-by: Marcelo Henrique Neppel <[email protected]> * Add test to check new multiple storages Signed-off-by: Marcelo Henrique Neppel <[email protected]> * Add storages'descriptions Signed-off-by: Marcelo Henrique Neppel <[email protected]> * Fix test_charm_garbage_ignorance Signed-off-by: Marcelo Henrique Neppel <[email protected]> * Fix restore cluster test Signed-off-by: Marcelo Henrique Neppel <[email protected]> * Fix storage re-use test Signed-off-by: Marcelo Henrique Neppel <[email protected]> --------- Signed-off-by: Marcelo Henrique Neppel <[email protected]>
1 parent f60f4ef commit ff02253

File tree

19 files changed

+239
-86
lines changed

19 files changed

+239
-86
lines changed

lib/charms/postgresql_k8s/v0/postgresql.py

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535

3636
# Increment this PATCH version before using `charmcraft publish-lib` or reset
3737
# to 0 if you are raising the major API version
38-
LIBPATCH = 51
38+
LIBPATCH = 52
3939

4040
# Groups to distinguish HBA access
4141
ACCESS_GROUP_IDENTITY = "identity_access"
@@ -698,15 +698,22 @@ def list_valid_privileges_and_roles(self) -> Tuple[Set[str], Set[str]]:
698698
"superuser",
699699
}, {role[0] for role in cursor.fetchall() if role[0]}
700700

701-
def set_up_database(self) -> None:
701+
def set_up_database(self, temp_location: Optional[str] = None) -> None:
702702
"""Set up postgres database with the right permissions."""
703703
connection = None
704+
cursor = None
704705
try:
705-
with self._connect_to_database() as connection, connection.cursor() as cursor:
706-
cursor.execute("SELECT TRUE FROM pg_roles WHERE rolname='admin';")
707-
if cursor.fetchone() is not None:
708-
return
706+
connection = self._connect_to_database()
707+
cursor = connection.cursor()
709708

709+
if temp_location is not None:
710+
cursor.execute("SELECT TRUE FROM pg_tablespace WHERE spcname='temp';")
711+
if cursor.fetchone() is None:
712+
cursor.execute(f"CREATE TABLESPACE temp LOCATION '{temp_location}';")
713+
cursor.execute("GRANT CREATE ON TABLESPACE temp TO public;")
714+
715+
cursor.execute("SELECT TRUE FROM pg_roles WHERE rolname='admin';")
716+
if cursor.fetchone() is None:
710717
# Allow access to the postgres database only to the system users.
711718
cursor.execute("REVOKE ALL PRIVILEGES ON DATABASE postgres FROM PUBLIC;")
712719
cursor.execute("REVOKE CREATE ON SCHEMA public FROM PUBLIC;")
@@ -725,6 +732,8 @@ def set_up_database(self) -> None:
725732
logger.error(f"Failed to set up databases: {e}")
726733
raise PostgreSQLDatabasesSetupError() from e
727734
finally:
735+
if cursor is not None:
736+
cursor.close()
728737
if connection is not None:
729738
connection.close()
730739

metadata.yaml

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,9 +62,22 @@ requires:
6262
optional: true
6363

6464
storage:
65-
pgdata:
65+
archive:
6666
type: filesystem
67-
location: /var/snap/charmed-postgresql/common
67+
description: Storage mount used for holding local backups (before typically sending them to remote object storage) when relevant/needed.
68+
location: /var/snap/charmed-postgresql/common/data/archive
69+
data:
70+
type: filesystem
71+
description: Storage mount used for storing all tables, indexes, and so on (except those from temporary tablespaces).
72+
location: /var/snap/charmed-postgresql/common/var/lib/postgresql
73+
logs:
74+
type: filesystem
75+
description: Storage mount used for storing all the logs that are part of the transactional commit path (WAL files).
76+
location: /var/snap/charmed-postgresql/common/data/logs
77+
temp:
78+
type: filesystem
79+
description: Storage mount used for storing temporary tablespaces (where typically sort operations happen).
80+
location: /var/snap/charmed-postgresql/common/data/temp
6881

6982
assumes:
7083
- juju

src/backups.py

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ def _tls_ca_chain_filename(self) -> str:
9797
"""Returns the path to the TLS CA chain file."""
9898
s3_parameters, _ = self._retrieve_s3_parameters()
9999
if s3_parameters.get("tls-ca-chain") is not None:
100-
return f"{self.charm._storage_path}/pgbackrest-tls-ca-chain.crt"
100+
return f"{PGBACKREST_CONF_PATH}/pgbackrest-tls-ca-chain.crt"
101101
return ""
102102

103103
def _get_s3_session_resource(self, s3_parameters: dict):
@@ -327,12 +327,25 @@ def _create_bucket_if_not_exists(self) -> None:
327327

328328
def _empty_data_files(self) -> bool:
329329
"""Empty the PostgreSQL data directory in preparation of backup restore."""
330+
paths = [
331+
"/var/snap/charmed-postgresql/common/data/archive",
332+
POSTGRESQL_DATA_PATH,
333+
"/var/snap/charmed-postgresql/common/data/logs",
334+
"/var/snap/charmed-postgresql/common/data/temp",
335+
]
336+
path = None
330337
try:
331-
path = Path(POSTGRESQL_DATA_PATH)
332-
if path.exists() and path.is_dir():
333-
shutil.rmtree(path)
338+
for path in paths:
339+
path_object = Path(path)
340+
if path_object.exists() and path_object.is_dir():
341+
for item in os.listdir(path):
342+
item_path = os.path.join(path, item)
343+
if os.path.isfile(item_path) or os.path.islink(item_path):
344+
os.remove(item_path)
345+
elif os.path.isdir(item_path):
346+
shutil.rmtree(item_path)
334347
except OSError as e:
335-
logger.warning(f"Failed to remove contents of the data directory with error: {e!s}")
348+
logger.warning(f"Failed to remove contents from {path} with error: {e!s}")
336349
return False
337350

338351
return True

src/charm.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@
8686
PATRONI_PASSWORD_KEY,
8787
PEER,
8888
PLUGIN_OVERRIDES,
89+
POSTGRESQL_DATA_PATH,
8990
POSTGRESQL_SNAP_NAME,
9091
RAFT_PASSWORD_KEY,
9192
REPLICATION_PASSWORD_KEY,
@@ -197,9 +198,8 @@ def __init__(self, *args):
197198
self.framework.observe(self.on.update_status, self._on_update_status)
198199
self.cluster_name = self.app.name
199200
self._member_name = self.unit.name.replace("/", "-")
200-
201201
self._certs_path = "/usr/local/share/ca-certificates"
202-
self._storage_path = self.meta.storages["pgdata"].location
202+
self._storage_path = self.meta.storages["data"].location
203203

204204
self.upgrade = PostgreSQLUpgrade(
205205
self,
@@ -1469,7 +1469,9 @@ def _start_primary(self, event: StartEvent) -> None:
14691469
event.defer()
14701470
return
14711471

1472-
self.postgresql.set_up_database()
1472+
self.postgresql.set_up_database(
1473+
temp_location="/var/snap/charmed-postgresql/common/data/temp"
1474+
)
14731475

14741476
access_groups = self.postgresql.list_access_groups()
14751477
if access_groups != set(ACCESS_GROUPS):
@@ -1722,6 +1724,11 @@ def _handle_processes_failures(self) -> bool:
17221724
# Restart the PostgreSQL process if it was frozen (in that case, the Patroni
17231725
# process is running by the PostgreSQL process not).
17241726
if self._unit_ip in self.members_ips and self._patroni.member_inactive:
1727+
data_directory_contents = os.listdir(POSTGRESQL_DATA_PATH)
1728+
if len(data_directory_contents) == 1 and data_directory_contents[0] == "pg_wal":
1729+
os.remove(os.path.join(POSTGRESQL_DATA_PATH, "pg_wal"))
1730+
logger.info("PostgreSQL data directory was not empty. Removed pg_wal")
1731+
return True
17251732
try:
17261733
self._patroni.restart_patroni()
17271734
logger.info("restarted PostgreSQL because it was not running")

src/relations/async_replication.py

Lines changed: 21 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@ def _configure_standby_cluster(self, event: RelationChangedEvent) -> bool:
186186
raise Exception(error)
187187
if system_identifier != relation.data[relation.app].get("system-id"):
188188
# Store current data in a tar.gz file.
189-
logger.info("Creating backup of pgdata folder")
189+
logger.info("Creating backup of data folder")
190190
filename = f"{POSTGRESQL_DATA_PATH}-{str(datetime.now()).replace(' ', '-').replace(':', '-')}.tar.gz"
191191
# Input is hardcoded
192192
subprocess.check_call(f"tar -zcf {filename} {POSTGRESQL_DATA_PATH}".split()) # noqa: S603
@@ -649,19 +649,26 @@ def _re_emit_async_relation_changed_event(self) -> None:
649649
)
650650

651651
def _reinitialise_pgdata(self) -> None:
652-
"""Reinitialise the pgdata folder."""
652+
"""Reinitialise the data folder."""
653+
paths = [
654+
"/var/snap/charmed-postgresql/common/data/archive",
655+
POSTGRESQL_DATA_PATH,
656+
"/var/snap/charmed-postgresql/common/data/logs",
657+
"/var/snap/charmed-postgresql/common/data/temp",
658+
]
659+
path = None
653660
try:
654-
path = Path(POSTGRESQL_DATA_PATH)
655-
if path.exists() and path.is_dir():
656-
shutil.rmtree(path)
661+
for path in paths:
662+
path_object = Path(path)
663+
if path_object.exists() and path_object.is_dir():
664+
for item in os.listdir(path):
665+
item_path = os.path.join(path, item)
666+
if os.path.isfile(item_path) or os.path.islink(item_path):
667+
os.remove(item_path)
668+
elif os.path.isdir(item_path):
669+
shutil.rmtree(item_path)
657670
except OSError as e:
658-
raise Exception(
659-
f"Failed to remove contents of the data directory with error: {e!s}"
660-
) from e
661-
os.mkdir(POSTGRESQL_DATA_PATH)
662-
# Expected permissions
663-
os.chmod(POSTGRESQL_DATA_PATH, 0o750) # noqa: S103
664-
self.charm._patroni._change_owner(POSTGRESQL_DATA_PATH)
671+
raise Exception(f"Failed to remove contents from {path} with error: {e!s}") from e
665672

666673
@property
667674
def _relation(self) -> Relation:
@@ -712,9 +719,9 @@ def _stop_database(self, event: RelationChangedEvent) -> bool:
712719
if not self._configure_standby_cluster(event):
713720
return False
714721

715-
# Remove and recreate the pgdata folder to enable replication of the data from the
722+
# Remove and recreate the data folder to enable replication of the data from the
716723
# primary cluster.
717-
logger.info("Removing and recreating pgdata folder")
724+
logger.info("Removing and recreating data folder")
718725
self._reinitialise_pgdata()
719726

720727
# Remove previous cluster information to make it possible to initialise a new cluster.

src/upgrade.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,9 @@ def _prepare_upgrade_from_legacy(self) -> None:
247247
self.charm.get_secret(APP_SCOPE, MONITORING_PASSWORD_KEY),
248248
extra_user_roles="pg_monitor",
249249
)
250-
self.charm.postgresql.set_up_database()
250+
self.charm.postgresql.set_up_database(
251+
temp_location="/var/snap/charmed-postgresql/common/data/temp"
252+
)
251253
self._set_up_new_access_roles_for_legacy()
252254

253255
def _set_up_new_access_roles_for_legacy(self) -> None:

templates/patroni.yml.j2

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,9 +126,12 @@ bootstrap:
126126
initdb:
127127
- encoding: UTF8
128128
- data-checksums
129+
- waldir: /var/snap/charmed-postgresql/common/data/logs
129130
{%- endif %}
130131

131132
postgresql:
133+
basebackup:
134+
- waldir: /var/snap/charmed-postgresql/common/data/logs
132135
listen: '{{ self_ip }}:5432'
133136
connect_address: '{{ self_ip }}:5432'
134137
# Path to PostgreSQL binaries used in the database bootstrap process.
@@ -147,6 +150,7 @@ postgresql:
147150
ssl_cert_file: {{ conf_path }}/cert.pem
148151
ssl_key_file: {{ conf_path }}/key.pem
149152
{%- endif %}
153+
temp_tablespaces: temp
150154
unix_socket_directories: /tmp
151155
{%- if pg_parameters %}
152156
{%- for key, value in pg_parameters.items() %}

tests/helpers.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,4 @@
77
import yaml
88

99
METADATA = yaml.safe_load(Path("./metadata.yaml").read_text())
10-
STORAGE_PATH = METADATA["storage"]["pgdata"]["location"]
10+
STORAGE_PATH = METADATA["storage"]["data"]["location"]

tests/integration/ha_tests/helpers.py

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -914,12 +914,13 @@ def storage_type(ops_test, app):
914914
return line.split()[3]
915915

916916

917-
def storage_id(ops_test, unit_name):
918-
"""Retrieves storage id associated with provided unit.
917+
def get_storage_ids(ops_test, unit_name):
918+
"""Retrieves storages ids associated with provided unit.
919919
920920
Note: this function exists as a temporary solution until this issue is ported to libjuju 2:
921921
https://github.com/juju/python-libjuju/issues/694
922922
"""
923+
storage_ids = []
923924
model_name = ops_test.model.info.name
924925
proc = subprocess.check_output(f"juju storage --model={model_name}".split())
925926
proc = proc.decode("utf-8")
@@ -934,18 +935,22 @@ def storage_id(ops_test, unit_name):
934935
continue
935936

936937
if line.split()[0] == unit_name:
937-
return line.split()[1]
938+
storage_ids.append(line.split()[1])
939+
return storage_ids
938940

939941

940-
async def add_unit_with_storage(ops_test, app, storage):
941-
"""Adds unit with storage.
942+
async def add_unit_with_storage(ops_test, app, storages):
943+
"""Adds unit with storages.
942944
943945
Note: this function exists as a temporary solution until this issue is resolved:
944946
https://github.com/juju/python-libjuju/issues/695
945947
"""
946948
original_units = {unit.name for unit in ops_test.model.applications[app].units}
947949
model_name = ops_test.model.info.name
948-
add_unit_cmd = f"add-unit {app} --model={model_name} --attach-storage={storage}".split()
950+
add_unit_cmd = f"add-unit {app} --model={model_name}"
951+
for storage in storages:
952+
add_unit_cmd = add_unit_cmd + f" --attach-storage={storage}"
953+
add_unit_cmd = add_unit_cmd.split()
949954
return_code, _, _ = await ops_test.juju(*add_unit_cmd)
950955
assert return_code == 0, "Failed to add unit with storage"
951956
async with ops_test.fast_forward():
@@ -958,7 +963,9 @@ async def add_unit_with_storage(ops_test, app, storage):
958963

959964
# verify storage attached
960965
new_unit = (current_units - original_units).pop()
961-
assert storage_id(ops_test, new_unit) == storage, "unit added with incorrect storage"
966+
assert sorted(get_storage_ids(ops_test, new_unit)) == sorted(storages), (
967+
"unit added with incorrect storage"
968+
)
962969

963970
# return a reference to newly added unit
964971
for unit in ops_test.model.applications[app].units:
@@ -1050,18 +1057,22 @@ async def check_db(ops_test: OpsTest, app: str, db: str) -> bool:
10501057
return db in query
10511058

10521059

1053-
async def get_any_deatached_storage(ops_test: OpsTest) -> str:
1054-
"""Returns any of the current available deatached storage."""
1060+
async def get_detached_storages(ops_test: OpsTest) -> list[str]:
1061+
"""Returns the current available detached storage."""
10551062
return_code, storages_list, stderr = await ops_test.juju(
10561063
"storage", "-m", f"{ops_test.controller_name}:{ops_test.model.info.name}", "--format=json"
10571064
)
10581065
if return_code != 0:
10591066
raise Exception(f"failed to get storages info with error: {stderr}")
10601067

10611068
parsed_storages_list = json.loads(storages_list)
1069+
detached_storages = []
10621070
for storage_name, storage in parsed_storages_list["storage"].items():
10631071
if (str(storage["status"]["current"]) == "detached") and (str(storage["life"] == "alive")):
1064-
return storage_name
1072+
detached_storages.append(storage_name)
1073+
1074+
if len(detached_storages) > 0:
1075+
return detached_storages
10651076

10661077
raise Exception("failed to get deatached storage")
10671078

tests/integration/ha_tests/test_restore_cluster.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@
1717
)
1818
from .helpers import (
1919
add_unit_with_storage,
20+
get_storage_ids,
2021
reused_full_cluster_recovery_storage,
21-
storage_id,
2222
)
2323

2424
FIRST_APPLICATION = "first-cluster"
@@ -40,7 +40,12 @@ async def test_build_and_deploy(ops_test: OpsTest, charm) -> None:
4040
application_name=FIRST_APPLICATION,
4141
num_units=3,
4242
base=CHARM_BASE,
43-
storage={"pgdata": {"pool": "lxd-btrfs", "size": 2048}},
43+
storage={
44+
"archive": {"pool": "lxd-btrfs", "size": 8046},
45+
"data": {"pool": "lxd-btrfs", "size": 8046},
46+
"logs": {"pool": "lxd-btrfs", "size": 8046},
47+
"temp": {"pool": "lxd-btrfs", "size": 8046},
48+
},
4449
config={"profile": "testing"},
4550
)
4651

@@ -91,7 +96,7 @@ async def test_cluster_restore(ops_test):
9196
logger.info("Downscaling the existing cluster")
9297
storages = []
9398
for unit in ops_test.model.applications[FIRST_APPLICATION].units:
94-
storages.append(storage_id(ops_test, unit.name))
99+
storages.append(get_storage_ids(ops_test, unit.name))
95100
await ops_test.model.destroy_unit(unit.name)
96101

97102
await ops_test.model.remove_application(FIRST_APPLICATION, block_until_done=True)

0 commit comments

Comments
 (0)