
Commit 1c8031b

Merge pull request #121 from stackhpc/upstream/2023.1-2024-10-21
Synchronise 2023.1 with upstream
2 parents: a2f49a7 + 945aabc

12 files changed: +404 −97 lines


nova/cmd/manage.py

Lines changed: 25 additions & 10 deletions
@@ -257,10 +257,10 @@ def version(self):
         print(migration.db_version())

     @args('--max_rows', type=int, metavar='<number>', dest='max_rows',
-          help='Maximum number of deleted rows to archive. Defaults to 1000. '
-               'Note that this number does not include the corresponding '
-               'rows, if any, that are removed from the API database for '
-               'deleted instances.')
+          help='Maximum number of deleted rows to archive per table. Defaults '
+               'to 1000. Note that this number is a soft limit and does not '
+               'include the corresponding rows, if any, that are removed '
+               'from the API database for deleted instances.')
     @args('--before', metavar='<date>',
           help=('Archive rows that have been deleted before this date. '
                 'Accepts date strings in the default format output by the '
@@ -432,7 +432,10 @@ def _do_archive(
                 'cell1.instances': 5}
         :param cctxt: Cell-targeted nova.context.RequestContext if archiving
             across all cells
-        :param max_rows: Maximum number of deleted rows to archive
+        :param max_rows: Maximum number of deleted rows to archive per table.
+            Note that this number is a soft limit and does not include the
+            corresponding rows, if any, that are removed from the API database
+            for deleted instances.
         :param until_complete: Whether to run continuously until all deleted
             rows are archived
         :param verbose: Whether to print how many rows were archived per table
@@ -445,15 +448,26 @@ def _do_archive(
         """
         ctxt = context.get_admin_context()
         while True:
-            run, deleted_instance_uuids, total_rows_archived = \
+            # table_to_rows = {table_name: number_of_rows_archived}
+            # deleted_instance_uuids = ['uuid1', 'uuid2', ...]
+            table_to_rows, deleted_instance_uuids, total_rows_archived = \
                 db.archive_deleted_rows(
                     cctxt, max_rows, before=before_date, task_log=task_log)
-            for table_name, rows_archived in run.items():
+
+            for table_name, rows_archived in table_to_rows.items():
                 if cell_name:
                     table_name = cell_name + '.' + table_name
                 table_to_rows_archived.setdefault(table_name, 0)
                 table_to_rows_archived[table_name] += rows_archived
-            if deleted_instance_uuids:
+
+            # deleted_instance_uuids does not necessarily mean that any
+            # instances rows were archived because it is obtained by a query
+            # separate from the archive queries. For example, if a
+            # DBReferenceError was raised while processing the instances table,
+            # we would have skipped the table and had 0 rows archived even
+            # though deleted instances rows were found.
+            instances_archived = table_to_rows.get('instances', 0)
+            if deleted_instance_uuids and instances_archived:
                 table_to_rows_archived.setdefault(
                     'API_DB.instance_mappings', 0)
                 table_to_rows_archived.setdefault(
@@ -476,8 +490,9 @@ def _do_archive(

             # If we're not archiving until there is nothing more to archive, we
             # have reached max_rows in this cell DB or there was nothing to
-            # archive.
-            if not until_complete or not run:
+            # archive. We check the values() in case we get something like
+            # table_to_rows = {'instances': 0} back somehow.
+            if not until_complete or not any(table_to_rows.values()):
                 break
             if verbose:
                 sys.stdout.write('.')
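
Note on the new stop condition: db.archive_deleted_rows may return a per-table
dict whose counts are all zero, and a non-empty dict is truthy, so
truth-testing the dict itself could loop forever under --until-complete. A
minimal sketch of the loop's contract, using a hypothetical stand-in for the
DB API, shows why any(table_to_rows.values()) is the right check:

# Hypothetical stand-in for nova.db.main.api.archive_deleted_rows.
def fake_archive_deleted_rows(max_rows):
    # A backend could return {'instances': 0} rather than {} when a table
    # was scanned but no rows qualified for archiving.
    return {'instances': 0}, [], 0

totals = {}
while True:
    table_to_rows, _uuids, _total = fake_archive_deleted_rows(max_rows=1000)
    for name, count in table_to_rows.items():
        totals[name] = totals.get(name, 0) + count
    # 'if not table_to_rows' would spin forever on {'instances': 0};
    # any(...) stops as soon as no rows actually moved.
    if not any(table_to_rows.values()):
        break
print(totals)  # {'instances': 0}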

nova/conf/pci.py

Lines changed: 3 additions & 2 deletions
@@ -175,8 +175,9 @@
   its corresponding PF), otherwise they will be ignored and not
   available for allocation.
 - ``resource_class`` - optional Placement resource class name to be used
-  to track the matching PCI devices in Placement when [pci]device_spec is
-  True. It can be a standard resource class from the
+  to track the matching PCI devices in Placement when
+  [pci]report_in_placement is True.
+  It can be a standard resource class from the
   ``os-resource-classes`` lib. Or can be any string. In that case Nova will
   normalize it to a proper Placement resource class by making it upper
   case, replacing any consecutive character outside of ``[A-Z0-9_]`` with a
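
The help text is truncated at the hunk boundary; the normalization it
describes (upper-casing the name and squashing runs of characters outside
[A-Z0-9_]) follows the usual Placement custom resource class convention. A
rough sketch, treating the underscore replacement and the CUSTOM_ prefix as
assumptions based on that convention rather than code from this commit:

import re

def normalize_resource_class(name: str) -> str:
    # Assumed rule: upper-case, collapse each run outside [A-Z0-9_] to a
    # single underscore, and prefix with CUSTOM_ as Placement requires for
    # non-standard resource classes.
    return 'CUSTOM_' + re.sub(r'[^A-Z0-9_]+', '_', name.upper())

print(normalize_resource_class('my-gpu.v2'))  # CUSTOM_MY_GPU_V2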

nova/db/main/api.py

Lines changed: 93 additions & 38 deletions
@@ -4407,6 +4407,9 @@ def _archive_deleted_rows_for_table(
     select = select.order_by(column).limit(max_rows)
     with conn.begin():
         rows = conn.execute(select).fetchall()
+
+    # This is a list of IDs of rows that should be archived from this table,
+    # limited to a length of max_rows.
     records = [r[0] for r in rows]

@@ -4419,51 +4422,103 @@ def _archive_deleted_rows_for_table(

     # Keep track of any extra tablenames to number of rows that we archive by
     # following FK relationships.
-    # {tablename: extra_rows_archived}
+    #
+    # extras = {tablename: number_of_extra_rows_archived}
     extras = collections.defaultdict(int)
-    if records:
-        insert = shadow_table.insert().from_select(
-            columns, sql.select(table).where(column.in_(records))
-        ).inline()
-        delete = table.delete().where(column.in_(records))
+
+    if not records:
+        # Nothing to archive, so return.
+        return rows_archived, deleted_instance_uuids, extras
+
+    # Keep track of how many rows we accumulate for the insert+delete database
+    # transaction and cap it as soon as it is >= max_rows. Because we will
+    # archive all child rows of a parent row along with the parent at the same
+    # time, we end up with extra rows to archive in addition to len(records).
+    num_rows_in_batch = 0
+    # The sequence of query statements we will execute in a batch. These are
+    # ordered: [child1, child1, parent1, child2, child2, child2, parent2, ...]
+    # Parent + child "trees" are kept together to avoid FK constraint
+    # violations.
+    statements_in_batch = []
+    # The list of records in the batch. This is used for collecting deleted
+    # instance UUIDs in the case of the 'instances' table.
+    records_in_batch = []
+
+    # (melwitt): We will gather rows related by foreign key relationship for
+    # each deleted row, one at a time. We do it this way to keep track of and
+    # limit the total number of rows that will be archived in a single database
+    # transaction. In a large scale database with potentially hundreds of
+    # thousands of deleted rows, if we don't limit the size of the transaction
+    # based on max_rows, we can get into a situation where we get stuck not
+    # able to make much progress. The value of max_rows has to be 1) small
+    # enough to not exceed the database's max packet size limit or timeout with
+    # a deadlock but 2) large enough to make progress in an environment with a
+    # constant high volume of create and delete traffic. By archiving each
+    # parent + child rows tree one at a time, we can ensure meaningful progress
+    # can be made while allowing the caller to predictably control the size of
+    # the database transaction with max_rows.
+    for record in records:
         # Walk FK relationships and add insert/delete statements for rows that
         # refer to this table via FK constraints. fk_inserts and fk_deletes
         # will be prepended to by _get_fk_stmts if referring rows are found by
         # FK constraints.
         fk_inserts, fk_deletes = _get_fk_stmts(
-            metadata, conn, table, column, records)
-
-        # NOTE(tssurya): In order to facilitate the deletion of records from
-        # instance_mappings, request_specs and instance_group_member tables in
-        # the nova_api DB, the rows of deleted instances from the instances
-        # table are stored prior to their deletion. Basically the uuids of the
-        # archived instances are queried and returned.
-        if tablename == "instances":
-            query_select = sql.select(table.c.uuid).where(
-                table.c.id.in_(records)
-            )
-            with conn.begin():
-                rows = conn.execute(query_select).fetchall()
-            deleted_instance_uuids = [r[0] for r in rows]
+            metadata, conn, table, column, [record])
+        statements_in_batch.extend(fk_inserts + fk_deletes)
+        # statement to add parent row to shadow table
+        insert = shadow_table.insert().from_select(
+            columns, sql.select(table).where(column.in_([record]))).inline()
+        statements_in_batch.append(insert)
+        # statement to remove parent row from main table
+        delete = table.delete().where(column.in_([record]))
+        statements_in_batch.append(delete)

-        try:
-            # Group the insert and delete in a transaction.
-            with conn.begin():
-                for fk_insert in fk_inserts:
-                    conn.execute(fk_insert)
-                for fk_delete in fk_deletes:
-                    result_fk_delete = conn.execute(fk_delete)
-                    extras[fk_delete.table.name] += result_fk_delete.rowcount
-                conn.execute(insert)
-                result_delete = conn.execute(delete)
-                rows_archived += result_delete.rowcount
-        except db_exc.DBReferenceError as ex:
-            # A foreign key constraint keeps us from deleting some of
-            # these rows until we clean up a dependent table. Just
-            # skip this table for now; we'll come back to it later.
-            LOG.warning("IntegrityError detected when archiving table "
-                        "%(tablename)s: %(error)s",
-                        {'tablename': tablename, 'error': str(ex)})
+        records_in_batch.append(record)
+
+        # Check whether we have a full batch >= max_rows. Rows are counted as
+        # the number of rows that will be moved in the database transaction.
+        # So each insert+delete pair represents one row that will be moved.
+        # 1 parent + its fks
+        num_rows_in_batch += 1 + len(fk_inserts)
+
+        if max_rows is not None and num_rows_in_batch >= max_rows:
+            break
+
+    # NOTE(tssurya): In order to facilitate the deletion of records from
+    # instance_mappings, request_specs and instance_group_member tables in the
+    # nova_api DB, the rows of deleted instances from the instances table are
+    # stored prior to their deletion. Basically the uuids of the archived
+    # instances are queried and returned.
+    if tablename == "instances":
+        query_select = sql.select(table.c.uuid).where(
+            table.c.id.in_(records_in_batch))
+        with conn.begin():
+            rows = conn.execute(query_select).fetchall()
+        # deleted_instance_uuids = ['uuid1', 'uuid2', ...]
+        deleted_instance_uuids = [r[0] for r in rows]
+
+    try:
+        # Group the insert and delete in a transaction.
+        with conn.begin():
+            for statement in statements_in_batch:
+                result = conn.execute(statement)
+                result_tablename = statement.table.name
+                # Add to archived row counts if not a shadow table.
+                if not result_tablename.startswith(_SHADOW_TABLE_PREFIX):
+                    if result_tablename == tablename:
+                        # Number of tablename (parent) rows archived.
+                        rows_archived += result.rowcount
+                    else:
+                        # Number(s) of child rows archived.
+                        extras[result_tablename] += result.rowcount
+
+    except db_exc.DBReferenceError as ex:
+        # A foreign key constraint keeps us from deleting some of these rows
+        # until we clean up a dependent table. Just skip this table for now;
+        # we'll come back to it later.
+        LOG.warning("IntegrityError detected when archiving table "
+                    "%(tablename)s: %(error)s",
+                    {'tablename': tablename, 'error': str(ex)})

     conn.close()
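
The batching strategy introduced here is easier to see without the SQLAlchemy
plumbing. A minimal sketch with hypothetical in-memory data (parent IDs and
child counts are illustrative, not from this commit):

# Each parent row id maps to the number of child rows that must move with
# it; child statements precede the parent so FK constraints hold.
parents_to_children = {1: 4, 2: 4, 3: 4}
max_rows = 5

batch, num_rows_in_batch = [], 0
for parent, num_children in parents_to_children.items():
    batch.extend(['child-of-%d' % parent] * num_children)
    batch.append('parent-%d' % parent)
    num_rows_in_batch += 1 + num_children
    # Cap as soon as the running total reaches max_rows; the tree that
    # crosses the limit is still archived whole, hence the "soft limit".
    if num_rows_in_batch >= max_rows:
        break

assert num_rows_in_batch == 5  # one whole tree, even though max_rows was hit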

nova/exception.py

Lines changed: 4 additions & 0 deletions
@@ -1131,6 +1131,10 @@ class FileNotFound(NotFound):
     msg_fmt = _("File %(file_path)s could not be found.")


+class DeviceBusy(NovaException):
+    msg_fmt = _("device %(file_path)s is busy.")
+
+
 class ClassNotFound(NotFound):
     msg_fmt = _("Class %(class_name)s could not be found: %(exception)s")

nova/filesystem.py

Lines changed: 52 additions & 3 deletions
@@ -12,48 +12,97 @@

 """Functions to address filesystem calls, particularly sysfs."""

+import functools
 import os
+import time

 from oslo_log import log as logging

 from nova import exception

 LOG = logging.getLogger(__name__)
+SYS = '/sys'
+RETRY_LIMIT = 5


-SYS = '/sys'
+# a retry decorator to handle EBUSY
+def retry_if_busy(func):
+    """Decorator to retry a function if it raises DeviceBusy.
+
+    This decorator will retry the function RETRY_LIMIT=5 times if it raises
+    DeviceBusy. It will sleep for 1 second on the first retry, 2 seconds on
+    the second retry, and so on, up to RETRY_LIMIT seconds. If the function
+    still raises DeviceBusy after RETRY_LIMIT retries, the exception will be
+    raised.
+    """

+    @functools.wraps(func)
+    def wrapper(*args, **kwargs):
+        for i in range(RETRY_LIMIT):
+            try:
+                return func(*args, **kwargs)
+            except exception.DeviceBusy as e:
+                # if we have retried RETRY_LIMIT times, raise the exception
+                # otherwise, sleep and retry, i is 0-based so we need
+                # to add 1 to it
+                count = i + 1
+                if count < RETRY_LIMIT:
+                    LOG.debug(
+                        f"File {e.kwargs['file_path']} is busy, "
+                        f"sleeping {count} seconds before retrying")
+                    time.sleep(count)
+                    continue
+                raise
+    return wrapper


 # NOTE(bauzas): this method is deliberately not wrapped in a privsep entrypoint
+@retry_if_busy
 def read_sys(path: str) -> str:
     """Reads the content of a file in the sys filesystem.

     :param path: relative or absolute. If relative, will be prefixed by /sys.
     :returns: contents of that file.
     :raises: nova.exception.FileNotFound if we can't read that file.
+    :raises: nova.exception.DeviceBusy if the file is busy.
     """
     try:
         # The path can be absolute with a /sys prefix but that's fine.
         with open(os.path.join(SYS, path), mode='r') as data:
             return data.read()
-    except (OSError, ValueError) as exc:
+    except OSError as exc:
+        # errno 16 is EBUSY, which means the file is busy.
+        if exc.errno == 16:
+            raise exception.DeviceBusy(file_path=path) from exc
+        # convert permission denied to file not found
+        raise exception.FileNotFound(file_path=path) from exc
+    except ValueError as exc:
         raise exception.FileNotFound(file_path=path) from exc


 # NOTE(bauzas): this method is deliberately not wrapped in a privsep entrypoint
 # In order to correctly use it, you need to decorate the caller with a specific
 # privsep entrypoint.
+@retry_if_busy
 def write_sys(path: str, data: str) -> None:
     """Writes the content of a file in the sys filesystem with data.

     :param path: relative or absolute. If relative, will be prefixed by /sys.
     :param data: the data to write.
     :returns: contents of that file.
     :raises: nova.exception.FileNotFound if we can't write that file.
+    :raises: nova.exception.DeviceBusy if the file is busy.
     """
     try:
         # The path can be absolute with a /sys prefix but that's fine.
         with open(os.path.join(SYS, path), mode='w') as fd:
             fd.write(data)
-    except (OSError, ValueError) as exc:
+    except OSError as exc:
+        # errno 16 is EBUSY, which means the file is busy.
+        if exc.errno == 16:
+            raise exception.DeviceBusy(file_path=path) from exc
+        # convert permission denied to file not found
+        raise exception.FileNotFound(file_path=path) from exc
+    except ValueError as exc:
         raise exception.FileNotFound(file_path=path) from exc
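
With RETRY_LIMIT = 5 and a sleep of i + 1 seconds after each busy attempt, a
file that never stops being busy costs 1 + 2 + 3 + 4 = 10 seconds of sleeping
before the fifth attempt re-raises DeviceBusy. A self-contained sketch of the
same decorator logic against a stub that is busy twice (the stub and its names
are hypothetical, not part of this commit):

import functools
import time

RETRY_LIMIT = 5

class DeviceBusy(Exception):
    """Stand-in for nova.exception.DeviceBusy."""

def retry_if_busy(func):
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        for i in range(RETRY_LIMIT):
            try:
                return func(*args, **kwargs)
            except DeviceBusy:
                if i + 1 < RETRY_LIMIT:
                    time.sleep(i + 1)  # linear backoff: 1s, 2s, 3s, 4s
                    continue
                raise
    return wrapper

attempts = 0

@retry_if_busy
def read_busy_file():
    global attempts
    attempts += 1
    if attempts <= 2:  # busy on the first two attempts only
        raise DeviceBusy()
    return 'data'

print(read_busy_file())  # 'data', after roughly 1s + 2s of sleeping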

nova/tests/functional/db/test_archive.py

Lines changed: 44 additions & 0 deletions
@@ -174,6 +174,50 @@ def test_archive_deleted_rows_incomplete(self):
                 break
         self.assertFalse(exceptions)

+    def test_archive_deleted_rows_parent_child_trees_one_at_time(self):
+        """Test that we are archiving parent + child rows "trees" one at a time
+
+        Previously, we archived deleted rows in batches of max_rows parents +
+        their child rows in a single database transaction. Doing it that way
+        limited how high a value of max_rows could be specified by the caller
+        because of the size of the database transaction it could generate.
+
+        For example, in a large scale deployment with hundreds of thousands of
+        deleted rows and constant server creation and deletion activity, a
+        value of max_rows=1000 might exceed the database's configured maximum
+        packet size or timeout due to a database deadlock, forcing the operator
+        to use a much lower max_rows value like 100 or 50.
+
+        And when the operator has e.g. 500,000 deleted instances rows (and
+        millions of deleted rows total) they are trying to archive, being
+        forced to use a max_rows value several orders of magnitude lower than
+        the number of rows they need to archive was a poor user experience.
+
+        This tests that we are archiving each parent plus their child rows one
+        at a time while limiting the total number of rows per table in a batch
+        as soon as the total number of archived rows is >= max_rows.
+        """
+        # Boot two servers and delete them, then try to archive rows.
+        for i in range(2):
+            server = self._create_server()
+            self._delete_server(server)
+
+        # Use max_rows=5 to limit the archive to one complete parent +
+        # child rows tree.
+        table_to_rows, _, _ = db.archive_deleted_rows(max_rows=5)
+
+        # We should have archived only one instance because the parent +
+        # child tree will have >= max_rows of 5.
+        self.assertEqual(1, table_to_rows['instances'])
+
+        # Archive once more to archive the other instance (and its child
+        # rows).
+        table_to_rows, _, _ = db.archive_deleted_rows(max_rows=5)
+        self.assertEqual(1, table_to_rows['instances'])
+
+        # There should be nothing else to archive.
+        _, _, total_rows_archived = db.archive_deleted_rows(max_rows=100)
+        self.assertEqual(0, total_rows_archived)
+
     def _get_table_counts(self):
         engine = db.get_engine()
         conn = engine.connect()
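
Why max_rows=5 archives exactly one server per pass in this test: a single
deleted instance brings its FK child rows along in the same tree, and the
tree that crosses the limit is archived whole. Illustrative arithmetic only;
the real child tables of 'instances' vary by schema and workload:

children = ['instance_extra', 'instance_info_caches',
            'instance_system_metadata', 'block_device_mapping']
rows_for_one_tree = 1 + len(children)  # parent + children = 5
assert rows_for_one_tree >= 5          # one tree fills a max_rows=5 batch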
