Skip to content

Commit 4c1f43b

Browse files
Zuul (openstack-gerrit)
authored and committed
Merge "Dynamically archive FK related records in archive_deleted_rows" into stable/victoria
2 parents 05627f9 + 7b4f479 commit 4c1f43b

File tree

3 files changed

+170
-95
lines changed

3 files changed

+170
-95
lines changed

nova/db/sqlalchemy/api.py

Lines changed: 153 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -4098,64 +4098,132 @@ def task_log_end_task(context, task_name, period_beginning, period_ending,
40984098
##################
40994099

41004100

4101-
def _archive_if_instance_deleted(table, shadow_table, instances, conn,
4102-
max_rows, before):
4103-
"""Look for records that pertain to deleted instances, but may not be
4104-
deleted themselves. This catches cases where we delete an instance,
4105-
but leave some residue because of a failure in a cleanup path or
4106-
similar.
4107-
4108-
Logic is: if I have a column called instance_uuid, and that instance
4109-
is deleted, then I can be deleted.
4110-
"""
4111-
4112-
# NOTE(jake): handle instance_actions_events differently as it relies on
4113-
# instance_actions.id not instances.uuid
4114-
if table.name == "instance_actions_events":
4115-
instance_actions = models.BASE.metadata.tables["instance_actions"]
4116-
query_select = sql.select(
4117-
[table],
4118-
and_(instances.c.deleted != instances.c.deleted.default.arg,
4119-
instances.c.uuid == instance_actions.c.instance_uuid,
4120-
instance_actions.c.id == table.c.action_id))
4101+
def _get_tables_with_fk_to_table(table):
4102+
"""Get a list of tables that refer to the given table by foreign key (FK).
41214103
4122-
else:
4123-
query_select = sql.select(
4124-
[table],
4125-
and_(instances.c.deleted != instances.c.deleted.default.arg,
4126-
instances.c.uuid == table.c.instance_uuid))
4127-
4128-
if before:
4129-
query_select = query_select.where(instances.c.deleted_at < before)
4104+
:param table: Table object (parent) for which to find references by FK
41304105
4131-
query_select = query_select.order_by(table.c.id).limit(max_rows)
4132-
4133-
query_insert = shadow_table.insert(inline=True).\
4134-
from_select([c.name for c in table.c], query_select)
4135-
4136-
delete_statement = DeleteFromSelect(table, query_select,
4137-
table.c.id)
4106+
:returns: A list of Table objects that refer to the specified table by FK
4107+
"""
4108+
tables = []
4109+
for t in models.BASE.metadata.tables.values():
4110+
for fk in t.foreign_keys:
4111+
if fk.references(table):
4112+
tables.append(t)
4113+
return tables
4114+
4115+
4116+
def _get_fk_stmts(metadata, conn, table, column, records):
4117+
"""Find records related to this table by foreign key (FK) and create and
4118+
return insert/delete statements for them.
4119+
4120+
Logic is: find the tables that reference the table passed to this method
4121+
and walk the tree of references by FK. As child records are found, prepend
4122+
them to deques to execute later in a single database transaction (to avoid
4123+
orphaning related records if any one insert/delete fails or the archive
4124+
process is otherwise interrupted).
4125+
4126+
:param metadata: Metadata object to use to construct a shadow Table object
4127+
:param conn: Connection object to use to select records related by FK
4128+
:param table: Table object (parent) for which to find references by FK
4129+
:param column: Column object (parent) to use to select records related by
4130+
FK
4131+
:param records: A list of records (column values) to use to select records
4132+
related by FK
4133+
4134+
:returns: tuple of (insert statements, delete statements) for records
4135+
related by FK to insert into shadow tables and delete from main tables
4136+
"""
4137+
inserts = collections.deque()
4138+
deletes = collections.deque()
4139+
fk_tables = _get_tables_with_fk_to_table(table)
4140+
for fk_table in fk_tables:
4141+
# Create the shadow table for the referencing table.
4142+
fk_shadow_tablename = _SHADOW_TABLE_PREFIX + fk_table.name
4143+
try:
4144+
fk_shadow_table = Table(fk_shadow_tablename, metadata,
4145+
autoload=True)
4146+
except NoSuchTableError:
4147+
# No corresponding shadow table; skip it.
4148+
continue
41384149

4139-
try:
4140-
with conn.begin():
4141-
conn.execute(query_insert)
4142-
result_delete = conn.execute(delete_statement)
4143-
return result_delete.rowcount
4144-
except db_exc.DBReferenceError as ex:
4145-
LOG.warning('Failed to archive %(table)s: %(error)s',
4146-
{'table': table.name,
4147-
'error': six.text_type(ex)})
4148-
return 0
4150+
# TODO(stephenfin): Drop this when we drop the table
4151+
if fk_table.name == "dns_domains":
4152+
# We have one table (dns_domains) where the key is called
4153+
# "domain" rather than "id"
4154+
fk_column = fk_table.c.domain
4155+
else:
4156+
fk_column = fk_table.c.id
4157+
4158+
for fk in fk_table.foreign_keys:
4159+
# We need to find the records in the referring (child) table that
4160+
# correspond to the records in our (parent) table so we can archive
4161+
# them.
4162+
4163+
# First, select the column in the parent referenced by the child
4164+
# table that corresponds to the parent table records that were
4165+
# passed in.
4166+
# Example: table = 'instances' and fk_table = 'instance_extra'
4167+
# fk.parent = instance_extra.instance_uuid
4168+
# fk.column = instances.uuid
4169+
# SELECT instances.uuid FROM instances, instance_extra
4170+
# WHERE instance_extra.instance_uuid = instances.uuid
4171+
# AND instance.id IN (<ids>)
4172+
# We need the instance uuids for the <ids> in order to
4173+
# look up the matching instance_extra records.
4174+
select = sql.select([fk.column]).where(
4175+
sql.and_(fk.parent == fk.column, column.in_(records)))
4176+
rows = conn.execute(select).fetchall()
4177+
p_records = [r[0] for r in rows]
4178+
# Then, select rows in the child table that correspond to the
4179+
# parent table records that were passed in.
4180+
# Example: table = 'instances' and fk_table = 'instance_extra'
4181+
# fk.parent = instance_extra.instance_uuid
4182+
# fk.column = instances.uuid
4183+
# SELECT instance_extra.id FROM instance_extra, instances
4184+
# WHERE instance_extra.instance_uuid = instances.uuid
4185+
# AND instances.uuid IN (<uuids>)
4186+
# We will get the instance_extra ids we need to archive
4187+
# them.
4188+
fk_select = sql.select([fk_column]).where(
4189+
sql.and_(fk.parent == fk.column, fk.column.in_(p_records)))
4190+
fk_rows = conn.execute(fk_select).fetchall()
4191+
fk_records = [r[0] for r in fk_rows]
4192+
if fk_records:
4193+
# If we found any records in the child table, create shadow
4194+
# table insert statements for them and prepend them to the
4195+
# deque.
4196+
fk_columns = [c.name for c in fk_table.c]
4197+
fk_insert = fk_shadow_table.insert(inline=True).\
4198+
from_select(fk_columns, sql.select([fk_table],
4199+
fk_column.in_(fk_records)))
4200+
inserts.appendleft(fk_insert)
4201+
# Create main table delete statements and prepend them to the
4202+
# deque.
4203+
fk_delete = fk_table.delete().where(fk_column.in_(fk_records))
4204+
deletes.appendleft(fk_delete)
4205+
# Repeat for any possible nested child tables.
4206+
i, d = _get_fk_stmts(metadata, conn, fk_table, fk_column, fk_records)
4207+
inserts.extendleft(i)
4208+
deletes.extendleft(d)
4209+
4210+
return inserts, deletes
41494211

41504212

41514213
def _archive_deleted_rows_for_table(metadata, tablename, max_rows, before):
41524214
"""Move up to max_rows rows from one tables to the corresponding
41534215
shadow table.
41544216
4155-
:returns: 2-item tuple:
4217+
Will also follow FK constraints and archive all referring rows.
4218+
Example: archiving a record from the 'instances' table will also archive
4219+
the 'instance_extra' record before archiving the 'instances' record.
4220+
4221+
:returns: 3-item tuple:
41564222
41574223
- number of rows archived
41584224
- list of UUIDs of instances that were archived
4225+
- number of extra rows archived (due to FK constraints)
4226+
dict of {tablename: rows_archived}
41594227
"""
41604228
conn = metadata.bind.connect()
41614229
# NOTE(tdurakov): table metadata should be received
@@ -4171,7 +4239,7 @@ def _archive_deleted_rows_for_table(metadata, tablename, max_rows, before):
41714239
shadow_table = Table(shadow_tablename, metadata, autoload=True)
41724240
except NoSuchTableError:
41734241
# No corresponding shadow table; skip it.
4174-
return rows_archived, deleted_instance_uuids
4242+
return rows_archived, deleted_instance_uuids, {}
41754243

41764244
# TODO(stephenfin): Drop this when we drop the table
41774245
if tablename == "dns_domains":
@@ -4194,10 +4262,29 @@ def _archive_deleted_rows_for_table(metadata, tablename, max_rows, before):
41944262
rows = conn.execute(select).fetchall()
41954263
records = [r[0] for r in rows]
41964264

4265+
# We will archive deleted rows for this table and also generate insert and
4266+
# delete statements for extra rows we may archive by following FK
4267+
# relationships. Because we are iterating over the sorted_tables (list of
4268+
# Table objects sorted in order of foreign key dependency), new inserts and
4269+
# deletes ("leaves") will be added to the fronts of the deques created in
4270+
# _get_fk_stmts. This way, we make sure we delete child table records
4271+
# before we delete their parent table records.
4272+
4273+
# Keep track of any extra tablenames to number of rows that we archive by
4274+
# following FK relationships.
4275+
# {tablename: extra_rows_archived}
4276+
extras = collections.defaultdict(int)
41974277
if records:
41984278
insert = shadow_table.insert(inline=True).\
41994279
from_select(columns, sql.select([table], column.in_(records)))
42004280
delete = table.delete().where(column.in_(records))
4281+
# Walk FK relationships and add insert/delete statements for rows that
4282+
# refer to this table via FK constraints. fk_inserts and fk_deletes
4283+
# will be prepended to by _get_fk_stmts if referring rows are found by
4284+
# FK constraints.
4285+
fk_inserts, fk_deletes = _get_fk_stmts(
4286+
metadata, conn, table, column, records)
4287+
42014288
# NOTE(tssurya): In order to facilitate the deletion of records from
42024289
# instance_mappings, request_specs and instance_group_member tables in
42034290
# the nova_api DB, the rows of deleted instances from the instances
@@ -4211,9 +4298,14 @@ def _archive_deleted_rows_for_table(metadata, tablename, max_rows, before):
42114298
try:
42124299
# Group the insert and delete in a transaction.
42134300
with conn.begin():
4301+
for fk_insert in fk_inserts:
4302+
conn.execute(fk_insert)
4303+
for fk_delete in fk_deletes:
4304+
result_fk_delete = conn.execute(fk_delete)
4305+
extras[fk_delete.table.name] += result_fk_delete.rowcount
42144306
conn.execute(insert)
42154307
result_delete = conn.execute(delete)
4216-
rows_archived = result_delete.rowcount
4308+
rows_archived += result_delete.rowcount
42174309
except db_exc.DBReferenceError as ex:
42184310
# A foreign key constraint keeps us from deleting some of
42194311
# these rows until we clean up a dependent table. Just
@@ -4222,22 +4314,7 @@ def _archive_deleted_rows_for_table(metadata, tablename, max_rows, before):
42224314
"%(tablename)s: %(error)s",
42234315
{'tablename': tablename, 'error': six.text_type(ex)})
42244316

4225-
# NOTE(jake): instance_actions_events doesn't have an instance_uuid column
4226-
# but still needs to be archived as it is a FK constraint
4227-
if ((max_rows is None or rows_archived < max_rows) and
4228-
# NOTE(melwitt): The pci_devices table uses the 'instance_uuid'
4229-
# column to track the allocated association of a PCI device and its
4230-
# records are not tied to the lifecycles of instance records.
4231-
(tablename != 'pci_devices' and
4232-
'instance_uuid' in columns or
4233-
tablename == 'instance_actions_events')):
4234-
instances = models.BASE.metadata.tables['instances']
4235-
limit = max_rows - rows_archived if max_rows is not None else None
4236-
extra = _archive_if_instance_deleted(table, shadow_table, instances,
4237-
conn, limit, before)
4238-
rows_archived += extra
4239-
4240-
return rows_archived, deleted_instance_uuids
4317+
return rows_archived, deleted_instance_uuids, extras
42414318

42424319

42434320
def archive_deleted_rows(context=None, max_rows=None, before=None):
@@ -4261,21 +4338,26 @@ def archive_deleted_rows(context=None, max_rows=None, before=None):
42614338
- list of UUIDs of instances that were archived
42624339
- total number of rows that were archived
42634340
"""
4264-
table_to_rows_archived = {}
4341+
table_to_rows_archived = collections.defaultdict(int)
42654342
deleted_instance_uuids = []
42664343
total_rows_archived = 0
42674344
meta = MetaData(get_engine(use_slave=True, context=context))
42684345
meta.reflect()
4269-
# Reverse sort the tables so we get the leaf nodes first for processing.
4270-
for table in reversed(meta.sorted_tables):
4346+
# Get the sorted list of tables in order of foreign key dependency.
4347+
# Process the parent tables and find their dependent records in order to
4348+
archive the related records in a single database transaction. The goal
4349+
# is to avoid a situation where, for example, an 'instances' table record
4350+
# is missing its corresponding 'instance_extra' record due to running the
4351+
# archive_deleted_rows command with max_rows.
4352+
for table in meta.sorted_tables:
42714353
tablename = table.name
42724354
rows_archived = 0
42734355
# skip the special sqlalchemy-migrate migrate_version table and any
42744356
# shadow tables
42754357
if (tablename == 'migrate_version' or
42764358
tablename.startswith(_SHADOW_TABLE_PREFIX)):
42774359
continue
4278-
rows_archived, _deleted_instance_uuids = (
4360+
rows_archived, _deleted_instance_uuids, extras = (
42794361
_archive_deleted_rows_for_table(
42804362
meta, tablename,
42814363
max_rows=max_rows - total_rows_archived,
@@ -4286,6 +4368,9 @@ def archive_deleted_rows(context=None, max_rows=None, before=None):
42864368
# Only report results for tables that had updates.
42874369
if rows_archived:
42884370
table_to_rows_archived[tablename] = rows_archived
4371+
for tablename, extra_rows_archived in extras.items():
4372+
table_to_rows_archived[tablename] += extra_rows_archived
4373+
total_rows_archived += extra_rows_archived
42894374
if total_rows_archived >= max_rows:
42904375
break
42914376
return table_to_rows_archived, deleted_instance_uuids, total_rows_archived

nova/tests/functional/db/test_archive.py

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -167,13 +167,7 @@ def test_archive_deleted_rows_incomplete(self):
167167
exceptions.append(ex)
168168
if archived == 0:
169169
break
170-
# FIXME(melwitt): OrphanedObjectError is raised because of the bug.
171-
self.assertTrue(exceptions)
172-
for ex in exceptions:
173-
self.assertEqual(500, ex.response.status_code)
174-
self.assertIn('OrphanedObjectError', str(ex))
175-
# FIXME(melwitt): Uncomment when the bug is fixed.
176-
# self.assertFalse(exceptions)
170+
self.assertFalse(exceptions)
177171

178172
def _get_table_counts(self):
179173
engine = sqlalchemy_api.get_engine()

nova/tests/unit/db/test_db_api.py

Lines changed: 16 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -6295,17 +6295,24 @@ def test_archive_deleted_rows_before(self):
62956295
instance_actions_events=1)
62966296
self._assertEqualObjects(expected, results[0])
62976297

6298-
# Archive 1 row deleted before 2017-01-03. instance_action_events
6299-
# should be the table with row deleted due to FK contraints
6298+
# Archive 1 row deleted before 2017-01-03
6299+
# Because the instances table will be processed first, tables that
6300+
# refer to it (instance_actions and instance_actions_events) will be
6301+
# visited and archived in the same transaction as the instance, to
6302+
# avoid orphaning the instance record (archive dependent records in one
6303+
# transaction)
63006304
before_date = dateutil_parser.parse('2017-01-03', fuzzy=True)
63016305
results = db.archive_deleted_rows(max_rows=1, before=before_date)
6302-
expected = dict(instance_actions_events=1)
6306+
expected = dict(instances=1,
6307+
instance_actions=1,
6308+
instance_actions_events=1)
63036309
self._assertEqualObjects(expected, results[0])
6304-
# Archive all other rows deleted before 2017-01-03. This should
6305-
# delete row in instance_actions, then row in instances due to FK
6306-
# constraints
6310+
# Try to archive all other rows deleted before 2017-01-03. This should
6311+
# not archive anything because the instances table and tables that
6312+
# refer to it (instance_actions and instance_actions_events) were all
6313+
# archived in the last run.
63076314
results = db.archive_deleted_rows(max_rows=100, before=before_date)
6308-
expected = dict(instances=1, instance_actions=1)
6315+
expected = {}
63096316
self._assertEqualObjects(expected, results[0])
63106317

63116318
# Verify we have 4 left in main
@@ -6453,19 +6460,8 @@ def test_archive_deleted_rows_for_migrations(self):
64536460
ins_stmt = self.migrations.insert().values(instance_uuid=instance_uuid,
64546461
deleted=0)
64556462
self.conn.execute(ins_stmt)
6456-
# The first try to archive instances should fail, due to FK.
6457-
num = sqlalchemy_api._archive_deleted_rows_for_table(self.metadata,
6458-
"instances",
6459-
max_rows=None,
6460-
before=None)
6461-
self.assertEqual(0, num[0])
6462-
# Then archiving migrations should work.
6463-
num = sqlalchemy_api._archive_deleted_rows_for_table(self.metadata,
6464-
"migrations",
6465-
max_rows=None,
6466-
before=None)
6467-
self.assertEqual(1, num[0])
6468-
# Then archiving instances should work.
6463+
# Archiving instances should result in migrations related to the
6464+
# instances also being archived.
64696465
num = sqlalchemy_api._archive_deleted_rows_for_table(self.metadata,
64706466
"instances",
64716467
max_rows=None,

0 commit comments

Comments
 (0)