@@ -4098,64 +4098,132 @@ def task_log_end_task(context, task_name, period_beginning, period_ending,
4098
4098
##################
4099
4099
4100
4100
4101
- def _archive_if_instance_deleted (table , shadow_table , instances , conn ,
4102
- max_rows , before ):
4103
- """Look for records that pertain to deleted instances, but may not be
4104
- deleted themselves. This catches cases where we delete an instance,
4105
- but leave some residue because of a failure in a cleanup path or
4106
- similar.
4107
-
4108
- Logic is: if I have a column called instance_uuid, and that instance
4109
- is deleted, then I can be deleted.
4110
- """
4111
-
4112
- # NOTE(jake): handle instance_actions_events differently as it relies on
4113
- # instance_actions.id not instances.uuid
4114
- if table .name == "instance_actions_events" :
4115
- instance_actions = models .BASE .metadata .tables ["instance_actions" ]
4116
- query_select = sql .select (
4117
- [table ],
4118
- and_ (instances .c .deleted != instances .c .deleted .default .arg ,
4119
- instances .c .uuid == instance_actions .c .instance_uuid ,
4120
- instance_actions .c .id == table .c .action_id ))
4101
def _get_tables_with_fk_to_table(table):
    """Get a list of tables that refer to the given table by foreign key (FK).

    :param table: Table object (parent) for which to find references by FK

    :returns: A list of Table objects that refer to the specified table by FK
    """
    # NOTE: one entry is recorded per matching FK, so a table declaring
    # several FKs to ``table`` appears once per such FK.
    return [
        candidate
        for candidate in models.BASE.metadata.tables.values()
        for fk in candidate.foreign_keys
        if fk.references(table)
    ]
4114
+
4115
+
4116
def _get_fk_stmts(metadata, conn, table, column, records):
    """Find records related to this table by foreign key (FK) and create and
    return insert/delete statements for them.

    Logic is: find the tables that reference the table passed to this method
    and walk the tree of references by FK. As child records are found, prepend
    them to deques to execute later in a single database transaction (to avoid
    orphaning related records if any one insert/delete fails or the archive
    process is otherwise interrupted).

    :param metadata: Metadata object to use to construct a shadow Table object
    :param conn: Connection object to use to select records related by FK
    :param table: Table object (parent) for which to find references by FK
    :param column: Column object (parent) to use to select records related by
        FK
    :param records: A list of records (column values) to use to select records
        related by FK

    :returns: tuple of (insert statements, delete statements) for records
        related by FK to insert into shadow tables and delete from main tables
    """
    inserts = collections.deque()
    deletes = collections.deque()

    for child in _get_tables_with_fk_to_table(table):
        # Locate the shadow table for the referencing (child) table. Without
        # one we have nowhere to archive its rows, so skip it.
        try:
            child_shadow = Table(
                _SHADOW_TABLE_PREFIX + child.name, metadata, autoload=True)
        except NoSuchTableError:
            continue

        # TODO(stephenfin): Drop this when we drop the table
        # dns_domains is the one table whose key column is named "domain"
        # rather than "id".
        child_key = (
            child.c.domain if child.name == "dns_domains" else child.c.id)

        for fk in child.foreign_keys:
            # We need the rows in the referring (child) table corresponding
            # to the parent table records passed in, so they can be archived
            # as well.
            #
            # Step 1: map the parent key values we were given to the parent
            # column that the child's FK actually references.
            # Example: table = 'instances' and child = 'instance_extra'
            #   fk.parent = instance_extra.instance_uuid
            #   fk.column = instances.uuid
            #   SELECT instances.uuid FROM instances, instance_extra
            #    WHERE instance_extra.instance_uuid = instances.uuid
            #      AND instances.id IN (<ids>)
            # We need the instance uuids for the <ids> in order to look up
            # the matching instance_extra records.
            referenced_select = sql.select([fk.column]).where(
                sql.and_(fk.parent == fk.column, column.in_(records)))
            referenced_values = [
                row[0] for row in conn.execute(referenced_select).fetchall()]

            # Step 2: select the child rows that point at those parent
            # values.
            # Example (continued):
            #   SELECT instance_extra.id FROM instance_extra, instances
            #    WHERE instance_extra.instance_uuid = instances.uuid
            #      AND instances.uuid IN (<uuids>)
            # This yields the instance_extra ids we need in order to archive
            # them.
            child_select = sql.select([child_key]).where(
                sql.and_(fk.parent == fk.column,
                         fk.column.in_(referenced_values)))
            child_values = [
                row[0] for row in conn.execute(child_select).fetchall()]

            if child_values:
                # Prepend a shadow table insert statement for the child rows
                # we found ...
                insert_stmt = child_shadow.insert(inline=True).\
                    from_select(
                        [c.name for c in child.c],
                        sql.select([child], child_key.in_(child_values)))
                inserts.appendleft(insert_stmt)
                # ... and a matching delete statement for the main table.
                deletes.appendleft(
                    child.delete().where(child_key.in_(child_values)))

            # Repeat for any possible nested child tables.
            nested_inserts, nested_deletes = _get_fk_stmts(
                metadata, conn, child, child_key, child_values)
            inserts.extendleft(nested_inserts)
            deletes.extendleft(nested_deletes)

    return inserts, deletes
4149
4211
4150
4212
4151
4213
def _archive_deleted_rows_for_table (metadata , tablename , max_rows , before ):
4152
4214
"""Move up to max_rows rows from one tables to the corresponding
4153
4215
shadow table.
4154
4216
4155
- :returns: 2-item tuple:
4217
+ Will also follow FK constraints and archive all referring rows.
4218
+ Example: archiving a record from the 'instances' table will also archive
4219
+ the 'instance_extra' record before archiving the 'instances' record.
4220
+
4221
+ :returns: 3-item tuple:
4156
4222
4157
4223
- number of rows archived
4158
4224
- list of UUIDs of instances that were archived
4225
+ - number of extra rows archived (due to FK constraints)
4226
+ dict of {tablename: rows_archived}
4159
4227
"""
4160
4228
conn = metadata .bind .connect ()
4161
4229
# NOTE(tdurakov): table metadata should be received
@@ -4171,7 +4239,7 @@ def _archive_deleted_rows_for_table(metadata, tablename, max_rows, before):
4171
4239
shadow_table = Table (shadow_tablename , metadata , autoload = True )
4172
4240
except NoSuchTableError :
4173
4241
# No corresponding shadow table; skip it.
4174
- return rows_archived , deleted_instance_uuids
4242
+ return rows_archived , deleted_instance_uuids , {}
4175
4243
4176
4244
# TODO(stephenfin): Drop this when we drop the table
4177
4245
if tablename == "dns_domains" :
@@ -4194,10 +4262,29 @@ def _archive_deleted_rows_for_table(metadata, tablename, max_rows, before):
4194
4262
rows = conn .execute (select ).fetchall ()
4195
4263
records = [r [0 ] for r in rows ]
4196
4264
4265
+ # We will archive deleted rows for this table and also generate insert and
4266
+ # delete statements for extra rows we may archive by following FK
4267
+ # relationships. Because we are iterating over the sorted_tables (list of
4268
+ # Table objects sorted in order of foreign key dependency), new inserts and
4269
+ # deletes ("leaves") will be added to the fronts of the deques created in
4270
+ # _get_fk_stmts. This way, we make sure we delete child table records
4271
+ # before we delete their parent table records.
4272
+
4273
+ # Keep track of any extra tablenames to number of rows that we archive by
4274
+ # following FK relationships.
4275
+ # {tablename: extra_rows_archived}
4276
+ extras = collections .defaultdict (int )
4197
4277
if records :
4198
4278
insert = shadow_table .insert (inline = True ).\
4199
4279
from_select (columns , sql .select ([table ], column .in_ (records )))
4200
4280
delete = table .delete ().where (column .in_ (records ))
4281
+ # Walk FK relationships and add insert/delete statements for rows that
4282
+ # refer to this table via FK constraints. fk_inserts and fk_deletes
4283
+ # will be prepended to by _get_fk_stmts if referring rows are found by
4284
+ # FK constraints.
4285
+ fk_inserts , fk_deletes = _get_fk_stmts (
4286
+ metadata , conn , table , column , records )
4287
+
4201
4288
# NOTE(tssurya): In order to facilitate the deletion of records from
4202
4289
# instance_mappings, request_specs and instance_group_member tables in
4203
4290
# the nova_api DB, the rows of deleted instances from the instances
@@ -4211,9 +4298,14 @@ def _archive_deleted_rows_for_table(metadata, tablename, max_rows, before):
4211
4298
try :
4212
4299
# Group the insert and delete in a transaction.
4213
4300
with conn .begin ():
4301
+ for fk_insert in fk_inserts :
4302
+ conn .execute (fk_insert )
4303
+ for fk_delete in fk_deletes :
4304
+ result_fk_delete = conn .execute (fk_delete )
4305
+ extras [fk_delete .table .name ] += result_fk_delete .rowcount
4214
4306
conn .execute (insert )
4215
4307
result_delete = conn .execute (delete )
4216
- rows_archived = result_delete .rowcount
4308
+ rows_archived += result_delete .rowcount
4217
4309
except db_exc .DBReferenceError as ex :
4218
4310
# A foreign key constraint keeps us from deleting some of
4219
4311
# these rows until we clean up a dependent table. Just
@@ -4222,22 +4314,7 @@ def _archive_deleted_rows_for_table(metadata, tablename, max_rows, before):
4222
4314
"%(tablename)s: %(error)s" ,
4223
4315
{'tablename' : tablename , 'error' : six .text_type (ex )})
4224
4316
4225
- # NOTE(jake): instance_actions_events doesn't have a instance_uuid column
4226
- # but still needs to be archived as it is a FK constraint
4227
- if ((max_rows is None or rows_archived < max_rows ) and
4228
- # NOTE(melwitt): The pci_devices table uses the 'instance_uuid'
4229
- # column to track the allocated association of a PCI device and its
4230
- # records are not tied to the lifecycles of instance records.
4231
- (tablename != 'pci_devices' and
4232
- 'instance_uuid' in columns or
4233
- tablename == 'instance_actions_events' )):
4234
- instances = models .BASE .metadata .tables ['instances' ]
4235
- limit = max_rows - rows_archived if max_rows is not None else None
4236
- extra = _archive_if_instance_deleted (table , shadow_table , instances ,
4237
- conn , limit , before )
4238
- rows_archived += extra
4239
-
4240
- return rows_archived , deleted_instance_uuids
4317
+ return rows_archived , deleted_instance_uuids , extras
4241
4318
4242
4319
4243
4320
def archive_deleted_rows (context = None , max_rows = None , before = None ):
@@ -4261,21 +4338,26 @@ def archive_deleted_rows(context=None, max_rows=None, before=None):
4261
4338
- list of UUIDs of instances that were archived
4262
4339
- total number of rows that were archived
4263
4340
"""
4264
- table_to_rows_archived = {}
4341
+ table_to_rows_archived = collections . defaultdict ( int )
4265
4342
deleted_instance_uuids = []
4266
4343
total_rows_archived = 0
4267
4344
meta = MetaData (get_engine (use_slave = True , context = context ))
4268
4345
meta .reflect ()
4269
- # Reverse sort the tables so we get the leaf nodes first for processing.
4270
- for table in reversed (meta .sorted_tables ):
4346
+ # Get the sorted list of tables in order of foreign key dependency.
4347
+ # Process the parent tables and find their dependent records in order to
4348
+ # archive the related records in a single database transactions. The goal
4349
+ # is to avoid a situation where, for example, an 'instances' table record
4350
+ # is missing its corresponding 'instance_extra' record due to running the
4351
+ # archive_deleted_rows command with max_rows.
4352
+ for table in meta .sorted_tables :
4271
4353
tablename = table .name
4272
4354
rows_archived = 0
4273
4355
# skip the special sqlalchemy-migrate migrate_version table and any
4274
4356
# shadow tables
4275
4357
if (tablename == 'migrate_version' or
4276
4358
tablename .startswith (_SHADOW_TABLE_PREFIX )):
4277
4359
continue
4278
- rows_archived , _deleted_instance_uuids = (
4360
+ rows_archived , _deleted_instance_uuids , extras = (
4279
4361
_archive_deleted_rows_for_table (
4280
4362
meta , tablename ,
4281
4363
max_rows = max_rows - total_rows_archived ,
@@ -4286,6 +4368,9 @@ def archive_deleted_rows(context=None, max_rows=None, before=None):
4286
4368
# Only report results for tables that had updates.
4287
4369
if rows_archived :
4288
4370
table_to_rows_archived [tablename ] = rows_archived
4371
+ for tablename , extra_rows_archived in extras .items ():
4372
+ table_to_rows_archived [tablename ] += extra_rows_archived
4373
+ total_rows_archived += extra_rows_archived
4289
4374
if total_rows_archived >= max_rows :
4290
4375
break
4291
4376
return table_to_rows_archived , deleted_instance_uuids , total_rows_archived
0 commit comments