Skip to content

Commit 03fe127

Browse files
markgoddard authored and jovial committed
Fix inactive session error in compute node creation
In the fix for bug 1839560 [1][2], soft-deleted compute nodes may be restored, to ensure we can reuse ironic node UUIDs as compute node UUIDs. While this seems to largely work, it results in some nasty errors being generated [3]:

    InvalidRequestError This session is in 'inactive' state, due to the SQL transaction being rolled back; no further SQL can be emitted within this transaction.

This happens because compute_node_create is decorated with pick_context_manager_writer, which begins a transaction. While _compute_node_get_and_update_deleted claims that calling a second pick_context_manager_writer decorated function will begin a new subtransaction, this does not appear to be the case.

This change removes pick_context_manager_writer from the compute_node_create function, and adds a new _compute_node_create function which ensures the transaction is finished if _compute_node_get_and_update_deleted is called.

The new unit test added here fails without this change.

This change marks the removal of the final FIXME from the functional test added in [4].

[1] https://bugs.launchpad.net/nova/+bug/1839560
[2] https://git.openstack.org/cgit/openstack/nova/commit/?id=89dd74ac7f1028daadf86cb18948e27fe9d1d411
[3] http://paste.openstack.org/show/786350/
[4] https://review.opendev.org/#/c/695012/

Change-Id: Iae119ea8776bc7f2e5dbe2e502a743217beded73
Closes-Bug: #1853159
Related-Bug: #1853009
(cherry picked from commit 54038b7f914d624a6684b5c0f168bdf84872a60c)
1 parent 054b08e commit 03fe127

File tree

3 files changed

+29
-35
lines changed

3 files changed

+29
-35
lines changed

nova/db/sqlalchemy/api.py

Lines changed: 15 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -690,16 +690,29 @@ def compute_node_search_by_hypervisor(context, hypervisor_match):
690690

691691

692692
@pick_context_manager_writer
693-
def compute_node_create(context, values):
693+
def _compute_node_create(context, values):
694694
"""Creates a new ComputeNode and populates the capacity fields
695695
with the most recent data.
696696
"""
697697
convert_objects_related_datetimes(values)
698698

699699
compute_node_ref = models.ComputeNode()
700700
compute_node_ref.update(values)
701+
compute_node_ref.save(context.session)
702+
return compute_node_ref
703+
704+
705+
# NOTE(mgoddard): We avoid decorating this with @pick_context_manager_writer,
706+
# so that we get a separate transaction in the exception handler. This avoids
707+
# an error message about inactive DB sessions during a transaction rollback.
708+
# See https://bugs.launchpad.net/nova/+bug/1853159.
709+
def compute_node_create(context, values):
710+
"""Creates a new ComputeNode and populates the capacity fields
711+
with the most recent data. Will restore a soft deleted compute node if a
712+
UUID has been explicitly requested.
713+
"""
701714
try:
702-
compute_node_ref.save(context.session)
715+
compute_node_ref = _compute_node_create(context, values)
703716
except db_exc.DBDuplicateEntry:
704717
with excutils.save_and_reraise_exception(logger=LOG) as err_ctx:
705718
# Check to see if we have a (soft) deleted ComputeNode with the

nova/tests/functional/regressions/test_bug_1853009.py

Lines changed: 0 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -85,10 +85,6 @@ def test_node_rebalance_deleted_compute_node_race(self):
8585

8686
# host_b[1]: Finds no compute record in RT. Tries to create one
8787
# (_init_compute_node).
88-
# FIXME(mgoddard): This shows a traceback with SQL rollback due to
89-
# soft-deleted node. The create seems to succeed but breaks the RT
90-
# update for this node. See
91-
# https://bugs.launchpad.net/nova/+bug/1853159.
9288
host_b.manager.update_available_resource(self.ctxt)
9389
self._assert_hypervisor_api(self.nodename, expected_host='host_b')
9490
# There should only be one resource provider (fake-node).
@@ -164,41 +160,12 @@ def test_node_rebalance_deleted_compute_node_race(self):
164160
self.ctxt, cn, cascade=True)
165161

166162
# host_b[3]: Should recreate compute node and resource provider.
167-
# FIXME(mgoddard): Resource provider not recreated here, due to
168-
# https://bugs.launchpad.net/nova/+bug/1853159.
169163
host_b.manager.update_available_resource(self.ctxt)
170164

171165
# Verify that the node was recreated.
172166
self._assert_hypervisor_api(self.nodename, 'host_b')
173167

174-
# But due to https://bugs.launchpad.net/nova/+bug/1853159 the compute
175-
# node is not cached in the RT.
176-
self.assertNotIn(self.nodename, host_b.manager.rt.compute_nodes)
177-
178-
# There is no RP.
179-
rps = self._get_all_providers()
180-
self.assertEqual(0, len(rps), rps)
181-
182-
# But the RP exists in the provider tree.
183-
self.assertFalse(host_b.manager.rt.reportclient._provider_tree.exists(
184-
self.nodename))
185-
186-
# host_b[1]: Should add compute node to RT cache and recreate resource
187-
# provider.
188-
host_b.manager.update_available_resource(self.ctxt)
189-
190-
# Verify that the node still exists.
191-
self._assert_hypervisor_api(self.nodename, 'host_b')
192-
193-
# And it is now in the RT cache.
194-
self.assertIn(self.nodename, host_b.manager.rt.compute_nodes)
195-
196168
# The resource provider has now been created.
197169
rps = self._get_all_providers()
198170
self.assertEqual(1, len(rps), rps)
199171
self.assertEqual(self.nodename, rps[0]['name'])
200-
201-
# This fails due to the lack of a resource provider.
202-
self.assertIn(
203-
'Skipping removal of allocations for deleted instances',
204-
self.stdlog.logger.output)

nova/tests/unit/db/test_db_api.py

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5283,6 +5283,20 @@ def test_compute_node_create_duplicate_host_hypervisor_hostname(self):
52835283
self.assertRaises(db_exc.DBDuplicateEntry,
52845284
db.compute_node_create, self.ctxt, other_node)
52855285

5286+
def test_compute_node_create_duplicate_uuid(self):
5287+
"""Tests to make sure that no exception is raised when trying to create
5288+
a compute node with the same host, hypervisor_hostname and uuid values
5289+
as another compute node that was previously soft-deleted.
5290+
"""
5291+
# Prior to fixing https://bugs.launchpad.net/nova/+bug/1853159, this
5292+
# raised the following error:
5293+
# sqlalchemy.exc.InvalidRequestError: This session is in 'inactive'
5294+
# state, due to the SQL transaction being rolled back; no further SQL
5295+
# can be emitted within this transaction.
5296+
db.compute_node_delete(self.ctxt, self.item['id'], self.item['host'])
5297+
new_node = db.compute_node_create(self.ctxt, self.compute_node_dict)
5298+
self.assertEqual(self.item['uuid'], new_node['uuid'])
5299+
52865300
def test_compute_node_get_all(self):
52875301
nodes = db.compute_node_get_all(self.ctxt)
52885302
self.assertEqual(1, len(nodes))

0 commit comments

Comments
 (0)