Skip to content

Commit 9060971

Browse files
Zuulopenstack-gerrit
authored andcommitted
Merge "Improve Hash Ring logs" into stable/2023.1
2 parents 1e568e9 + 52cc015 commit 9060971

File tree

4 files changed

+72
-12
lines changed

4 files changed

+72
-12
lines changed

neutron/common/ovn/exceptions.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ class StandardAttributeIDNotFound(n_exc.NeutronException):
3333

3434

3535
class HashRingIsEmpty(n_exc.NeutronException):
36-
message = _('Hash Ring returned empty when hashing "%(key)s". '
37-
'This should never happen in a normal situation, please '
38-
'check the status of your cluster')
36+
message = _('Hash Ring returned empty when hashing "%(key)s". All '
37+
'%(node_count)d nodes were found offline. This should never '
38+
'happen in a normal situation, please check the status '
39+
'of your cluster')

neutron/common/ovn/hash_ring_manager.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ def __init__(self, group_name):
3838
# Flag to rate limit the caching log
3939
self._prev_num_nodes = -1
4040
self.admin_ctx = context.get_admin_context()
41+
self._offline_node_count = 0
4142

4243
@property
4344
def _wait_startup_before_caching(self):
@@ -92,6 +93,11 @@ def _load_hash_ring(self, refresh=False):
9293
self._hash_ring = hashring.HashRing({node.node_uuid
9394
for node in nodes})
9495
self._last_time_loaded = timeutils.utcnow()
96+
self._offline_node_count = db_hash_ring.count_offline_nodes(
97+
self.admin_ctx, constants.HASH_RING_NODES_TIMEOUT,
98+
self._group)
99+
LOG.debug("Hash Ring loaded. %d active nodes. %d offline nodes",
100+
len(nodes), self._offline_node_count)
95101

96102
def refresh(self):
97103
self._load_hash_ring(refresh=True)
@@ -108,4 +114,5 @@ def get_node(self, key):
108114
# KeyError is raised
109115
return self._hash_ring[key].pop()
110116
except KeyError:
111-
raise exceptions.HashRingIsEmpty(key=key)
117+
raise exceptions.HashRingIsEmpty(
118+
key=key, node_count=self._offline_node_count)

neutron/db/ovn_hash_ring_db.py

Lines changed: 33 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,14 @@
1717

1818
from neutron_lib.db import api as db_api
1919
from oslo_config import cfg
20+
from oslo_log import log
2021
from oslo_utils import timeutils
2122
from oslo_utils import uuidutils
2223

2324
from neutron.db.models import ovn as ovn_models
2425

2526
CONF = cfg.CONF
27+
LOG = log.getLogger(__name__)
2628

2729

2830
# NOTE(ralonsoh): this was migrated from networking-ovn to neutron and should
@@ -34,6 +36,8 @@ def add_node(context, group_name, node_uuid=None):
3436
with db_api.CONTEXT_WRITER.using(context):
3537
context.session.add(ovn_models.OVNHashRing(
3638
node_uuid=node_uuid, hostname=CONF.host, group_name=group_name))
39+
LOG.info('Node %s from host "%s" and group "%s" added to the Hash Ring',
40+
node_uuid, CONF.host, group_name)
3741
return node_uuid
3842

3943

@@ -42,6 +46,8 @@ def remove_nodes_from_host(context, group_name):
4246
context.session.query(ovn_models.OVNHashRing).filter(
4347
ovn_models.OVNHashRing.hostname == CONF.host,
4448
ovn_models.OVNHashRing.group_name == group_name).delete()
49+
LOG.info('Nodes from host "%s" and group "%s" removed from the Hash Ring',
50+
CONF.host, group_name)
4551

4652

4753
def _touch(context, **filter_args):
@@ -58,12 +64,31 @@ def touch_node(context, node_uuid):
5864
_touch(context, node_uuid=node_uuid)
5965

6066

61-
def get_active_nodes(context, interval, group_name, from_host=False):
67+
def _get_nodes_query(context, interval, group_name, offline=False,
68+
from_host=False):
6269
limit = timeutils.utcnow() - datetime.timedelta(seconds=interval)
63-
with db_api.CONTEXT_READER.using(context):
64-
query = context.session.query(ovn_models.OVNHashRing).filter(
65-
ovn_models.OVNHashRing.updated_at >= limit,
66-
ovn_models.OVNHashRing.group_name == group_name)
67-
if from_host:
68-
query = query.filter_by(hostname=CONF.host)
69-
return query.all()
70+
query = context.session.query(ovn_models.OVNHashRing).filter(
71+
ovn_models.OVNHashRing.group_name == group_name)
72+
73+
if offline:
74+
query = query.filter(ovn_models.OVNHashRing.updated_at < limit)
75+
else:
76+
query = query.filter(ovn_models.OVNHashRing.updated_at >= limit)
77+
78+
if from_host:
79+
query = query.filter_by(hostname=CONF.host)
80+
81+
return query
82+
83+
84+
@db_api.CONTEXT_READER
85+
def get_active_nodes(context, interval, group_name, from_host=False):
86+
query = _get_nodes_query(context, interval, group_name,
87+
from_host=from_host)
88+
return query.all()
89+
90+
91+
@db_api.CONTEXT_READER
92+
def count_offline_nodes(context, interval, group_name):
93+
query = _get_nodes_query(context, interval, group_name, offline=True)
94+
return query.count()

neutron/tests/unit/db/test_ovn_hash_ring_db.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -242,3 +242,30 @@ def test_touch_nodes_from_host_different_groups(self):
242242
for node in group2:
243243
node_db = self._get_node_row(node)
244244
self.assertEqual(node_db.created_at, node_db.updated_at)
245+
246+
def test_count_offline_nodes(self):
247+
self._add_nodes_and_assert_exists(count=3)
248+
249+
# Assert no nodes are considered offline
250+
self.assertEqual(0, ovn_hash_ring_db.count_offline_nodes(
251+
self.admin_ctx, interval=60, group_name=HASH_RING_TEST_GROUP))
252+
253+
# Subtract 60 seconds from utcnow() and touch the nodes to make
254+
# them to appear offline
255+
fake_utcnow = timeutils.utcnow() - datetime.timedelta(seconds=60)
256+
with mock.patch.object(timeutils, 'utcnow') as mock_utcnow:
257+
mock_utcnow.return_value = fake_utcnow
258+
ovn_hash_ring_db.touch_nodes_from_host(self.admin_ctx,
259+
HASH_RING_TEST_GROUP)
260+
261+
# Now assert that all nodes from our host are seeing as offline
262+
self.assertEqual(3, ovn_hash_ring_db.count_offline_nodes(
263+
self.admin_ctx, interval=60, group_name=HASH_RING_TEST_GROUP))
264+
265+
# Touch the nodes again without faking utcnow()
266+
ovn_hash_ring_db.touch_nodes_from_host(self.admin_ctx,
267+
HASH_RING_TEST_GROUP)
268+
269+
# Assert no nodes are considered offline
270+
self.assertEqual(0, ovn_hash_ring_db.count_offline_nodes(
271+
self.admin_ctx, interval=60, group_name=HASH_RING_TEST_GROUP))

0 commit comments

Comments
 (0)