Skip to content

Commit 4a50dd0

Browse files
committed
[GPII-3624]: Improve peer discovery mechanics
1 parent aa5bb21 commit 4a50dd0

File tree

1 file changed

+24
-7
lines changed

1 file changed

+24
-7
lines changed

mem3_helper.py

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,29 @@ def construct_service_record():
2323
max_tries=10
2424
)
2525
def discover_peers(service_record):
    """Resolve an SRV record and return the peer hostnames it lists.

    When the ``COUCHDB_CLUSTER_SIZE`` environment variable holds a positive
    integer, the record is re-resolved every 5 seconds until it lists exactly
    that many targets. When the variable is unset, empty, or not a positive
    integer, the record is resolved once and whatever it lists is returned.

    Args:
        service_record: DNS name of the SRV record to query.

    Returns:
        A list of target hostnames with the trailing period removed.

    Raises:
        Any exception raised by ``dns.resolver.query`` propagates unchanged.
    """
    raw_cluster_size = os.getenv('COUCHDB_CLUSTER_SIZE')
    expected_peers_count = None
    if raw_cluster_size:
        # Compare counts as integers: a string comparison never matches for
        # values such as "03" or " 3", and "0" used to skip the loop entirely
        # and hit an unbound `peers` on return.
        try:
            expected_peers_count = int(raw_cluster_size)
        except ValueError:
            expected_peers_count = None
        if expected_peers_count is not None and expected_peers_count < 1:
            expected_peers_count = None

        if expected_peers_count is None:
            print('Ignoring invalid COUCHDB_CLUSTER_SIZE', repr(raw_cluster_size),
                  '- will not wait for DNS...')
        else:
            print('Expecting', expected_peers_count, 'peers...')
    else:
        print('Looks like COUCHDB_CLUSTER_SIZE is not set, will not wait for DNS...')

    while True:
        print('Resolving SRV record:', service_record)
        answers = dns.resolver.query(service_record, 'SRV')
        # Erlang requires that we drop the trailing period from the absolute DNS
        # name to form the hostname used for the Erlang node. This feels hacky
        # but not sure of a more official answer. rstrip only removes a period,
        # so a name without one is left intact.
        peers = [rdata.target.to_text().rstrip('.') for rdata in answers]
        peers_count = len(peers)

        if expected_peers_count is None:
            # No usable expectation: a single lookup is all we can do.
            print('Discovered', peers_count, 'peers:', peers)
            return peers

        print('Discovered', peers_count, 'of', expected_peers_count, 'peers:', peers)
        if peers_count == expected_peers_count:
            return peers

        print('Waiting for cluster DNS to fully propagate...')
        time.sleep(5)
3249

3350
@backoff.on_exception(
3451
backoff.expo,
@@ -45,7 +62,7 @@ def connect_the_dots(names):
4562
else:
4663
resp = requests.put(uri, data=json.dumps(doc))
4764
while resp.status_code == 404:
48-
print('Waiting for _nodes DB to be created ...')
65+
print('Waiting for _nodes DB to be created...')
4966
time.sleep(5)
5067
resp = requests.put(uri, data=json.dumps(doc))
5168
print('Adding cluster member', name, resp.status_code)

0 commit comments

Comments
 (0)