Skip to content

Commit 1be47d1

Browse files
committed
Backoff on exception instead of waiting in while loop
1 parent 4a50dd0 commit 1be47d1

File tree

1 file changed

+23
-18
lines changed

1 file changed

+23
-18
lines changed

mem3_helper.py

Lines changed: 23 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,9 @@
1313
import backoff
1414
import os
1515

16+
class PeerDiscoveryException(Exception):
    """Signals that peer discovery has not yet produced the expected peers."""
18+
1619
def construct_service_record():
    """Return the SRV record name used to look up cluster peers.

    A non-empty ``SRV_RECORD`` environment variable takes precedence.
    Otherwise the name is built from this host's fully qualified domain
    name with its first label replaced by ``_couchdb._tcp``.
    """
    override = os.getenv('SRV_RECORD')
    if override:
        return override
    # Drop our Pod's unique identity (the first FQDN label) and replace it
    # with '_couchdb._tcp'.
    domain_labels = socket.getfqdn().split('.')[1:]
    return '.'.join(['_couchdb', '_tcp', *domain_labels])
@@ -22,29 +25,31 @@ def construct_service_record():
2225
dns.resolver.NXDOMAIN,
2326
max_tries=10
2427
)
28+
@backoff.on_exception(
29+
backoff.expo,
30+
PeerDiscoveryException,
31+
max_tries=10
32+
)
2533
def discover_peers(service_record):
    """Resolve ``service_record`` as a DNS SRV record and return peer hostnames.

    When the ``COUCHDB_CLUSTER_SIZE`` environment variable holds a non-zero
    integer, the number of discovered peers must equal it; otherwise
    ``PeerDiscoveryException`` is raised so that a retry wrapper around this
    function can try again once DNS has propagated. When the variable is
    unset, empty or zero, the result of the first lookup is accepted as-is.

    Args:
        service_record: Name of the SRV record to query.

    Returns:
        A list of SRV target hostnames with the trailing period removed.

    Raises:
        PeerDiscoveryException: The expected cluster size is known and the
            number of discovered peers differs from it.
        ValueError: ``COUCHDB_CLUSTER_SIZE`` is set but is not an integer.
    """
    # os.getenv returns None when the variable is unset and int(None) raises
    # TypeError, so only convert when there is actually a value to convert.
    raw_cluster_size = os.getenv('COUCHDB_CLUSTER_SIZE')
    expected_peers_count = int(raw_cluster_size) if raw_cluster_size else 0
    if expected_peers_count:
        print('Expecting', expected_peers_count, 'peers...')
    else:
        print('Looks like COUCHDB_CLUSTER_SIZE is not set, will not wait for DNS...')

    print('Resolving SRV record:', service_record)
    # Erlang requires that we drop the trailing period from the absolute DNS
    # name to form the hostname used for the Erlang node. This feels hacky
    # but not sure of a more official answer
    answers = dns.resolver.query(service_record, 'SRV')
    peers = [rdata.target.to_text()[:-1] for rdata in answers]
    peers_count = len(peers)

    if not expected_peers_count:
        print('Discovered', peers_count, 'peers:', peers)
        return peers

    print('Discovered', peers_count, 'of', expected_peers_count, 'peers:', peers)
    if peers_count != expected_peers_count:
        print('Waiting for cluster DNS to fully propagate...')
        # Raising (instead of sleeping in a loop) hands retry timing to the
        # caller's retry/backoff policy.
        raise PeerDiscoveryException(
            'Discovered {} of {} peers'.format(peers_count, expected_peers_count)
        )
    return peers
4954

5055
@backoff.on_exception(

0 commit comments

Comments
 (0)