13
13
import backoff
14
14
import os
15
15
16
class PeerDiscoveryException(Exception):
    """Signal that peer discovery has not yet found the expected peers.

    Raised by the discovery step so that a retry decorator can catch it
    and try the DNS lookup again.
    """
18
+
16
19
def construct_service_record():
    """Return the DNS SRV record name used to look up cluster peers.

    A non-empty ``SRV_RECORD`` environment variable is returned as-is.
    Otherwise the name is derived from this host's fully qualified domain
    name: the first label is dropped and ``_couchdb._tcp`` is put in its
    place.
    """
    override = os.getenv('SRV_RECORD')
    if override:
        return override
    # Drop our Pod's unique identity (the first FQDN label) and replace it
    # with the '_couchdb._tcp' service prefix.
    domain_labels = socket.getfqdn().split('.')[1:]
    return '.'.join(['_couchdb', '_tcp'] + domain_labels)
@@ -22,29 +25,31 @@ def construct_service_record():
22
25
dns .resolver .NXDOMAIN ,
23
26
max_tries = 10
24
27
)
28
@backoff.on_exception(
    backoff.expo,
    PeerDiscoveryException,
    max_tries=10
)
def discover_peers(service_record):
    """Resolve ``service_record`` and return the peer hostnames it lists.

    When the ``COUCHDB_CLUSTER_SIZE`` environment variable is set, the
    number of SRV answers must equal it; otherwise a
    ``PeerDiscoveryException`` is raised, which the ``backoff`` decorator
    retries with exponential backoff for up to 10 tries. When the variable
    is unset or empty, whatever the lookup returns is accepted immediately.

    Args:
        service_record: DNS name to query for SRV records.

    Returns:
        A list of SRV target names with the trailing period removed.

    Raises:
        PeerDiscoveryException: the number of discovered peers differs
            from ``COUCHDB_CLUSTER_SIZE``.
        ValueError: ``COUCHDB_CLUSTER_SIZE`` is set but is not an integer.
    """
    # os.getenv returns None when the variable is missing; int(None) would
    # raise TypeError and make the "not set" branch below unreachable, so
    # only convert when a value is actually present.
    raw_cluster_size = os.getenv('COUCHDB_CLUSTER_SIZE')
    expected_peers_count = int(raw_cluster_size) if raw_cluster_size else 0
    if expected_peers_count:
        print('Expecting', expected_peers_count, 'peers...')
    else:
        print('Looks like COUCHDB_CLUSTER_SIZE is not set, will not wait for DNS...')
    print('Resolving SRV record:', service_record)
    # Erlang requires that we drop the trailing period from the absolute DNS
    # name to form the hostname used for the Erlang node. This feels hacky
    # but not sure of a more official answer
    answers = dns.resolver.query(service_record, 'SRV')
    peers = [rdata.target.to_text()[:-1] for rdata in answers]
    peers_count = len(peers)
    if expected_peers_count:
        print('Discovered', peers_count, 'of', expected_peers_count, 'peers:', peers)
        if peers_count != expected_peers_count:
            print('Waiting for cluster DNS to fully propagate...')
            # Raising hands control back to the backoff decorator, which
            # sleeps and calls this function again.
            raise PeerDiscoveryException(
                'Discovered {} of {} expected peers'.format(
                    peers_count, expected_peers_count
                )
            )
    else:
        print('Discovered', peers_count, 'peers:', peers)
    return peers
49
54
50
55
@backoff .on_exception (
0 commit comments