10
10
import sys
11
11
import dns .resolver
12
12
import collections
13
+ from typing import List , Dict , Union
13
14
14
15
# Upper bound on the number of seeds kept per network; passed to
# filterbyasn() as `max_per_net`.
NSEEDS = 512

# Maximum number of seeds kept per ASN, keyed by network name.  filterbyasn()
# indexes this mapping with each entry's 'net' value.
MAX_SEEDS_PER_ASN = {
    'ipv4': 2,
    'ipv6': 10,
}

# Entries whose 'blocks' value is below this threshold are dropped.
MIN_BLOCKS = 730000
19
23
20
24
# These are hosts that have been observed to be behaving strangely (e.g.
21
25
# aggressively connecting to every node).
40
44
r"23.99"
41
45
r")" )
42
46
43
- def parseline (line ):
47
+ def parseline (line : str ) -> Union [dict , None ]:
48
+ """ Parses a line from `seeds_main.txt` into a dictionary of details for that line.
49
+ or `None`, if the line could not be parsed.
50
+ """
44
51
sline = line .split ()
45
52
if len (sline ) < 11 :
53
+ # line too short to be valid, skip it.
46
54
return None
47
55
m = PATTERN_IPV4 .match (sline [0 ])
48
56
sortkey = None
@@ -107,25 +115,26 @@ def parseline(line):
107
115
'sortkey' : sortkey ,
108
116
}
109
117
110
def dedup(ips: List[Dict]) -> List[Dict]:
    """Remove duplicates from `ips` where multiple entries share address and port.

    Entries are keyed on the pair ``(ip['ip'], ip['port'])``.  When several
    entries share a key, the last one seen is the one kept, but it occupies
    the position of the first occurrence of that key (dicts preserve the
    insertion order of keys, and overwriting a value does not move the key).

    Returns a new list; `ips` itself is not modified.
    """
    # Later entries overwrite earlier ones that have the same (address, port).
    unique = {(ip['ip'], ip['port']): ip for ip in ips}
    return list(unique.values())
116
124
117
def filtermultiport(ips: List[Dict]) -> List[Dict]:
    """Filter out hosts with more than one node per IP.

    Entries are grouped by their 'sortkey' value.  Every group that contains
    more than one entry is dropped entirely; entries whose 'sortkey' is unique
    are returned in order of first appearance.
    """
    hist = collections.defaultdict(list)
    for ip in ips:
        hist[ip['sortkey']].append(ip)
    # Only the groups are needed here, so iterate the values directly.
    return [group[0] for group in hist.values() if len(group) == 1]
123
131
124
- def lookup_asn (net , ip ):
125
- '''
126
- Look up the asn for an IP (4 or 6) address by querying cymru.com, or None
127
- if it could not be found.
128
- '''
132
+ def lookup_asn (net : str , ip : str ) -> Union [int , None ]:
133
+ """ Look up the asn for an `ip` address by querying cymru.com
134
+ on network `net` (e.g. ipv4 or ipv6).
135
+
136
+ Returns an integer ASN or None if it could not be found.
137
+ """
129
138
try :
130
139
if net == 'ipv4' :
131
140
ipaddr = ip
@@ -147,20 +156,33 @@ def lookup_asn(net, ip):
147
156
return None
148
157
149
158
# Based on Greg Maxwell's seed_filter.py
150
- def filterbyasn (ips , max_per_asn , max_per_net ):
159
+ def filterbyasn (ips : List [Dict ], max_per_asn : Dict , max_per_net : int ) -> List [Dict ]:
160
+ """ Prunes `ips` by
161
+ (a) trimming ips to have at most `max_per_net` ips from each net (e.g. ipv4, ipv6); and
162
+ (b) trimming ips to have at most `max_per_asn` ips from each asn in each net.
163
+ """
151
164
# Sift out ips by type
152
165
ips_ipv46 = [ip for ip in ips if ip ['net' ] in ['ipv4' , 'ipv6' ]]
153
166
ips_onion = [ip for ip in ips if ip ['net' ] == 'onion' ]
154
167
155
168
# Filter IPv46 by ASN, and limit to max_per_net per network
156
169
result = []
157
- net_count = collections .defaultdict (int )
158
- asn_count = collections .defaultdict (int )
159
- for ip in ips_ipv46 :
170
+ net_count : Dict [str , int ] = collections .defaultdict (int )
171
+ asn_count : Dict [int , int ] = collections .defaultdict (int )
172
+
173
+ for i , ip in enumerate (ips_ipv46 ):
174
+ if i % 10 == 0 :
175
+ # give progress update
176
+ print (f"{ i :6d} /{ len (ips_ipv46 )} [{ 100 * i / len (ips_ipv46 ):04.1f} %]\r " , file = sys .stderr , end = '' , flush = True )
177
+
160
178
if net_count [ip ['net' ]] == max_per_net :
179
+ # do not add this ip as we already have too many
180
+ # ips from this network
161
181
continue
162
182
asn = lookup_asn (ip ['net' ], ip ['ip' ])
163
- if asn is None or asn_count [asn ] == max_per_asn :
183
+ if asn is None or asn_count [asn ] == max_per_asn [ip ['net' ]]:
184
+ # do not add this ip as we already have too many
185
+ # ips from this ASN on this network
164
186
continue
165
187
asn_count [asn ] += 1
166
188
net_count [ip ['net' ]] += 1
@@ -170,54 +192,55 @@ def filterbyasn(ips, max_per_asn, max_per_net):
170
192
result .extend (ips_onion [0 :max_per_net ])
171
193
return result
172
194
173
def ip_stats(ips: List[Union[Dict, None]]) -> str:
    """Format and return a pretty string of per-network counts from `ips`.

    Counts the entries by their 'net' value and returns the 'ipv4', 'ipv6'
    and 'onion' counts, each right-aligned in a 6-character column and
    separated by single spaces.  `None` entries (lines that could not be
    parsed) are ignored, so this may be called before they are filtered out.
    """
    hist = collections.Counter(ip['net'] for ip in ips if ip is not None)
    # A Counter yields 0 for networks that never occurred.
    return f"{hist['ipv4']:6d} {hist['ipv6']:6d} {hist['onion']:6d}"
180
203
181
204
def main ():
182
205
lines = sys .stdin .readlines ()
183
206
ips = [parseline (line ) for line in lines ]
184
207
185
208
print ('\x1b [7m IPv4 IPv6 Onion Pass \x1b [0m' , file = sys .stderr )
186
- print ('%s Initial' % ( ip_stats (ips )) , file = sys .stderr )
209
+ print (f' { ip_stats (ips ):s } Initial' , file = sys .stderr )
187
210
# Skip entries with invalid address.
188
211
ips = [ip for ip in ips if ip is not None ]
189
- print ('%s Skip entries with invalid address' % ( ip_stats ( ips )) , file = sys .stderr )
212
+ print (f' { ip_stats ( ips ):s } Skip entries with invalid address' , file = sys .stderr )
190
213
# Skip duplicates (in case multiple seeds files were concatenated)
191
214
ips = dedup (ips )
192
- print ('%s After removing duplicates' % ( ip_stats ( ips )) , file = sys .stderr )
215
+ print (f' { ip_stats ( ips ):s } After removing duplicates' , file = sys .stderr )
193
216
# Skip entries from suspicious hosts.
194
217
ips = [ip for ip in ips if ip ['ip' ] not in SUSPICIOUS_HOSTS ]
195
- print ('%s Skip entries from suspicious hosts' % ( ip_stats ( ips )) , file = sys .stderr )
218
+ print (f' { ip_stats ( ips ):s } Skip entries from suspicious hosts' , file = sys .stderr )
196
219
# Enforce minimal number of blocks.
197
220
ips = [ip for ip in ips if ip ['blocks' ] >= MIN_BLOCKS ]
198
- print ('%s Enforce minimal number of blocks' % ( ip_stats ( ips )) , file = sys .stderr )
221
+ print (f' { ip_stats ( ips ):s } Enforce minimal number of blocks' , file = sys .stderr )
199
222
# Require service bit 1.
200
223
ips = [ip for ip in ips if (ip ['service' ] & 1 ) == 1 ]
201
- print ('%s Require service bit 1' % ( ip_stats ( ips )) , file = sys .stderr )
224
+ print (f' { ip_stats ( ips ):s } Require service bit 1' , file = sys .stderr )
202
225
# Require at least 50% 30-day uptime for clearnet, 10% for onion.
203
226
req_uptime = {
204
227
'ipv4' : 50 ,
205
228
'ipv6' : 50 ,
206
229
'onion' : 10 ,
207
230
}
208
231
ips = [ip for ip in ips if ip ['uptime' ] > req_uptime [ip ['net' ]]]
209
- print ('%s Require minimum uptime' % ( ip_stats ( ips )) , file = sys .stderr )
232
+ print (f' { ip_stats ( ips ):s } Require minimum uptime' , file = sys .stderr )
210
233
# Require a known and recent user agent.
211
234
ips = [ip for ip in ips if PATTERN_AGENT .match (ip ['agent' ])]
212
- print ('%s Require a known and recent user agent' % ( ip_stats ( ips )) , file = sys .stderr )
235
+ print (f' { ip_stats ( ips ):s } Require a known and recent user agent' , file = sys .stderr )
213
236
# Sort by availability (and use last success as tie breaker)
214
237
ips .sort (key = lambda x : (x ['uptime' ], x ['lastsuccess' ], x ['ip' ]), reverse = True )
215
238
# Filter out hosts with multiple bitcoin ports, these are likely abusive
216
239
ips = filtermultiport (ips )
217
- print ('%s Filter out hosts with multiple bitcoin ports' % ( ip_stats ( ips )) , file = sys .stderr )
240
+ print (f' { ip_stats ( ips ):s } Filter out hosts with multiple bitcoin ports' , file = sys .stderr )
218
241
# Look up ASNs and limit results, both per ASN and globally.
219
242
ips = filterbyasn (ips , MAX_SEEDS_PER_ASN , NSEEDS )
220
- print ('%s Look up ASNs and limit results per ASN and per net' % ( ip_stats ( ips )) , file = sys .stderr )
243
+ print (f' { ip_stats ( ips ):s } Look up ASNs and limit results per ASN and per net' , file = sys .stderr )
221
244
# Sort the results by IP address (for deterministic output).
222
245
ips .sort (key = lambda x : (x ['net' ], x ['sortkey' ]))
223
246
for ip in ips :
0 commit comments