|
30 | 30 | PATTERN_IPV4 = re.compile(r"^((\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})):(\d+)$")
|
31 | 31 | PATTERN_IPV6 = re.compile(r"^\[([0-9a-z:]+)\]:(\d+)$")
|
32 | 32 | PATTERN_ONION = re.compile(r"^([abcdefghijklmnopqrstuvwxyz234567]{16}\.onion):(\d+)$")
|
33 |
| -PATTERN_AGENT = re.compile(r"^(/Satoshi:0.14.(0|1|2|99)/|/Satoshi:0.15.(0|1|2|99)|/Satoshi:0.16.(0|1|2|99)/)$") |
# Accepted user agents: "/Satoshi:" followed by one of the listed release
# versions. The dots are escaped so they only match a literal '.', not any
# character. The pattern is anchored at the start only, so any text that
# follows the version (closing slash, comments, extra agents) is accepted.
PATTERN_AGENT = re.compile(
    r"^/Satoshi:("
    r"0\.14\.(0|1|2|3|99)|"
    r"0\.15\.(0|1|2|99)|"
    r"0\.16\.(0|1|2|3|99)|"
    r"0\.17\.(0|0\.1|1|2|99)|"
    r"0\.18\.(0|1|99)|"
    r"0\.19\.99"
    r")")
34 | 42 |
|
35 | 43 | def parseline(line):
|
36 | 44 | sline = line.split()
|
@@ -99,6 +107,13 @@ def parseline(line):
|
99 | 107 | 'sortkey': sortkey,
|
100 | 108 | }
|
101 | 109 |
|
def dedup(ips):
    '''Deduplicate entries by (address, port), keeping the last occurrence.

    Keying on the address alone would collapse a host that listens on
    several ports into a single entry, so a later per-host multi-port
    filter could no longer detect it. Exact duplicates (same address and
    port, e.g. from concatenated seeds files) are still merged.

    ips -- iterable of entry dicts, each with an 'ip' key.
    Returns a list with one dict per distinct (address, port) pair.
    '''
    unique = {}
    for entry in ips:
        # NOTE(review): assumes parsed entries store their port under 'port'
        # -- confirm against parseline. Entries without that key fall back
        # to address-only deduplication.
        unique[entry['ip'], entry.get('port')] = entry
    return list(unique.values())
| 116 | + |
102 | 117 | def filtermultiport(ips):
|
103 | 118 | '''Filter out hosts with more nodes per IP'''
|
104 | 119 | hist = collections.defaultdict(list)
|
@@ -146,29 +161,54 @@ def filterbyasn(ips, max_per_asn, max_total):
|
146 | 161 | result.extend(ips_onion)
|
147 | 162 | return result
|
148 | 163 |
|
def ip_stats(ips):
    '''Return a one-line summary of how many entries each network has.

    ips -- iterable of entry dicts with a 'net' key; None items are ignored.
    Only the 'ipv4', 'ipv6' and 'onion' counts appear in the result.
    '''
    per_net = collections.Counter(
        entry['net'] for entry in ips if entry is not None
    )
    return 'IPv4 %d, IPv6 %d, Onion %d' % tuple(
        per_net[net] for net in ('ipv4', 'ipv6', 'onion')
    )
| 171 | + |
149 | 172 | def main():
|
150 | 173 | lines = sys.stdin.readlines()
|
151 | 174 | ips = [parseline(line) for line in lines]
|
152 | 175 |
|
153 |
| - # Skip entries with valid address. |
| 176 | + print('Initial: %s' % (ip_stats(ips)), file=sys.stderr) |
| 177 | + # Skip entries with invalid address. |
154 | 178 | ips = [ip for ip in ips if ip is not None]
|
| 179 | + print('Skip entries with invalid address: %s' % (ip_stats(ips)), file=sys.stderr) |
| 180 | + # Skip duplicates (in case multiple seeds files were concatenated) |
| 181 | + ips = dedup(ips) |
| 182 | + print('After removing duplicates: %s' % (ip_stats(ips)), file=sys.stderr) |
155 | 183 | # Skip entries from suspicious hosts.
|
156 | 184 | ips = [ip for ip in ips if ip['ip'] not in SUSPICIOUS_HOSTS]
|
| 185 | + print('Skip entries from suspicious hosts: %s' % (ip_stats(ips)), file=sys.stderr) |
157 | 186 | # Enforce minimal number of blocks.
|
158 | 187 | ips = [ip for ip in ips if ip['blocks'] >= MIN_BLOCKS]
|
| 188 | + print('Enforce minimal number of blocks: %s' % (ip_stats(ips)), file=sys.stderr) |
159 | 189 | # Require service bit 1.
|
160 | 190 | ips = [ip for ip in ips if (ip['service'] & 1) == 1]
|
161 |
| - # Require at least 50% 30-day uptime. |
162 |
| - ips = [ip for ip in ips if ip['uptime'] > 50] |
| 191 | + print('Require service bit 1: %s' % (ip_stats(ips)), file=sys.stderr) |
| 192 | + # Require at least 50% 30-day uptime for clearnet, 10% for onion. |
| 193 | + req_uptime = { |
| 194 | + 'ipv4': 50, |
| 195 | + 'ipv6': 50, |
| 196 | + 'onion': 10, |
| 197 | + } |
| 198 | + ips = [ip for ip in ips if ip['uptime'] > req_uptime[ip['net']]] |
| 199 | + print('Require minimum uptime: %s' % (ip_stats(ips)), file=sys.stderr) |
163 | 200 | # Require a known and recent user agent.
|
164 | 201 | ips = [ip for ip in ips if PATTERN_AGENT.match(ip['agent'])]
|
| 202 | + print('Require a known and recent user agent: %s' % (ip_stats(ips)), file=sys.stderr) |
165 | 203 | # Sort by availability (and use last success as tie breaker)
|
166 | 204 | ips.sort(key=lambda x: (x['uptime'], x['lastsuccess'], x['ip']), reverse=True)
|
167 | 205 | # Filter out hosts with multiple bitcoin ports, these are likely abusive
|
168 | 206 | ips = filtermultiport(ips)
|
| 207 | + print('Filter out hosts with multiple bitcoin ports: %s' % (ip_stats(ips)), file=sys.stderr) |
169 | 208 | # Look up ASNs and limit results, both per ASN and globally.
|
170 | 209 | ips = filterbyasn(ips, MAX_SEEDS_PER_ASN, NSEEDS)
|
171 | 210 | # Sort the results by IP address (for deterministic output).
|
| 211 | + print('Look up ASNs and limit results, both per ASN and globally: %s' % (ip_stats(ips)), file=sys.stderr) |
172 | 212 | ips.sort(key=lambda x: (x['net'], x['sortkey']))
|
173 | 213 |
|
174 | 214 | for ip in ips:
|
|
0 commit comments