Skip to content

Commit b541803

Browse files
laanwj, sipa, jamesob, portlandhodl
committed
contrib: Use asmap for ASN lookup in makeseeds
Add an argument `-a` to provide an asmap file to do the IP to ASN lookups. This speeds up the script greatly, and makes the output deterministic. Also removes the dependency on `dns.lookup`. I've annotated the output with ASxxxx comments to provide a way to verify the functionality. For now I've added instructions in README.md to download and use the `demo.map` from the asmap repository. When we have some other mechanism for distributing asmap files we could switch to that. This continues #24824. I've removed all the fallbacks and extra complexity, as everyone will be using the same instructions anyway. Co-authored-by: Pieter Wuille <[email protected]> Co-authored-by: James O'Beirne <[email protected]> Co-authored-by: russeree <[email protected]>
1 parent bd6c5e4 commit b541803

File tree

4 files changed

+120
-48
lines changed

4 files changed

+120
-48
lines changed

contrib/seeds/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
seeds_main.txt
2+
asmap-filled.dat

contrib/seeds/README.md

Lines changed: 4 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -8,21 +8,11 @@ and remove old versions as necessary (at a minimum when GetDesirableServiceFlags
88
changes its default return value, as those are the services which seeds are added
99
to addrman with).
1010

11-
The seeds compiled into the release are created from sipa's DNS seed data, like this:
11+
The seeds compiled into the release are created from sipa's DNS seed and AS map
12+
data. Run the following commands from the `/contrib/seeds` directory:
1213

1314
curl https://bitcoin.sipa.be/seeds.txt.gz | gzip -dc > seeds_main.txt
14-
python3 makeseeds.py < seeds_main.txt > nodes_main.txt
15+
curl https://bitcoin.sipa.be/asmap-filled.dat > asmap-filled.dat
16+
python3 makeseeds.py -a asmap-filled.dat < seeds_main.txt > nodes_main.txt
1517
cat nodes_main_manual.txt >> nodes_main.txt
1618
python3 generate-seeds.py . > ../../src/chainparamsseeds.h
17-
18-
## Dependencies
19-
20-
Ubuntu, Debian:
21-
22-
sudo apt-get install python3-dnspython
23-
24-
and/or for other operating systems:
25-
26-
pip install dnspython
27-
28-
See https://dnspython.readthedocs.io/en/latest/installation.html for more information.

contrib/seeds/asmap.py

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
#!/usr/bin/env python3
2+
# Copyright (c) 2013-2020 The Bitcoin Core developers
3+
# Distributed under the MIT software license, see the accompanying
4+
# file COPYING or http://www.opensource.org/licenses/mit-license.php.
5+
import ipaddress
6+
7+
# Convert a byte array to a bit array
8+
def DecodeBytes(byts):
    '''
    Expand an iterable of byte values into a flat list of bits.

    Each byte contributes eight entries, least-significant bit first.
    '''
    bits = []
    for octet in byts:
        bits.extend((octet >> shift) & 1 for shift in range(8))
    return bits
10+
11+
def DecodeBits(stream, bitpos, minval, bit_sizes):
    '''
    Decode one variable-length integer from a bit stream.

    The value is encoded as a size-class selector followed by a mantissa.
    For every class except the last, one selector bit is read: a 1 skips the
    class (adding 2**bit_size to the value) and a 0 selects it. The last
    class has no selector bit and is always selected. Once a class is
    selected, `bit_size` further bits are read most-significant first and
    added to the value.

    stream    -- indexable sequence of bits (0/1 integers)
    bitpos    -- index in `stream` at which decoding starts
    minval    -- value represented by an all-zero encoding
    bit_sizes -- mantissa width of each size class, in order

    Returns a tuple (value, new_bitpos). Raises AssertionError if `bit_sizes`
    is empty; indexing past the end of `stream` raises IndexError.
    '''
    val = minval
    last = len(bit_sizes) - 1
    for pos, bit_size in enumerate(bit_sizes):
        if pos < last:
            bit = stream[bitpos]
            bitpos += 1
        else:
            # The final class carries no selector bit: it is always taken.
            bit = 0
        if bit:
            val += 1 << bit_size
        else:
            for b in range(bit_size):
                val += stream[bitpos] << (bit_size - 1 - b)
                bitpos += 1
            return (val, bitpos)
    # Only reachable with an empty `bit_sizes`. Raised explicitly so the
    # check is not stripped when Python runs with -O.
    raise AssertionError("bit_sizes must not be empty")
29+
30+
def DecodeType(stream, bitpos):
    '''
    Decode an instruction opcode starting at `bitpos`.

    With a minimum of 0 and size classes [0, 0, 1] the result is one of
    0, 1, 2 or 3. Returns (opcode, new_bitpos).
    '''
    opcode_classes = [0, 0, 1]
    return DecodeBits(stream, bitpos, 0, opcode_classes)
32+
33+
def DecodeASN(stream, bitpos):
    '''
    Decode an ASN starting at `bitpos`.

    The smallest encodable value is 1; mantissa widths run from 15 to 24
    bits. Returns (asn, new_bitpos).
    '''
    asn_classes = list(range(15, 25))
    return DecodeBits(stream, bitpos, 1, asn_classes)
35+
36+
def DecodeMatch(stream, bitpos):
    '''
    Decode a match pattern starting at `bitpos`.

    The smallest encodable value is 2; mantissa widths run from 1 to 8
    bits. Returns (match, new_bitpos).
    '''
    match_classes = list(range(1, 9))
    return DecodeBits(stream, bitpos, 2, match_classes)
38+
39+
def DecodeJump(stream, bitpos):
    '''
    Decode a jump offset starting at `bitpos`.

    The smallest encodable value is 17; mantissa widths run from 5 to 30
    bits. Returns (jump, new_bitpos).
    '''
    jump_classes = list(range(5, 31))
    return DecodeBits(stream, bitpos, 17, jump_classes)
41+
42+
def Interpret(asmap, num, bits):
    '''
    Run the bit-encoded program in `asmap` against the number `num`.

    asmap -- sequence of bits holding the encoded instructions
    num   -- integer whose bits are examined, most-significant first
    bits  -- how many bits of `num` are still unexamined; bit index
             (bits - 1) is always the next one looked at

    Returns the decoded ASN, or the most recently set default (initially
    None) when a match instruction fails.
    '''
    cursor = 0
    fallback = None
    while True:
        assert len(asmap) > cursor
        opcode, cursor = DecodeType(asmap, cursor)
        if opcode == 0:
            # Decode an ASN and finish with it.
            asn, cursor = DecodeASN(asmap, cursor)
            return asn
        if opcode == 1:
            # Branch on the next bit of num: skip `offset` bits when it is 1.
            offset, cursor = DecodeJump(asmap, cursor)
            if (num >> (bits - 1)) & 1:
                cursor += offset
            bits -= 1
        elif opcode == 2:
            # The top set bit of `pattern` only marks its length; the bits
            # below it must equal the next bits of num.
            pattern, cursor = DecodeMatch(asmap, cursor)
            width = pattern.bit_length() - 1
            for shift in range(width - 1, -1, -1):
                if ((num >> (bits - 1)) & 1) != ((pattern >> shift) & 1):
                    return fallback
                bits -= 1
        elif opcode == 3:
            # Remember an ASN to return if a later match fails.
            fallback, cursor = DecodeASN(asmap, cursor)
        else:
            assert False
67+
68+
69+
70+
def decode_ip(ip: str) -> int:
    '''
    Convert a textual IPv4 or IPv6 address to an integer.

    IPv6 addresses map to their plain 128-bit value. IPv4 addresses are
    offset by 0xffff00000000, i.e. placed in the ::ffff:0:0/96 range.
    '''
    parsed = ipaddress.ip_address(ip)
    as_int = int.from_bytes(parsed.packed, 'big')
    if isinstance(parsed, ipaddress.IPv4Address):
        return as_int + 0xffff00000000
    if isinstance(parsed, ipaddress.IPv6Address):
        return as_int
76+
77+
class ASMap:
    '''
    IP-to-ASN map held in memory as a flat list of bits.
    '''

    def __init__(self, filename):
        '''
        Load the map from the binary file at `filename` and store its
        contents, expanded to bits, in `self.asmap`.
        '''
        with open(filename, "rb") as handle:
            raw = handle.read()
        self.asmap = DecodeBytes(raw)

    def lookup_asn(self, ip):
        '''
        Return the ASN for the address string `ip` as an integer, or None
        if the map does not know it.
        '''
        number = decode_ip(ip)
        return Interpret(self.asmap, number, 128)

contrib/seeds/makeseeds.py

Lines changed: 25 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,14 @@
66
# Generate seeds.txt from Pieter's DNS seeder
77
#
88

9+
import argparse
910
import re
1011
import sys
11-
import dns.resolver
1212
import collections
1313
from typing import List, Dict, Union
1414

15+
from asmap import ASMap
16+
1517
NSEEDS=512
1618

1719
MAX_SEEDS_PER_ASN = {
@@ -123,34 +125,8 @@ def filtermultiport(ips: List[Dict]) -> List[Dict]:
123125
hist[ip['sortkey']].append(ip)
124126
return [value[0] for (key,value) in list(hist.items()) if len(value)==1]
125127

126-
def lookup_asn(net: str, ip: str) -> Union[int, None]:
127-
""" Look up the asn for an `ip` address by querying cymru.com
128-
on network `net` (e.g. ipv4 or ipv6).
129-
130-
Returns in integer ASN or None if it could not be found.
131-
"""
132-
try:
133-
if net == 'ipv4':
134-
ipaddr = ip
135-
prefix = '.origin'
136-
else: # http://www.team-cymru.com/IP-ASN-mapping.html
137-
res = str() # 2001:4860:b002:23::68
138-
for nb in ip.split(':')[:4]: # pick the first 4 nibbles
139-
for c in nb.zfill(4): # right padded with '0'
140-
res += c + '.' # 2001 4860 b002 0023
141-
ipaddr = res.rstrip('.') # 2.0.0.1.4.8.6.0.b.0.0.2.0.0.2.3
142-
prefix = '.origin6'
143-
144-
asn = int([x.to_text() for x in dns.resolver.resolve('.'.join(
145-
reversed(ipaddr.split('.'))) + prefix + '.asn.cymru.com',
146-
'TXT').response.answer][0].split('\"')[1].split(' ')[0])
147-
return asn
148-
except Exception as e:
149-
sys.stderr.write(f'ERR: Could not resolve ASN for "{ip}": {e}\n')
150-
return None
151-
152128
# Based on Greg Maxwell's seed_filter.py
153-
def filterbyasn(ips: List[Dict], max_per_asn: Dict, max_per_net: int) -> List[Dict]:
129+
def filterbyasn(asmap: ASMap, ips: List[Dict], max_per_asn: Dict, max_per_net: int) -> List[Dict]:
154130
""" Prunes `ips` by
155131
(a) trimming ips to have at most `max_per_net` ips from each net (e.g. ipv4, ipv6); and
156132
(b) trimming ips to have at most `max_per_asn` ips from each asn in each net.
@@ -173,13 +149,14 @@ def filterbyasn(ips: List[Dict], max_per_asn: Dict, max_per_net: int) -> List[Di
173149
# do not add this ip as we already too many
174150
# ips from this network
175151
continue
176-
asn = lookup_asn(ip['net'], ip['ip'])
177-
if asn is None or asn_count[asn] == max_per_asn[ip['net']]:
152+
asn = asmap.lookup_asn(ip['ip'])
153+
if asn is None or asn_count[ip['net'], asn] == max_per_asn[ip['net']]:
178154
# do not add this ip as we already have too many
179155
# ips from this ASN on this network
180156
continue
181-
asn_count[asn] += 1
157+
asn_count[ip['net'], asn] += 1
182158
net_count[ip['net']] += 1
159+
ip['asn'] = asn
183160
result.append(ip)
184161

185162
# Add back Onions (up to max_per_net)
@@ -195,7 +172,18 @@ def ip_stats(ips: List[Dict]) -> str:
195172

196173
return f"{hist['ipv4']:6d} {hist['ipv6']:6d} {hist['onion']:6d}"
197174

175+
def parse_args():
    """ Parse the command line and return the resulting namespace.

    The only option is the mandatory `-a` / `--asmap`, which gives the path
    of the asmap file and is exposed as the `asmap` attribute.
    """
    parser = argparse.ArgumentParser(
        description='Generate a list of bitcoin node seed ip addresses.',
    )
    parser.add_argument(
        "-a",
        "--asmap",
        required=True,
        help='the location of the asmap asn database file (required)',
    )
    return parser.parse_args()
179+
198180
def main():
181+
args = parse_args()
182+
183+
print(f'Loading asmap database "{args.asmap}"…', end='', file=sys.stderr, flush=True)
184+
asmap = ASMap(args.asmap)
185+
print('Done.', file=sys.stderr)
186+
199187
lines = sys.stdin.readlines()
200188
ips = [parseline(line) for line in lines]
201189

@@ -230,15 +218,18 @@ def main():
230218
ips = filtermultiport(ips)
231219
print(f'{ip_stats(ips):s} Filter out hosts with multiple bitcoin ports', file=sys.stderr)
232220
# Look up ASNs and limit results, both per ASN and globally.
233-
ips = filterbyasn(ips, MAX_SEEDS_PER_ASN, NSEEDS)
221+
ips = filterbyasn(asmap, ips, MAX_SEEDS_PER_ASN, NSEEDS)
234222
print(f'{ip_stats(ips):s} Look up ASNs and limit results per ASN and per net', file=sys.stderr)
235223
# Sort the results by IP address (for deterministic output).
236224
ips.sort(key=lambda x: (x['net'], x['sortkey']))
237225
for ip in ips:
238226
if ip['net'] == 'ipv6':
239-
print('[%s]:%i' % (ip['ip'], ip['port']))
227+
print(f"[{ip['ip']}]:{ip['port']}", end="")
240228
else:
241-
print('%s:%i' % (ip['ip'], ip['port']))
229+
print(f"{ip['ip']}:{ip['port']}", end="")
230+
if 'asn' in ip:
231+
print(f" # AS{ip['asn']}", end="")
232+
print()
242233

243234
if __name__ == '__main__':
244235
main()

0 commit comments

Comments
 (0)