|
20 | 20 |
|
21 | 21 | namespace {
|
22 | 22 |
|
| 23 | +//////////////////////////////////////////////////////////////////////////// |
| 24 | +// Checksum // |
| 25 | +//////////////////////////////////////////////////////////////////////////// |
| 26 | + |
| 27 | +// This section implements a checksum algorithm for descriptors with the |
| 28 | +// following properties: |
| 29 | +// * Mistakes in a descriptor string are measured in "symbol errors". The higher |
| 30 | +// the number of symbol errors, the harder it is to detect: |
| 31 | +// * An error substituting a character from 0123456789()[],'/*abcdefgh@:$%{} for |
| 32 | +// another in that set always counts as 1 symbol error. |
| 33 | +// * Note that hex encoded keys are covered by these characters. Xprvs and |
| 34 | +// xpubs use other characters too, but already have their own checksum |
| 35 | +// mechanism. |
| 36 | +// * Function names like "multi()" use other characters, but mistakes in |
| 37 | +// these would generally result in an unparseable descriptor. |
| 38 | +// * A case error always counts as 1 symbol error. |
| 39 | +// * Any other 1 character substitution error counts as 1 or 2 symbol errors. |
| 40 | +// * Any 1 symbol error is always detected. |
| 41 | +// * Any 2 or 3 symbol error in a descriptor of up to 49154 characters is always detected. |
| 42 | +// * Any 4 symbol error in a descriptor of up to 507 characters is always detected. |
| 43 | +// * Any 5 symbol error in a descriptor of up to 77 characters is always detected. |
| 44 | +// * Is optimized to minimize the chance a 5 symbol error in a descriptor up to 387 characters is undetected |
| 45 | +// * Random errors have a chance of 1 in 2**40 of being undetected. |
| 46 | +// |
| 47 | +// These properties are achieved by expanding every group of 3 (non checksum) characters into |
| 48 | +// 4 GF(32) symbols, over which a cyclic code is defined. |
| 49 | + |
| 50 | +/* |
| 51 | + * Interprets c as 8 groups of 5 bits which are the coefficients of a degree 8 polynomial over GF(32), |
| 52 | + * multiplies that polynomial by x, computes its remainder modulo a generator, and adds the constant term val. |
| 53 | + * |
| 54 | + * This generator is G(x) = x^8 + {30}x^7 + {23}x^6 + {15}x^5 + {14}x^4 + {10}x^3 + {6}x^2 + {12}x + {9}. |
| 55 | + * It is chosen to define a cyclic error detecting code which is selected by: |
| 56 | + * - Starting from all BCH codes over GF(32) of degree 8 and below, which by construction guarantee detecting |
| 57 | + * 3 errors in windows up to 19000 symbols. |
| 58 | + * - Taking all those generators, and for degree 7 ones, extend them to degree 8 by adding all degree-1 factors. |
| 59 | + * - Selecting just the set of generators that guarantee detecting 4 errors in a window of length 512. |
| 60 | + * - Selecting one of those with best worst-case behavior for 5 errors in windows of length up to 512. |
| 61 | + * |
| 62 | + * The generator and the constants to implement it can be verified using this Sage code: |
| 63 | + * B = GF(2) # Binary field |
| 64 | + * BP.<b> = B[] # Polynomials over the binary field |
| 65 | + * F_mod = b**5 + b**3 + 1 |
| 66 | + * F.<f> = GF(32, modulus=F_mod, repr='int') # GF(32) definition |
| 67 | + * FP.<x> = F[] # Polynomials over GF(32) |
| 68 | + * E_mod = x**3 + x + F.fetch_int(8) |
| 69 | + * E.<e> = F.extension(E_mod) # Extension field definition |
| 70 | + * alpha = e**2743 # Choice of an element in extension field |
| 71 | + * for p in divisors(E.order() - 1): # Verify alpha has order 32767. |
| 72 | + * assert((alpha**p == 1) == (p % 32767 == 0)) |
| 73 | + * G = lcm([(alpha**i).minpoly() for i in [1056,1057,1058]] + [x + 1]) |
| 74 | + * print(G) # Print out the generator |
| 75 | + * for i in [1,2,4,8,16]: # Print out {1,2,4,8,16}*(G mod x^8), packed in hex integers. |
| 76 | + * v = 0 |
| 77 | + * for coef in reversed((F.fetch_int(i)*(G % x**8)).coefficients(sparse=True)): |
| 78 | + * v = v*32 + coef.integer_representation() |
| 79 | + * print("0x%x" % v) |
| 80 | + */ |
/**
 * Advance the checksum state by one symbol.
 *
 * The low 40 bits of c are treated as eight 5-bit coefficients. The topmost coefficient is shifted out,
 * the remaining seven move up by one position, val is XORed into the result, and for every set bit of the
 * shifted-out coefficient the corresponding precomputed reduction constant is XORed in.
 */
uint64_t PolyMod(uint64_t c, int val)
{
    // Entry i is applied when bit i of the shifted-out coefficient is set.
    static const uint64_t REDUCTION[5] = {
        0xf5dee51989,
        0xa9fdca3312,
        0x1bab10e32d,
        0x3706b1677a,
        0x644d626ffd,
    };

    const uint64_t top = c >> 35;
    uint64_t next = ((c & 0x7ffffffff) << 5) ^ val;
    for (int bit = 0; bit < 5; ++bit) {
        if ((top >> bit) & 1) next ^= REDUCTION[bit];
    }
    return next;
}
| 92 | + |
| 93 | +std::string DescriptorChecksum(const Span<const char>& span) |
| 94 | +{ |
| 95 | + /** A character set designed such that: |
| 96 | + * - The most common 'unprotected' descriptor characters (hex, keypaths) are in the first group of 32. |
| 97 | + * - Case errors cause an offset that's a multiple of 32. |
| 98 | + * - As many alphabetic characters are in the same group (while following the above restrictions). |
| 99 | + * |
| 100 | + * If p(x) gives the position of a character c in this character set, every group of 3 characters |
| 101 | + * (a,b,c) is encoded as the 4 symbols (p(a) & 31, p(b) & 31, p(c) & 31, (p(a) / 32) + 3 * (p(b) / 32) + 9 * (p(c) / 32). |
| 102 | + * This means that changes that only affect the lower 5 bits of the position, or only the higher 2 bits, will just |
| 103 | + * affect a single symbol. |
| 104 | + * |
| 105 | + * As a result, within-group-of-32 errors count as 1 symbol, as do cross-group errors that don't affect |
| 106 | + * the position within the groups. |
| 107 | + */ |
| 108 | + static std::string INPUT_CHARSET = |
| 109 | + "0123456789()[],'/*abcdefgh@:$%{}" |
| 110 | + "IJKLMNOPQRSTUVWXYZ&+-.;<=>?!^_|~" |
| 111 | + "ijklmnopqrstuvwxyzABCDEFGH`#\"\\ "; |
| 112 | + |
| 113 | + /** The character set for the checksum itself (same as bech32). */ |
| 114 | + static std::string CHECKSUM_CHARSET = "qpzry9x8gf2tvdw0s3jn54khce6mua7l"; |
| 115 | + |
| 116 | + uint64_t c = 1; |
| 117 | + int cls = 0; |
| 118 | + int clscount = 0; |
| 119 | + for (auto ch : span) { |
| 120 | + auto pos = INPUT_CHARSET.find(ch); |
| 121 | + if (pos == std::string::npos) return ""; |
| 122 | + c = PolyMod(c, pos & 31); // Emit a symbol for the position inside the group, for every character. |
| 123 | + cls = cls * 3 + (pos >> 5); // Accumulate the group numbers |
| 124 | + if (++clscount == 3) { |
| 125 | + // Emit an extra symbol representing the group numbers, for every 3 characters. |
| 126 | + c = PolyMod(c, cls); |
| 127 | + cls = 0; |
| 128 | + clscount = 0; |
| 129 | + } |
| 130 | + } |
| 131 | + if (clscount > 0) c = PolyMod(c, cls); |
| 132 | + for (int j = 0; j < 8; ++j) c = PolyMod(c, 0); // Shift further to determine the checksum. |
| 133 | + c ^= 1; // Prevent appending zeroes from not affecting the checksum. |
| 134 | + |
| 135 | + std::string ret(8, ' '); |
| 136 | + for (int j = 0; j < 8; ++j) ret[j] = CHECKSUM_CHARSET[(c >> (5 * (7 - j))) & 31]; |
| 137 | + return ret; |
| 138 | +} |
| 139 | + |
| 140 | +std::string AddChecksum(const std::string& str) { return str + "#" + DescriptorChecksum(MakeSpan(str)); } |
| 141 | + |
23 | 142 | ////////////////////////////////////////////////////////////////////////////
|
24 | 143 | // Internal representation //
|
25 | 144 | ////////////////////////////////////////////////////////////////////////////
|
@@ -273,10 +392,15 @@ class DescriptorImpl : public Descriptor
|
273 | 392 | {
|
274 | 393 | std::string ret;
|
275 | 394 | ToStringHelper(nullptr, ret, false);
|
276 |
| - return ret; |
| 395 | + return AddChecksum(ret); |
277 | 396 | }
|
278 | 397 |
|
279 |
| - bool ToPrivateString(const SigningProvider& arg, std::string& out) const override final { return ToStringHelper(&arg, out, true); } |
| 398 | + bool ToPrivateString(const SigningProvider& arg, std::string& out) const override final |
| 399 | + { |
| 400 | + bool ret = ToStringHelper(&arg, out, true); |
| 401 | + out = AddChecksum(out); |
| 402 | + return ret; |
| 403 | + } |
280 | 404 |
|
281 | 405 | bool ExpandHelper(int pos, const SigningProvider& arg, Span<const unsigned char>* cache_read, std::vector<CScript>& output_scripts, FlatSigningProvider& out, std::vector<unsigned char>* cache_write) const
|
282 | 406 | {
|
@@ -751,11 +875,25 @@ std::unique_ptr<DescriptorImpl> InferScript(const CScript& script, ParseScriptCo
|
751 | 875 | return MakeUnique<RawDescriptor>(script);
|
752 | 876 | }
|
753 | 877 |
|
| 878 | + |
754 | 879 | } // namespace
|
755 | 880 |
|
756 |
| -std::unique_ptr<Descriptor> Parse(const std::string& descriptor, FlatSigningProvider& out) |
| 881 | +std::unique_ptr<Descriptor> Parse(const std::string& descriptor, FlatSigningProvider& out, bool require_checksum) |
757 | 882 | {
|
758 | 883 | Span<const char> sp(descriptor.data(), descriptor.size());
|
| 884 | + |
| 885 | + // Checksum checks |
| 886 | + auto check_split = Split(sp, '#'); |
| 887 | + if (check_split.size() > 2) return nullptr; // Multiple '#' symbols |
| 888 | + if (check_split.size() == 1 && require_checksum) return nullptr; // Missing checksum |
| 889 | + if (check_split.size() == 2) { |
| 890 | + if (check_split[1].size() != 8) return nullptr; // Unexpected length for checksum |
| 891 | + auto checksum = DescriptorChecksum(check_split[0]); |
| 892 | + if (checksum.empty()) return nullptr; // Invalid characters in payload |
| 893 | + if (!std::equal(checksum.begin(), checksum.end(), check_split[1].begin())) return nullptr; // Checksum mismatch |
| 894 | + } |
| 895 | + sp = check_split[0]; |
| 896 | + |
759 | 897 | auto ret = ParseScript(sp, ParseScriptContext::TOP, out);
|
760 | 898 | if (sp.size() == 0 && ret) return std::unique_ptr<Descriptor>(std::move(ret));
|
761 | 899 | return nullptr;
|
|
0 commit comments