|
19 | 19 |
|
20 | 20 | import org.elasticsearch.core.SuppressForbidden;
|
21 | 21 | import org.elasticsearch.core.Tuple;
|
| 22 | +import org.elasticsearch.xcontent.Text; |
| 23 | +import org.elasticsearch.xcontent.XContentString; |
22 | 24 |
|
23 | 25 | import java.net.Inet4Address;
|
24 | 26 | import java.net.Inet6Address;
|
25 | 27 | import java.net.InetAddress;
|
26 | 28 | import java.net.UnknownHostException;
|
27 | 29 | import java.nio.ByteBuffer;
|
| 30 | +import java.nio.charset.StandardCharsets; |
28 | 31 | import java.util.Arrays;
|
29 | 32 | import java.util.Locale;
|
30 | 33 |
|
31 | 34 | public class InetAddresses {
|
32 |
| - private static int IPV4_PART_COUNT = 4; |
33 |
| - private static int IPV6_PART_COUNT = 8; |
| 35 | + private static final int IPV4_PART_COUNT = 4; |
| 36 | + private static final int IPV6_PART_COUNT = 8; |
34 | 37 |
|
35 | 38 | public static boolean isInetAddress(String ipString) {
|
36 |
| - return ipStringToBytes(ipString) != null; |
| 39 | + XContentString.UTF8Bytes bytes = new Text(ipString).bytes(); |
| 40 | + return ipStringToBytes(bytes.bytes(), bytes.offset(), bytes.length(), false) != null; |
37 | 41 | }
|
38 | 42 |
|
39 | 43 | public static String getIpOrHost(String ipString) {
|
40 |
| - byte[] bytes = ipStringToBytes(ipString); |
| 44 | + XContentString.UTF8Bytes utf8Bytes = new Text(ipString).bytes(); |
| 45 | + byte[] bytes = ipStringToBytes(utf8Bytes.bytes(), utf8Bytes.offset(), utf8Bytes.length(), false); |
41 | 46 | if (bytes == null) { // is not InetAddress
|
42 | 47 | return ipString;
|
43 | 48 | }
|
44 | 49 | return NetworkAddress.format(bytesToInetAddress(bytes));
|
45 | 50 | }
|
46 | 51 |
|
47 |
| - private static byte[] ipStringToBytes(String ipString) { |
| 52 | + /** |
| 53 | + * Encodes the given {@link XContentString} in binary encoding, always using 16 bytes for both IPv4 and IPv6 addresses. |
| 54 | + * This is how Lucene encodes IP addresses in {@link org.apache.lucene.document.InetAddressPoint}. |
| 55 | + * |
| 56 | + * @param ipString the IP address as a string |
| 57 | + * @return a byte array containing the binary representation of the IP address |
| 58 | + * @throws IllegalArgumentException if the argument is not a valid IP string literal |
| 59 | + */ |
| 60 | + public static byte[] encodeAsIpv6(XContentString ipString) { |
| 61 | + XContentString.UTF8Bytes uft8Bytes = ipString.bytes(); |
| 62 | + byte[] address = ipStringToBytes(uft8Bytes.bytes(), uft8Bytes.offset(), uft8Bytes.length(), true); |
| 63 | + // The argument was malformed, i.e. not an IP string literal. |
| 64 | + if (address == null) { |
| 65 | + throw new IllegalArgumentException(String.format(Locale.ROOT, "'%s' is not an IP string literal.", ipString.string())); |
| 66 | + } |
| 67 | + return CIDRUtils.encode(address); |
| 68 | + } |
| 69 | + |
| 70 | + private static byte[] ipStringToBytes(byte[] ipUtf8, int offset, int length, boolean asIpv6) { |
48 | 71 | // Make a first pass to categorize the characters in this string.
|
49 | 72 | boolean hasColon = false;
|
50 | 73 | boolean hasDot = false;
|
51 |
| - int percentIndex = -1; |
52 |
| - for (int i = 0; i < ipString.length(); i++) { |
53 |
| - char c = ipString.charAt(i); |
54 |
| - if (c == '.') { |
| 74 | + for (int i = offset; i < offset + length; i++) { |
| 75 | + byte c = ipUtf8[i]; |
| 76 | + if ((c & 0b10000000) != 0) { |
| 77 | + return null; // Only allow ASCII characters. |
| 78 | + } else if (c == '.') { |
55 | 79 | hasDot = true;
|
56 | 80 | } else if (c == ':') {
|
57 | 81 | if (hasDot) {
|
58 | 82 | return null; // Colons must not appear after dots.
|
59 | 83 | }
|
60 | 84 | hasColon = true;
|
61 | 85 | } else if (c == '%') {
|
62 |
| - percentIndex = i; |
| 86 | + if (i == offset + length - 1) { |
| 87 | + return null; // Filter out strings that end in % and have an empty scope ID. |
| 88 | + } |
| 89 | + length = i; |
63 | 90 | break; // Everything after a '%' is ignored (it's a Scope ID)
|
64 |
| - } else if (Character.digit(c, 16) == -1) { |
65 |
| - return null; // Everything else must be a decimal or hex digit. |
66 | 91 | }
|
67 | 92 | }
|
68 | 93 |
|
69 | 94 | // Now decide which address family to parse.
|
70 | 95 | if (hasColon) {
|
71 | 96 | if (hasDot) {
|
72 |
| - ipString = convertDottedQuadToHex(ipString); |
73 |
| - if (ipString == null) { |
| 97 | + ipUtf8 = convertDottedQuadToHex(ipUtf8, offset, length); |
| 98 | + if (ipUtf8 == null) { |
74 | 99 | return null;
|
75 | 100 | }
|
| 101 | + offset = 0; |
| 102 | + length = ipUtf8.length; |
76 | 103 | }
|
77 |
| - if (percentIndex == ipString.length() - 1) { |
78 |
| - return null; // Filter out strings that end in % and have an empty scope ID. |
79 |
| - } |
80 |
| - if (percentIndex != -1) { |
81 |
| - ipString = ipString.substring(0, percentIndex); |
82 |
| - } |
83 |
| - return textToNumericFormatV6(ipString); |
| 104 | + return textToNumericFormatV6(ipUtf8, offset, length); |
84 | 105 | } else if (hasDot) {
|
85 |
| - return textToNumericFormatV4(ipString); |
| 106 | + return textToNumericFormatV4(ipUtf8, offset, length, asIpv6); |
86 | 107 | }
|
87 | 108 | return null;
|
88 | 109 | }
|
89 | 110 |
|
90 |
| - private static String convertDottedQuadToHex(String ipString) { |
91 |
| - int lastColon = ipString.lastIndexOf(':'); |
92 |
| - String initialPart = ipString.substring(0, lastColon + 1); |
93 |
| - String dottedQuad = ipString.substring(lastColon + 1); |
94 |
| - byte[] quad = textToNumericFormatV4(dottedQuad); |
| 111 | + private static byte[] convertDottedQuadToHex(byte[] ipUtf8, int offset, int length) { |
| 112 | + int quadOffset = -1; |
| 113 | + for (int i = 0; i < length; i++) { |
| 114 | + if (ipUtf8[i + offset] == ':') { |
| 115 | + quadOffset = i + 1; |
| 116 | + } |
| 117 | + } |
| 118 | + assert quadOffset >= 0 : "Expected at least one colon in dotted quad IPv6 address"; |
| 119 | + byte[] quad = textToNumericFormatV4(ipUtf8, offset + quadOffset, length - quadOffset, false); |
95 | 120 | if (quad == null) {
|
96 | 121 | return null;
|
97 | 122 | }
|
98 |
| - String penultimate = Integer.toHexString(((quad[0] & 0xff) << 8) | (quad[1] & 0xff)); |
99 |
| - String ultimate = Integer.toHexString(((quad[2] & 0xff) << 8) | (quad[3] & 0xff)); |
100 |
| - return initialPart + penultimate + ":" + ultimate; |
| 123 | + byte[] penultimate = Integer.toHexString(((quad[0] & 0xff) << 8) | (quad[1] & 0xff)).getBytes(StandardCharsets.US_ASCII); |
| 124 | + byte[] ultimate = Integer.toHexString(((quad[2] & 0xff) << 8) | (quad[3] & 0xff)).getBytes(StandardCharsets.US_ASCII); |
| 125 | + byte[] result = new byte[quadOffset + penultimate.length + 1 + ultimate.length]; |
| 126 | + System.arraycopy(ipUtf8, offset, result, 0, quadOffset); |
| 127 | + System.arraycopy(penultimate, 0, result, quadOffset, penultimate.length); |
| 128 | + result[quadOffset + penultimate.length] = ':'; |
| 129 | + System.arraycopy(ultimate, 0, result, quadOffset + penultimate.length + 1, ultimate.length); |
| 130 | + return result; |
101 | 131 | }
|
102 | 132 |
|
103 |
| - private static byte[] textToNumericFormatV4(String ipString) { |
104 |
| - byte[] bytes = new byte[IPV4_PART_COUNT]; |
105 |
| - byte octet = 0; |
| 133 | + private static byte[] textToNumericFormatV4(byte[] ipUtf8, int offset, int length, boolean asIpv6) { |
| 134 | + byte[] bytes; |
| 135 | + byte octet; |
| 136 | + if (asIpv6) { |
| 137 | + bytes = new byte[IPV6_PART_COUNT * 2]; |
| 138 | + System.arraycopy(CIDRUtils.IPV4_PREFIX, 0, bytes, 0, CIDRUtils.IPV4_PREFIX.length); |
| 139 | + octet = (byte) CIDRUtils.IPV4_PREFIX.length; |
| 140 | + } else { |
| 141 | + bytes = new byte[IPV4_PART_COUNT]; |
| 142 | + octet = 0; |
| 143 | + } |
106 | 144 | byte digits = 0;
|
107 |
| - for (int i = 0; i < ipString.length(); i++) { |
108 |
| - char c = ipString.charAt(i); |
| 145 | + int current = 0; |
| 146 | + for (int i = offset; i < offset + length; i++) { |
| 147 | + byte c = ipUtf8[i]; |
109 | 148 | if (c == '.') {
|
110 |
| - octet++; |
111 |
| - if (octet > 3 /* too many octets */ || digits == 0 /* empty octet */) { |
| 149 | + if (octet >= bytes.length /* too many octets */ |
| 150 | + || digits == 0 /* empty octet */ |
| 151 | + || current > 255 /* octet is outside a byte range */) { |
112 | 152 | return null;
|
113 | 153 | }
|
| 154 | + bytes[octet++] = (byte) current; |
| 155 | + current = 0; |
114 | 156 | digits = 0;
|
115 | 157 | } else if (c >= '0' && c <= '9') {
|
116 |
| - digits++; |
117 |
| - var next = bytes[octet] * 10 + (c - '0'); |
118 |
| - if (next > 255 /* octet is outside a byte range */ || (digits > 1 && bytes[octet] == 0) /* octet contains leading 0 */) { |
| 158 | + if (digits != 0 && current == 0 /* octet contains leading 0 */) { |
119 | 159 | return null;
|
120 | 160 | }
|
121 |
| - bytes[octet] = (byte) next; |
| 161 | + current = current * 10 + (c - '0'); |
| 162 | + digits++; |
122 | 163 | } else {
|
123 | 164 | return null;
|
124 | 165 | }
|
125 | 166 | }
|
126 |
| - return octet != 3 ? null : bytes; |
| 167 | + if (octet != bytes.length - 1 /* too many or too few octets */ |
| 168 | + || digits == 0 /* empty octet */ |
| 169 | + || current > 255 /* octet is outside a byte range */) { |
| 170 | + return null; |
| 171 | + } |
| 172 | + bytes[octet] = (byte) current; |
| 173 | + return bytes; |
127 | 174 | }
|
128 | 175 |
|
129 |
| - private static byte[] textToNumericFormatV6(String ipString) { |
130 |
| - // An address can have [2..8] colons, and N colons make N+1 parts. |
131 |
| - String[] parts = ipString.split(":", IPV6_PART_COUNT + 2); |
132 |
| - if (parts.length < 3 || parts.length > IPV6_PART_COUNT + 1) { |
| 176 | + private static byte[] textToNumericFormatV6(byte[] ipUtf8, int offset, int length) { |
| 177 | + if (length < 2) { |
| 178 | + // IPv6 addresses must be at least 2 characters long (e.g., "::") |
| 179 | + return null; |
| 180 | + } |
| 181 | + if (ipUtf8[offset] == ':' && ipUtf8[offset + 1] != ':') { |
| 182 | + // Addresses can't start with a single colon |
133 | 183 | return null;
|
134 | 184 | }
|
| 185 | + if (ipUtf8[offset + length - 1] == ':' && ipUtf8[offset + length - 2] != ':') { |
| 186 | + // Addresses can't end with a single colon |
| 187 | + return null; |
| 188 | + } |
| 189 | + |
| 190 | + // An IPv6 address has 8 hextets (16-bit pieces), each represented by 1-4 hex digits |
| 191 | + // Total size: 16 bytes (128 bits) |
| 192 | + ByteBuffer bytes = ByteBuffer.allocate(IPV6_PART_COUNT * 2); |
135 | 193 |
|
136 |
| - // Disregarding the endpoints, find "::" with nothing in between. |
137 |
| - // This indicates that a run of zeroes has been skipped. |
138 |
| - int skipIndex = -1; |
139 |
| - for (int i = 1; i < parts.length - 1; i++) { |
140 |
| - if (parts[i].length() == 0) { |
141 |
| - if (skipIndex >= 0) { |
142 |
| - return null; // Can't have more than one :: |
| 194 | + // Find position of :: abbreviation if present |
| 195 | + int compressedHextetIndex = -1; |
| 196 | + int hextetIndex = 0; |
| 197 | + int currentHextetStart = 0; |
| 198 | + int currentHextet = 0; |
| 199 | + for (int i = offset; i < offset + length; i++) { |
| 200 | + byte c = ipUtf8[i]; |
| 201 | + if (c == ':') { |
| 202 | + if (currentHextetStart == i) { |
| 203 | + // Two colons in a row, indicating a compressed section |
| 204 | + if (compressedHextetIndex >= 0 && i != 1) { |
| 205 | + // We've already seen a ::, can't have another |
| 206 | + return null; |
| 207 | + } |
| 208 | + compressedHextetIndex = hextetIndex; // Mark the position of the compressed section |
| 209 | + } else { |
| 210 | + if (putHextet(bytes, currentHextet) == false) { |
| 211 | + return null; |
| 212 | + } |
| 213 | + currentHextet = 0; |
| 214 | + hextetIndex++; |
143 | 215 | }
|
144 |
| - skipIndex = i; |
| 216 | + currentHextetStart = i + 1; |
| 217 | + } else if (c >= '0' && c <= '9') { |
| 218 | + // Valid hex digit |
| 219 | + currentHextet = currentHextet * 16 + (c - '0'); |
| 220 | + } else if (c >= 'a' && c <= 'f') { |
| 221 | + // Valid hex digit in lowercase |
| 222 | + currentHextet = currentHextet * 16 + (c - 'a' + 10); |
| 223 | + } else if (c >= 'A' && c <= 'F') { |
| 224 | + // Valid hex digit in uppercase |
| 225 | + currentHextet = currentHextet * 16 + (c - 'A' + 10); |
| 226 | + } else { |
| 227 | + return null; // Invalid character |
145 | 228 | }
|
146 | 229 | }
|
147 |
| - |
148 |
| - int partsHi; // Number of parts to copy from above/before the "::" |
149 |
| - int partsLo; // Number of parts to copy from below/after the "::" |
150 |
| - if (skipIndex >= 0) { |
151 |
| - // If we found a "::", then check if it also covers the endpoints. |
152 |
| - partsHi = skipIndex; |
153 |
| - partsLo = parts.length - skipIndex - 1; |
154 |
| - if (parts[0].length() == 0 && --partsHi != 0) { |
155 |
| - return null; // ^: requires ^:: |
156 |
| - } |
157 |
| - if (parts[parts.length - 1].length() == 0 && --partsLo != 0) { |
158 |
| - return null; // :$ requires ::$ |
| 230 | + if (currentHextetStart != length) { |
| 231 | + // Handle the last hextet |
| 232 | + if (putHextet(bytes, currentHextet) == false) { |
| 233 | + return null; |
159 | 234 | }
|
160 |
| - } else { |
161 |
| - // Otherwise, allocate the entire address to partsHi. The endpoints |
162 |
| - // could still be empty, but parseHextet() will check for that. |
163 |
| - partsHi = parts.length; |
164 |
| - partsLo = 0; |
| 235 | + hextetIndex++; |
165 | 236 | }
|
166 | 237 |
|
167 |
| - // If we found a ::, then we must have skipped at least one part. |
168 |
| - // Otherwise, we must have exactly the right number of parts. |
169 |
| - int partsSkipped = IPV6_PART_COUNT - (partsHi + partsLo); |
170 |
| - if ((skipIndex >= 0 ? partsSkipped >= 1 : partsSkipped == 0) == false) { |
171 |
| - return null; |
| 238 | + if (compressedHextetIndex >= 0) { |
| 239 | + if (hextetIndex >= IPV6_PART_COUNT) { |
| 240 | + return null; // Invalid, too many hextets |
| 241 | + } |
| 242 | + shiftHextetsRight(bytes, compressedHextetIndex, hextetIndex); |
| 243 | + } else if (hextetIndex != IPV6_PART_COUNT) { |
| 244 | + return null; // Invalid, not enough hextets |
172 | 245 | }
|
173 | 246 |
|
174 |
| - // Now parse the hextets into a byte array. |
175 |
| - ByteBuffer rawBytes = ByteBuffer.allocate(2 * IPV6_PART_COUNT); |
176 |
| - try { |
177 |
| - for (int i = 0; i < partsHi; i++) { |
178 |
| - rawBytes.putShort(parseHextet(parts[i])); |
179 |
| - } |
180 |
| - for (int i = 0; i < partsSkipped; i++) { |
181 |
| - rawBytes.putShort((short) 0); |
182 |
| - } |
183 |
| - for (int i = partsLo; i > 0; i--) { |
184 |
| - rawBytes.putShort(parseHextet(parts[parts.length - i])); |
185 |
| - } |
186 |
| - } catch (NumberFormatException ex) { |
187 |
| - return null; |
| 247 | + return bytes.array(); |
| 248 | + } |
| 249 | + |
| 250 | + private static void shiftHextetsRight(ByteBuffer bytes, int start, int end) { |
| 251 | + int shift = IPV6_PART_COUNT - end; |
| 252 | + for (int hextetIndexToShift = end - 1; hextetIndexToShift >= start; hextetIndexToShift--) { |
| 253 | + int bytesIndexBeforeShift = hextetIndexToShift * Short.BYTES; |
| 254 | + short hextetToShift = bytes.getShort(bytesIndexBeforeShift); |
| 255 | + bytes.putShort(bytesIndexBeforeShift, (short) 0); |
| 256 | + bytes.putShort(bytesIndexBeforeShift + shift * Short.BYTES, hextetToShift); |
188 | 257 | }
|
189 |
| - return rawBytes.array(); |
190 | 258 | }
|
191 | 259 |
|
192 |
| - private static short parseHextet(String ipPart) { |
193 |
| - // Note: we already verified that this string contains only hex digits. |
194 |
| - int hextet = Integer.parseInt(ipPart, 16); |
| 260 | + private static boolean putHextet(ByteBuffer buf, int hextet) { |
| 261 | + if (buf.remaining() < 2) { |
| 262 | + return false; |
| 263 | + } |
195 | 264 | if (hextet > 0xffff) {
|
196 |
| - throw new NumberFormatException(); |
| 265 | + return false; |
197 | 266 | }
|
198 |
| - return (short) hextet; |
| 267 | + buf.putShort((short) hextet); |
| 268 | + return true; |
199 | 269 | }
|
200 | 270 |
|
201 | 271 | /**
|
@@ -345,11 +415,29 @@ private static String hextetsToIPv6String(int[] hextets) {
|
345 | 415 | * @throws IllegalArgumentException if the argument is not a valid IP string literal
|
346 | 416 | */
|
347 | 417 | public static InetAddress forString(String ipString) {
|
348 |
| - byte[] addr = ipStringToBytes(ipString); |
| 418 | + return forString(new Text(ipString).bytes()); |
| 419 | + } |
| 420 | + |
| 421 | + /** |
| 422 | + * A variant of {@link #forString(String)} that accepts an {@link XContentString.UTF8Bytes} object, |
| 423 | + * which utilizes a more efficient implementation for parsing the IP address. |
| 424 | + */ |
| 425 | + public static InetAddress forString(XContentString.UTF8Bytes bytes) { |
| 426 | + return forString(bytes.bytes(), bytes.offset(), bytes.length()); |
| 427 | + } |
| 428 | + |
| 429 | + /** |
| 430 | + * A variant of {@link #forString(String)} that accepts a byte array, |
| 431 | + * which utilizes a more efficient implementation for parsing the IP address. |
| 432 | + */ |
| 433 | + public static InetAddress forString(byte[] ipUtf8, int offset, int length) { |
| 434 | + byte[] addr = ipStringToBytes(ipUtf8, offset, length, false); |
349 | 435 |
|
350 | 436 | // The argument was malformed, i.e. not an IP string literal.
|
351 | 437 | if (addr == null) {
|
352 |
| - throw new IllegalArgumentException(String.format(Locale.ROOT, "'%s' is not an IP string literal.", ipString)); |
| 438 | + throw new IllegalArgumentException( |
| 439 | + String.format(Locale.ROOT, "'%s' is not an IP string literal.", new String(ipUtf8, offset, length, StandardCharsets.UTF_8)) |
| 440 | + ); |
353 | 441 | }
|
354 | 442 |
|
355 | 443 | return bytesToInetAddress(addr);
|
|
0 commit comments