Skip to content

Commit 624c147

Browse files
committed
Optimize IP field parsing
1 parent 6b8212a commit 624c147

File tree

6 files changed

+332
-132
lines changed

6 files changed

+332
-132
lines changed

server/src/main/java/org/elasticsearch/common/network/InetAddresses.java

Lines changed: 186 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -19,183 +19,253 @@
1919

2020
import org.elasticsearch.core.SuppressForbidden;
2121
import org.elasticsearch.core.Tuple;
22+
import org.elasticsearch.xcontent.Text;
23+
import org.elasticsearch.xcontent.XContentString;
2224

2325
import java.net.Inet4Address;
2426
import java.net.Inet6Address;
2527
import java.net.InetAddress;
2628
import java.net.UnknownHostException;
2729
import java.nio.ByteBuffer;
30+
import java.nio.charset.StandardCharsets;
2831
import java.util.Arrays;
2932
import java.util.Locale;
3033

3134
public class InetAddresses {
32-
private static int IPV4_PART_COUNT = 4;
33-
private static int IPV6_PART_COUNT = 8;
35+
private static final int IPV4_PART_COUNT = 4;
36+
private static final int IPV6_PART_COUNT = 8;
3437

3538
public static boolean isInetAddress(String ipString) {
36-
return ipStringToBytes(ipString) != null;
39+
XContentString.UTF8Bytes bytes = new Text(ipString).bytes();
40+
return ipStringToBytes(bytes.bytes(), bytes.offset(), bytes.length(), false) != null;
3741
}
3842

3943
public static String getIpOrHost(String ipString) {
40-
byte[] bytes = ipStringToBytes(ipString);
44+
XContentString.UTF8Bytes utf8Bytes = new Text(ipString).bytes();
45+
byte[] bytes = ipStringToBytes(utf8Bytes.bytes(), utf8Bytes.offset(), utf8Bytes.length(), false);
4146
if (bytes == null) { // is not InetAddress
4247
return ipString;
4348
}
4449
return NetworkAddress.format(bytesToInetAddress(bytes));
4550
}
4651

47-
private static byte[] ipStringToBytes(String ipString) {
52+
/**
53+
* Encodes the given {@link XContentString} in binary encoding, always using 16 bytes for both IPv4 and IPv6 addresses.
54+
* This is how Lucene encodes IP addresses in {@link org.apache.lucene.document.InetAddressPoint}.
55+
*
56+
* @param ipString the IP address as a string
57+
* @return a byte array containing the binary representation of the IP address
58+
* @throws IllegalArgumentException if the argument is not a valid IP string literal
59+
*/
60+
public static byte[] encodeAsIpv6(XContentString ipString) {
61+
XContentString.UTF8Bytes uft8Bytes = ipString.bytes();
62+
byte[] address = ipStringToBytes(uft8Bytes.bytes(), uft8Bytes.offset(), uft8Bytes.length(), true);
63+
// The argument was malformed, i.e. not an IP string literal.
64+
if (address == null) {
65+
throw new IllegalArgumentException(String.format(Locale.ROOT, "'%s' is not an IP string literal.", ipString.string()));
66+
}
67+
return CIDRUtils.encode(address);
68+
}
69+
70+
private static byte[] ipStringToBytes(byte[] ipUtf8, int offset, int length, boolean asIpv6) {
4871
// Make a first pass to categorize the characters in this string.
4972
boolean hasColon = false;
5073
boolean hasDot = false;
51-
int percentIndex = -1;
52-
for (int i = 0; i < ipString.length(); i++) {
53-
char c = ipString.charAt(i);
54-
if (c == '.') {
74+
for (int i = offset; i < offset + length; i++) {
75+
byte c = ipUtf8[i];
76+
if ((c & 0b10000000) != 0) {
77+
return null; // Only allow ASCII characters.
78+
} else if (c == '.') {
5579
hasDot = true;
5680
} else if (c == ':') {
5781
if (hasDot) {
5882
return null; // Colons must not appear after dots.
5983
}
6084
hasColon = true;
6185
} else if (c == '%') {
62-
percentIndex = i;
86+
if (i == offset + length - 1) {
87+
return null; // Filter out strings that end in % and have an empty scope ID.
88+
}
89+
length = i;
6390
break; // Everything after a '%' is ignored (it's a Scope ID)
64-
} else if (Character.digit(c, 16) == -1) {
65-
return null; // Everything else must be a decimal or hex digit.
6691
}
6792
}
6893

6994
// Now decide which address family to parse.
7095
if (hasColon) {
7196
if (hasDot) {
72-
ipString = convertDottedQuadToHex(ipString);
73-
if (ipString == null) {
97+
ipUtf8 = convertDottedQuadToHex(ipUtf8, offset, length);
98+
if (ipUtf8 == null) {
7499
return null;
75100
}
101+
offset = 0;
102+
length = ipUtf8.length;
76103
}
77-
if (percentIndex == ipString.length() - 1) {
78-
return null; // Filter out strings that end in % and have an empty scope ID.
79-
}
80-
if (percentIndex != -1) {
81-
ipString = ipString.substring(0, percentIndex);
82-
}
83-
return textToNumericFormatV6(ipString);
104+
return textToNumericFormatV6(ipUtf8, offset, length);
84105
} else if (hasDot) {
85-
return textToNumericFormatV4(ipString);
106+
return textToNumericFormatV4(ipUtf8, offset, length, asIpv6);
86107
}
87108
return null;
88109
}
89110

90-
private static String convertDottedQuadToHex(String ipString) {
91-
int lastColon = ipString.lastIndexOf(':');
92-
String initialPart = ipString.substring(0, lastColon + 1);
93-
String dottedQuad = ipString.substring(lastColon + 1);
94-
byte[] quad = textToNumericFormatV4(dottedQuad);
111+
private static byte[] convertDottedQuadToHex(byte[] ipUtf8, int offset, int length) {
112+
int quadOffset = -1;
113+
for (int i = 0; i < length; i++) {
114+
if (ipUtf8[i + offset] == ':') {
115+
quadOffset = i + 1;
116+
}
117+
}
118+
assert quadOffset >= 0 : "Expected at least one colon in dotted quad IPv6 address";
119+
byte[] quad = textToNumericFormatV4(ipUtf8, offset + quadOffset, length - quadOffset, false);
95120
if (quad == null) {
96121
return null;
97122
}
98-
String penultimate = Integer.toHexString(((quad[0] & 0xff) << 8) | (quad[1] & 0xff));
99-
String ultimate = Integer.toHexString(((quad[2] & 0xff) << 8) | (quad[3] & 0xff));
100-
return initialPart + penultimate + ":" + ultimate;
123+
byte[] penultimate = Integer.toHexString(((quad[0] & 0xff) << 8) | (quad[1] & 0xff)).getBytes(StandardCharsets.US_ASCII);
124+
byte[] ultimate = Integer.toHexString(((quad[2] & 0xff) << 8) | (quad[3] & 0xff)).getBytes(StandardCharsets.US_ASCII);
125+
byte[] result = new byte[quadOffset + penultimate.length + 1 + ultimate.length];
126+
System.arraycopy(ipUtf8, offset, result, 0, quadOffset);
127+
System.arraycopy(penultimate, 0, result, quadOffset, penultimate.length);
128+
result[quadOffset + penultimate.length] = ':';
129+
System.arraycopy(ultimate, 0, result, quadOffset + penultimate.length + 1, ultimate.length);
130+
return result;
101131
}
102132

103-
private static byte[] textToNumericFormatV4(String ipString) {
104-
byte[] bytes = new byte[IPV4_PART_COUNT];
105-
byte octet = 0;
133+
private static byte[] textToNumericFormatV4(byte[] ipUtf8, int offset, int length, boolean asIpv6) {
134+
byte[] bytes;
135+
byte octet;
136+
if (asIpv6) {
137+
bytes = new byte[IPV6_PART_COUNT * 2];
138+
System.arraycopy(CIDRUtils.IPV4_PREFIX, 0, bytes, 0, CIDRUtils.IPV4_PREFIX.length);
139+
octet = (byte) CIDRUtils.IPV4_PREFIX.length;
140+
} else {
141+
bytes = new byte[IPV4_PART_COUNT];
142+
octet = 0;
143+
}
106144
byte digits = 0;
107-
for (int i = 0; i < ipString.length(); i++) {
108-
char c = ipString.charAt(i);
145+
int current = 0;
146+
for (int i = offset; i < offset + length; i++) {
147+
byte c = ipUtf8[i];
109148
if (c == '.') {
110-
octet++;
111-
if (octet > 3 /* too many octets */ || digits == 0 /* empty octet */) {
149+
if (octet >= bytes.length /* too many octets */
150+
|| digits == 0 /* empty octet */
151+
|| current > 255 /* octet is outside a byte range */) {
112152
return null;
113153
}
154+
bytes[octet++] = (byte) current;
155+
current = 0;
114156
digits = 0;
115157
} else if (c >= '0' && c <= '9') {
116-
digits++;
117-
var next = bytes[octet] * 10 + (c - '0');
118-
if (next > 255 /* octet is outside a byte range */ || (digits > 1 && bytes[octet] == 0) /* octet contains leading 0 */) {
158+
if (digits != 0 && current == 0 /* octet contains leading 0 */) {
119159
return null;
120160
}
121-
bytes[octet] = (byte) next;
161+
current = current * 10 + (c - '0');
162+
digits++;
122163
} else {
123164
return null;
124165
}
125166
}
126-
return octet != 3 ? null : bytes;
167+
if (octet != bytes.length - 1 /* too many or too few octets */
168+
|| digits == 0 /* empty octet */
169+
|| current > 255 /* octet is outside a byte range */) {
170+
return null;
171+
}
172+
bytes[octet] = (byte) current;
173+
return bytes;
127174
}
128175

129-
private static byte[] textToNumericFormatV6(String ipString) {
130-
// An address can have [2..8] colons, and N colons make N+1 parts.
131-
String[] parts = ipString.split(":", IPV6_PART_COUNT + 2);
132-
if (parts.length < 3 || parts.length > IPV6_PART_COUNT + 1) {
176+
private static byte[] textToNumericFormatV6(byte[] ipUtf8, int offset, int length) {
177+
if (length < 2) {
178+
// IPv6 addresses must be at least 2 characters long (e.g., "::")
179+
return null;
180+
}
181+
if (ipUtf8[offset] == ':' && ipUtf8[offset + 1] != ':') {
182+
// Addresses can't start with a single colon
133183
return null;
134184
}
185+
if (ipUtf8[offset + length - 1] == ':' && ipUtf8[offset + length - 2] != ':') {
186+
// Addresses can't end with a single colon
187+
return null;
188+
}
189+
190+
// An IPv6 address has 8 hextets (16-bit pieces), each represented by 1-4 hex digits
191+
// Total size: 16 bytes (128 bits)
192+
ByteBuffer bytes = ByteBuffer.allocate(IPV6_PART_COUNT * 2);
135193

136-
// Disregarding the endpoints, find "::" with nothing in between.
137-
// This indicates that a run of zeroes has been skipped.
138-
int skipIndex = -1;
139-
for (int i = 1; i < parts.length - 1; i++) {
140-
if (parts[i].length() == 0) {
141-
if (skipIndex >= 0) {
142-
return null; // Can't have more than one ::
194+
// Find position of :: abbreviation if present
195+
int compressedHextetIndex = -1;
196+
int hextetIndex = 0;
197+
int currentHextetStart = 0;
198+
int currentHextet = 0;
199+
for (int i = offset; i < offset + length; i++) {
200+
byte c = ipUtf8[i];
201+
if (c == ':') {
202+
if (currentHextetStart == i) {
203+
// Two colons in a row, indicating a compressed section
204+
if (compressedHextetIndex >= 0 && i != 1) {
205+
// We've already seen a ::, can't have another
206+
return null;
207+
}
208+
compressedHextetIndex = hextetIndex; // Mark the position of the compressed section
209+
} else {
210+
if (putHextet(bytes, currentHextet) == false) {
211+
return null;
212+
}
213+
currentHextet = 0;
214+
hextetIndex++;
143215
}
144-
skipIndex = i;
216+
currentHextetStart = i + 1;
217+
} else if (c >= '0' && c <= '9') {
218+
// Valid hex digit
219+
currentHextet = currentHextet * 16 + (c - '0');
220+
} else if (c >= 'a' && c <= 'f') {
221+
// Valid hex digit in lowercase
222+
currentHextet = currentHextet * 16 + (c - 'a' + 10);
223+
} else if (c >= 'A' && c <= 'F') {
224+
// Valid hex digit in uppercase
225+
currentHextet = currentHextet * 16 + (c - 'A' + 10);
226+
} else {
227+
return null; // Invalid character
145228
}
146229
}
147-
148-
int partsHi; // Number of parts to copy from above/before the "::"
149-
int partsLo; // Number of parts to copy from below/after the "::"
150-
if (skipIndex >= 0) {
151-
// If we found a "::", then check if it also covers the endpoints.
152-
partsHi = skipIndex;
153-
partsLo = parts.length - skipIndex - 1;
154-
if (parts[0].length() == 0 && --partsHi != 0) {
155-
return null; // ^: requires ^::
156-
}
157-
if (parts[parts.length - 1].length() == 0 && --partsLo != 0) {
158-
return null; // :$ requires ::$
230+
if (currentHextetStart != length) {
231+
// Handle the last hextet
232+
if (putHextet(bytes, currentHextet) == false) {
233+
return null;
159234
}
160-
} else {
161-
// Otherwise, allocate the entire address to partsHi. The endpoints
162-
// could still be empty, but parseHextet() will check for that.
163-
partsHi = parts.length;
164-
partsLo = 0;
235+
hextetIndex++;
165236
}
166237

167-
// If we found a ::, then we must have skipped at least one part.
168-
// Otherwise, we must have exactly the right number of parts.
169-
int partsSkipped = IPV6_PART_COUNT - (partsHi + partsLo);
170-
if ((skipIndex >= 0 ? partsSkipped >= 1 : partsSkipped == 0) == false) {
171-
return null;
238+
if (compressedHextetIndex >= 0) {
239+
if (hextetIndex >= IPV6_PART_COUNT) {
240+
return null; // Invalid, too many hextets
241+
}
242+
shiftHextetsRight(bytes, compressedHextetIndex, hextetIndex);
243+
} else if (hextetIndex != IPV6_PART_COUNT) {
244+
return null; // Invalid, not enough hextets
172245
}
173246

174-
// Now parse the hextets into a byte array.
175-
ByteBuffer rawBytes = ByteBuffer.allocate(2 * IPV6_PART_COUNT);
176-
try {
177-
for (int i = 0; i < partsHi; i++) {
178-
rawBytes.putShort(parseHextet(parts[i]));
179-
}
180-
for (int i = 0; i < partsSkipped; i++) {
181-
rawBytes.putShort((short) 0);
182-
}
183-
for (int i = partsLo; i > 0; i--) {
184-
rawBytes.putShort(parseHextet(parts[parts.length - i]));
185-
}
186-
} catch (NumberFormatException ex) {
187-
return null;
247+
return bytes.array();
248+
}
249+
250+
private static void shiftHextetsRight(ByteBuffer bytes, int start, int end) {
251+
int shift = IPV6_PART_COUNT - end;
252+
for (int hextetIndexToShift = end - 1; hextetIndexToShift >= start; hextetIndexToShift--) {
253+
int bytesIndexBeforeShift = hextetIndexToShift * Short.BYTES;
254+
short hextetToShift = bytes.getShort(bytesIndexBeforeShift);
255+
bytes.putShort(bytesIndexBeforeShift, (short) 0);
256+
bytes.putShort(bytesIndexBeforeShift + shift * Short.BYTES, hextetToShift);
188257
}
189-
return rawBytes.array();
190258
}
191259

192-
private static short parseHextet(String ipPart) {
193-
// Note: we already verified that this string contains only hex digits.
194-
int hextet = Integer.parseInt(ipPart, 16);
260+
private static boolean putHextet(ByteBuffer buf, int hextet) {
261+
if (buf.remaining() < 2) {
262+
return false;
263+
}
195264
if (hextet > 0xffff) {
196-
throw new NumberFormatException();
265+
return false;
197266
}
198-
return (short) hextet;
267+
buf.putShort((short) hextet);
268+
return true;
199269
}
200270

201271
/**
@@ -345,11 +415,29 @@ private static String hextetsToIPv6String(int[] hextets) {
345415
* @throws IllegalArgumentException if the argument is not a valid IP string literal
346416
*/
347417
public static InetAddress forString(String ipString) {
348-
byte[] addr = ipStringToBytes(ipString);
418+
return forString(new Text(ipString).bytes());
419+
}
420+
421+
/**
422+
* A variant of {@link #forString(String)} that accepts an {@link XContentString.UTF8Bytes} object,
423+
* which utilizes a more efficient implementation for parsing the IP address.
424+
*/
425+
public static InetAddress forString(XContentString.UTF8Bytes bytes) {
426+
return forString(bytes.bytes(), bytes.offset(), bytes.length());
427+
}
428+
429+
/**
430+
* A variant of {@link #forString(String)} that accepts a byte array,
431+
* which utilizes a more efficient implementation for parsing the IP address.
432+
*/
433+
public static InetAddress forString(byte[] ipUtf8, int offset, int length) {
434+
byte[] addr = ipStringToBytes(ipUtf8, offset, length, false);
349435

350436
// The argument was malformed, i.e. not an IP string literal.
351437
if (addr == null) {
352-
throw new IllegalArgumentException(String.format(Locale.ROOT, "'%s' is not an IP string literal.", ipString));
438+
throw new IllegalArgumentException(
439+
String.format(Locale.ROOT, "'%s' is not an IP string literal.", new String(ipUtf8, offset, length, StandardCharsets.UTF_8))
440+
);
353441
}
354442

355443
return bytesToInetAddress(addr);

0 commit comments

Comments
 (0)