Skip to content

Commit ebf5d42

Browse files
committed
Some micro-microbenchmarks
1 parent a01ce9d commit ebf5d42

File tree

5 files changed

+198
-6
lines changed

5 files changed

+198
-6
lines changed

benchmarks/build.gradle

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ dependencies {
4545
api(project(':x-pack:plugin:esql'))
4646
api(project(':x-pack:plugin:esql:compute'))
4747
implementation project(path: ':libs:simdvec')
48+
implementation project(':x-pack:plugin:mapper-patterned-text')
4849
expression(project(path: ':modules:lang-expression', configuration: 'zip'))
4950
painless(project(path: ':modules:lang-painless', configuration: 'zip'))
5051
nativeLib(project(':libs:native'))
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.benchmark.index.mapper;
11+
12+
import org.elasticsearch.xpack.patternedtext.PatternedTextValueProcessor;
13+
import org.openjdk.jmh.annotations.Benchmark;
14+
import org.openjdk.jmh.annotations.BenchmarkMode;
15+
import org.openjdk.jmh.annotations.Fork;
16+
import org.openjdk.jmh.annotations.Measurement;
17+
import org.openjdk.jmh.annotations.Mode;
18+
import org.openjdk.jmh.annotations.OutputTimeUnit;
19+
import org.openjdk.jmh.annotations.Param;
20+
import org.openjdk.jmh.annotations.Scope;
21+
import org.openjdk.jmh.annotations.State;
22+
import org.openjdk.jmh.annotations.Warmup;
23+
import org.openjdk.jmh.infra.Blackhole;
24+
25+
import java.util.concurrent.TimeUnit;
26+
27+
@BenchmarkMode(Mode.AverageTime)
28+
@OutputTimeUnit(TimeUnit.NANOSECONDS)
29+
@Fork(value = 1)
30+
@Warmup(iterations = 2)
31+
@Measurement(iterations = 3)
32+
@State(Scope.Benchmark)
33+
public class PatternedTextMapperOperationsBenchmark {
34+
@Param(
35+
{
36+
"550e8400-e29b-41d4-a716-446655440000", // valid UUID
37+
"not-a-uuid", // early identifiable invalid UUID
38+
"123e4567-e89b-12d3-a456-4266141740000" // late identifiable invalid UUID
39+
}
40+
)
41+
public String uuid;
42+
43+
@Benchmark
44+
public void testUuidMatchManual(Blackhole blackhole) {
45+
blackhole.consume(PatternedTextValueProcessor.isUUID_manual(uuid));
46+
}
47+
48+
@Benchmark
49+
public void testUuidMatchManualWithValidation(Blackhole blackhole) {
50+
blackhole.consume(PatternedTextValueProcessor.isUUID_manual_withValidation(uuid));
51+
}
52+
53+
@Benchmark
54+
public void testUuidMatchRegex(Blackhole blackhole) {
55+
blackhole.consume(PatternedTextValueProcessor.isUUID_regex(uuid));
56+
}
57+
58+
@Param({ "172.16.0", "255.255.255.255" })
59+
public String ip;
60+
61+
@Benchmark
62+
public void testIpv4MatchManual(Blackhole blackhole) {
63+
blackhole.consume(PatternedTextValueProcessor.isIpv4_manual(ip));
64+
}
65+
66+
@Benchmark
67+
public void testIpv4MatchManual_Iterative(Blackhole blackhole) {
68+
blackhole.consume(PatternedTextValueProcessor.isIpv4_manual_iterative(ip));
69+
}
70+
71+
@Benchmark
72+
public void testIpv4MatchRegex(Blackhole blackhole) {
73+
blackhole.consume(PatternedTextValueProcessor.isIpv4_regex(ip));
74+
}
75+
}

x-pack/plugin/mapper-patterned-text/src/main/java/org/elasticsearch/xpack/patternedtext/PatternedTextFieldMapper.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,7 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio
154154

155155
// Add template docvalues and index.
156156
context.doc().add(new SortedSetDocValuesField(fieldType().templateFieldName(), new BytesRef(parts.template())));
157+
// todo: calling templateStripped() right after split() seems like a waste, would be better to do it in the split() method
157158
context.doc().add(new Field(fieldType().templateFieldName(), parts.templateStripped(), templateFieldType));
158159

159160
// Add timestamp docvalues.

x-pack/plugin/mapper-patterned-text/src/main/java/org/elasticsearch/xpack/patternedtext/PatternedTextValueProcessor.java

Lines changed: 81 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,12 @@ public class PatternedTextValueProcessor {
3131
"^(\\d{4})[-/](\\d{2})[-/](\\d{2})[T ](\\d{2}):(\\d{2}):(\\d{2})(\\.(\\d{3})Z?)?[ ]?([\\+\\-]\\d{2}([:]?\\d{2})?)?$"
3232
);
3333

34+
// Matches a whole string made of four dot-separated decimal octets, each in the range 0-255
// (one to three digits, leading zeros allowed).
public static final Pattern IPv4_PATTERN = Pattern.compile(
35+
"^((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$"
36+
);
37+
38+
// Matches a whole string in the 8-4-4-4-12 hyphenated layout of ASCII hexadecimal digits, in either case.
private static final Pattern UUID_PATTERN = Pattern.compile("^[0-9A-Fa-f]{8}-(?:[0-9A-Fa-f]{4}-){3}[0-9A-Fa-f]{12}$");
39+
3440
record Parts(String template, Long timestamp, List<String> args, String indexed) {
3541
String templateStripped() {
3642
List<String> stripped = new ArrayList<>();
@@ -135,6 +141,19 @@ private static boolean isTimestamp(String text) {
135141
}
136142

137143
private static boolean isIpv4(String text, byte[] bytes) {
144+
boolean isIPv4 = isIpv4_manual(text);
145+
if (isIPv4 == false) {
146+
return false;
147+
}
148+
// redundant duplicated split, only done to be able to measure isIpv4_manual() performance in isolation
149+
String[] tokens = text.split("\\.");
150+
for (int i = 0; i < 4; i++) {
151+
bytes[i] = (byte) Integer.parseInt(tokens[i]);
152+
}
153+
return true;
154+
}
155+
156+
public static boolean isIpv4_manual(String text) {
138157
String[] tokens = text.split("\\.");
139158
if (tokens.length != 4) {
140159
return false;
@@ -149,12 +168,41 @@ private static boolean isIpv4(String text, byte[] bytes) {
149168
}
150169
}
151170
}
152-
for (int i = 0; i < 4; i++) {
153-
bytes[i] = (byte) Integer.parseInt(tokens[i]);
154-
}
171+
// todo: this still doesn't check that each octet is between 0 and 255
172+
// validation may be more important for IPs, as we can potentially assign it to an IP field type
155173
return true;
156174
}
157175

176+
public static boolean isIpv4_manual_iterative(String text) {
177+
if (text.length() < 7 || text.length() > 15) {
178+
return false;
179+
}
180+
int numOctets = 1;
181+
int octet = 0;
182+
for (int i = 0; i < text.length(); i++) {
183+
char c = text.charAt(i);
184+
if (c == '.') {
185+
numOctets++;
186+
if (numOctets > 4) {
187+
return false;
188+
}
189+
octet = 0;
190+
} else if (Character.isDigit(c)) {
191+
octet = octet * 10 + c - '0';
192+
if (octet > 255) {
193+
return false;
194+
}
195+
} else {
196+
return false;
197+
}
198+
}
199+
return numOctets == 4;
200+
}
201+
202+
public static boolean isIpv4_regex(String text) {
203+
return IPv4_PATTERN.matcher(text).matches();
204+
}
205+
158206
private static String toIPv4(byte[] bytes) {
159207
assert bytes.length == 4 : bytes.length;
160208
return Byte.toUnsignedInt(bytes[0])
@@ -168,7 +216,7 @@ private static String toIPv4(byte[] bytes) {
168216

169217
private static boolean isUUID(String text, byte[] bytes) {
170218
assert bytes.length == 16 : bytes.length;
171-
if (text.length() == 36 && text.charAt(8) == '-' && text.charAt(13) == '-' && text.charAt(18) == '-' && text.charAt(23) == '-') {
219+
if (isUUID_manual(text)) {
172220
UUID uuid = UUID.fromString(text);
173221
ByteUtils.writeLongLE(uuid.getMostSignificantBits(), bytes, 0);
174222
ByteUtils.writeLongLE(uuid.getLeastSignificantBits(), bytes, 8);
@@ -177,15 +225,42 @@ private static boolean isUUID(String text, byte[] bytes) {
177225
return false;
178226
}
179227

228+
public static boolean isUUID_manual(String text) {
229+
// this does not verify that the input contains only hexadecimal characters, but it is extremely cheap and the effect of
230+
// false positives is negligible, so it should be good enough
231+
return text.length() == 36 && text.charAt(8) == '-' && text.charAt(13) == '-' && text.charAt(18) == '-' && text.charAt(23) == '-';
232+
}
233+
234+
public static boolean isUUID_manual_withValidation(String text) {
235+
if (text.length() != 36) {
236+
return false;
237+
}
238+
for (int i = 0; i < 36; i++) {
239+
char c = text.charAt(i);
240+
if (i == 8 || i == 13 || i == 18 || i == 23) {
241+
if (c != '-') {
242+
return false;
243+
}
244+
} else if (Character.digit(c, 16) == -1) {
245+
return false;
246+
}
247+
}
248+
return true;
249+
}
250+
251+
public static boolean isUUID_regex(String text) {
252+
return UUID_PATTERN.matcher(text).matches();
253+
}
254+
180255
private static String toUUID(byte[] bytes) {
181256
assert bytes.length == 16 : bytes.length;
182257
UUID uuid = new UUID(ByteUtils.readLongLE(bytes, 0), ByteUtils.readLongLE(bytes, 8));
183258
return uuid.toString();
184259
}
185260

186261
private static boolean isArg(String text) {
187-
for (char ch : text.toCharArray()) {
188-
if (Character.isDigit(ch)) {
262+
for (int i = 0; i < text.length(); i++) {
263+
if (Character.isDigit(text.charAt(i))) {
189264
return true;
190265
}
191266
}

x-pack/plugin/mapper-patterned-text/src/test/java/org/elasticsearch/xpack/patternedtext/PatternedTextValueProcessorTests.java

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,4 +97,44 @@ public void testSecondDate() {
9797
PatternedTextValueProcessor.merge(parts)
9898
);
9999
}
100+
101+
public void testIsUUID() {
102+
String[] validUUIDs = { "123e4567-e89b-12d3-a456-426614174000", "550e8400-e29b-41d4-a716-446655440000" };
103+
104+
String[] invalidUUIDs = {
105+
"not-a-uuid", // very invalid
106+
"550e8400-e29b-41d4-a716-4466554400000" // Invalid last extra character
107+
};
108+
109+
for (String uuid : validUUIDs) {
110+
assertTrue("Expected valid UUID: " + uuid, PatternedTextValueProcessor.isUUID_manual(uuid));
111+
assertTrue("Expected valid UUID: " + uuid, PatternedTextValueProcessor.isUUID_regex(uuid));
112+
}
113+
114+
for (String uuid : invalidUUIDs) {
115+
assertFalse("Expected invalid UUID: " + uuid, PatternedTextValueProcessor.isUUID_manual(uuid));
116+
assertFalse("Expected invalid UUID: " + uuid, PatternedTextValueProcessor.isUUID_regex(uuid));
117+
}
118+
}
119+
120+
public void testIsIPv4() {
121+
String[] validIPv4s = { "192.168.1.1", "10.0.0.1", "172.16.0.1", "255.255.255.255", "0.0.0.0" };
122+
123+
String[] invalidIPv4s = {
124+
"256.256.256.256", // Out of range
125+
"192.168.1", // Missing one octet
126+
"192.168.1.1.1", // Extra octet
127+
"192.168.1.a" // Invalid character
128+
};
129+
130+
for (String ip : validIPv4s) {
131+
assertTrue("Expected valid IPv4: " + ip, PatternedTextValueProcessor.isIpv4_manual_iterative(ip));
132+
assertTrue("Expected valid IPv4: " + ip, PatternedTextValueProcessor.isIpv4_regex(ip));
133+
}
134+
135+
for (String ip : invalidIPv4s) {
136+
assertFalse("Expected invalid IPv4: " + ip, PatternedTextValueProcessor.isIpv4_regex(ip));
137+
assertFalse("Expected invalid IPv4: " + ip, PatternedTextValueProcessor.isIpv4_manual_iterative(ip));
138+
}
139+
}
100140
}

0 commit comments

Comments
 (0)