Skip to content

Commit 32ff026

Browse files
authored
Optimize dotCount in expanding dot parser (#135263)
This commit optimises the implementation of dotCount to avoid the cost of walking each character individually. The change is small: it defers to String::indexOf, which is backed by a vectorized JVM HotSpot intrinsic. I noticed this method showing up in CPU profiles, in some cases consuming ~2% even when there are no fields with dots! After the change, this cost mostly disappears.
1 parent 3dcec80 commit 32ff026

File tree

3 files changed

+34
-4
lines changed

3 files changed

+34
-4
lines changed

docs/changelog/135263.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 135263
2+
summary: Optimize `dotCount` in expanding dot parser
3+
area: "Mapping"
4+
type: enhancement
5+
issues: []

server/src/main/java/org/elasticsearch/index/mapper/FieldTypeLookup.java

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -153,10 +153,9 @@ final class FieldTypeLookup {
153153

154154
public static int dotCount(String path) {
155155
int dotCount = 0;
156-
for (int i = 0; i < path.length(); i++) {
157-
if (path.charAt(i) == '.') {
158-
dotCount++;
159-
}
156+
int index = -1;
157+
while ((index = path.indexOf('.', index + 1)) != -1) {
158+
dotCount++;
160159
}
161160
return dotCount;
162161
}

server/src/test/java/org/elasticsearch/index/mapper/FieldTypeLookupTests.java

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import org.elasticsearch.test.ESTestCase;
1515
import org.hamcrest.Matchers;
1616

17+
import java.nio.charset.StandardCharsets;
1718
import java.util.ArrayList;
1819
import java.util.Arrays;
1920
import java.util.Collection;
@@ -522,6 +523,31 @@ public void testNoRootAliasForPassThroughFieldOnConflictingField() {
522523
assertEquals(foo.fieldType(), lookup.get("foo"));
523524
}
524525

526+
public void testDotCount() {
527+
assertEquals(0, FieldTypeLookup.dotCount(""));
528+
assertEquals(1, FieldTypeLookup.dotCount("."));
529+
assertEquals(2, FieldTypeLookup.dotCount(".."));
530+
assertEquals(3, FieldTypeLookup.dotCount("..."));
531+
assertEquals(4, FieldTypeLookup.dotCount("...."));
532+
assertEquals(0, FieldTypeLookup.dotCount("foo"));
533+
assertEquals(1, FieldTypeLookup.dotCount("foo.bar"));
534+
assertEquals(2, FieldTypeLookup.dotCount("foo.bar.baz"));
535+
assertEquals(3, FieldTypeLookup.dotCount("foo.bar.baz.bob"));
536+
assertEquals(4, FieldTypeLookup.dotCount("foo.bar.baz.bob."));
537+
assertEquals(4, FieldTypeLookup.dotCount("foo..bar.baz.bob"));
538+
assertEquals(5, FieldTypeLookup.dotCount("foo..bar..baz.bob"));
539+
assertEquals(6, FieldTypeLookup.dotCount("foo..bar..baz.bob."));
540+
541+
int times = atLeast(50);
542+
for (int i = 0; i < times; i++) {
543+
byte[] bytes = new byte[randomInt(1024)];
544+
random().nextBytes(bytes);
545+
String s = new String(bytes, StandardCharsets.UTF_8);
546+
int expected = s.chars().map(c -> c == '.' ? 1 : 0).sum();
547+
assertEquals(expected, FieldTypeLookup.dotCount(s));
548+
}
549+
}
550+
525551
@SafeVarargs
526552
@SuppressWarnings("varargs")
527553
static <T> List<T> randomizedList(T... values) {

0 commit comments

Comments
 (0)