From 954666712c98c217e94d29b6cc0d34046d67d725 Mon Sep 17 00:00:00 2001 From: Joe Gallo Date: Wed, 29 Jan 2025 13:38:43 -0500 Subject: [PATCH] Optimize IngestDocument FieldPath allocation (#120573) --- docs/changelog/120573.yaml | 5 ++ .../elasticsearch/ingest/IngestDocument.java | 56 ++++++++++++++----- 2 files changed, 47 insertions(+), 14 deletions(-) create mode 100644 docs/changelog/120573.yaml diff --git a/docs/changelog/120573.yaml b/docs/changelog/120573.yaml new file mode 100644 index 0000000000000..33ced06ddf996 --- /dev/null +++ b/docs/changelog/120573.yaml @@ -0,0 +1,5 @@ +pr: 120573 +summary: Optimize `IngestDocument` `FieldPath` allocation +area: Ingest Node +type: enhancement +issues: [] diff --git a/server/src/main/java/org/elasticsearch/ingest/IngestDocument.java b/server/src/main/java/org/elasticsearch/ingest/IngestDocument.java index 9ab5e11524f9b..97cd738eaf5bd 100644 --- a/server/src/main/java/org/elasticsearch/ingest/IngestDocument.java +++ b/server/src/main/java/org/elasticsearch/ingest/IngestDocument.java @@ -12,6 +12,7 @@ import org.elasticsearch.common.Strings; import org.elasticsearch.common.util.CollectionUtils; import org.elasticsearch.common.util.Maps; +import org.elasticsearch.common.util.concurrent.ConcurrentCollections; import org.elasticsearch.common.util.set.Sets; import org.elasticsearch.index.VersionType; import org.elasticsearch.index.mapper.IdFieldMapper; @@ -189,8 +190,8 @@ public T getFieldValue(String path, Class clazz) { * or if the field that is found at the provided path is not of the expected type. 
*/ public <T> T getFieldValue(String path, Class<T> clazz, boolean ignoreMissing) { - FieldPath fieldPath = new FieldPath(path); - Object context = fieldPath.initialContext; + final FieldPath fieldPath = FieldPath.of(path); + Object context = fieldPath.initialContext(this); for (String pathElement : fieldPath.pathElements) { ResolveResult result = resolve(pathElement, path, context); if (result.wasSuccessful) { @@ -260,8 +261,8 @@ public boolean hasField(String path) { * @throws IllegalArgumentException if the path is null, empty or invalid. */ public boolean hasField(String path, boolean failOutOfRange) { - FieldPath fieldPath = new FieldPath(path); - Object context = fieldPath.initialContext; + final FieldPath fieldPath = FieldPath.of(path); + Object context = fieldPath.initialContext(this); for (int i = 0; i < fieldPath.pathElements.length - 1; i++) { String pathElement = fieldPath.pathElements[i]; if (context == null) { @@ -328,8 +329,8 @@ public boolean hasField(String path, boolean failOutOfRange) { * @throws IllegalArgumentException if the path is null, empty, invalid or if the field doesn't exist. 
*/ public void removeField(String path) { - FieldPath fieldPath = new FieldPath(path); - Object context = fieldPath.initialContext; + final FieldPath fieldPath = FieldPath.of(path); + Object context = fieldPath.initialContext(this); for (int i = 0; i < fieldPath.pathElements.length - 1; i++) { ResolveResult result = resolve(fieldPath.pathElements[i], path, context); if (result.wasSuccessful) { @@ -543,8 +544,8 @@ public void setFieldValue(String path, Object value, boolean ignoreEmptyValue) { } private void setFieldValue(String path, Object value, boolean append, boolean allowDuplicates) { - FieldPath fieldPath = new FieldPath(path); - Object context = fieldPath.initialContext; + final FieldPath fieldPath = FieldPath.of(path); + Object context = fieldPath.initialContext(this); for (int i = 0; i < fieldPath.pathElements.length - 1; i++) { String pathElement = fieldPath.pathElements[i]; if (context == null) { @@ -995,21 +996,45 @@ public String getFieldName() { } } - private class FieldPath { + private static final class FieldPath { - private final String[] pathElements; - private final Object initialContext; + private static final int MAX_SIZE = 512; + private static final Map<String, FieldPath> CACHE = ConcurrentCollections.newConcurrentMapWithAggressiveConcurrency(); - private FieldPath(String path) { + // constructing a new FieldPath requires that we parse a String (e.g. "foo.bar.baz") into an array + // of path elements (e.g. ["foo", "bar", "baz"]). Calling String#split results in the allocation + // of an ArrayList to hold the results, then a new String is created for each path element, and + // then finally a String[] is allocated to hold the actual result -- in addition to all that, we + // do some processing ourselves on the path and path elements to validate and prepare them. + // the above CACHE and the below 'FieldPath.of' method allow us to almost always avoid this work. 
+ + static FieldPath of(String path) { if (Strings.isEmpty(path)) { throw new IllegalArgumentException("path cannot be null nor empty"); } + FieldPath res = CACHE.get(path); + if (res != null) { + return res; + } + res = new FieldPath(path); + if (CACHE.size() > MAX_SIZE) { + CACHE.clear(); + } + CACHE.put(path, res); + return res; + } + + private final String[] pathElements; + private final boolean useIngestContext; + + // you shouldn't call this directly, use the FieldPath.of method above instead! + private FieldPath(String path) { String newPath; if (path.startsWith(INGEST_KEY_PREFIX)) { - initialContext = ingestMetadata; + useIngestContext = true; newPath = path.substring(INGEST_KEY_PREFIX.length()); } else { - initialContext = ctxMap; + useIngestContext = false; if (path.startsWith(SOURCE_PREFIX)) { newPath = path.substring(SOURCE_PREFIX.length()); } else { @@ -1022,6 +1047,9 @@ private FieldPath(String path) { } } + public Object initialContext(IngestDocument document) { + return useIngestContext ? document.getIngestMetadata() : document.getCtxMap(); + } } private static class ResolveResult {