Skip to content

Commit 6884dbb

Browse files
authored
Reserve square bracket syntax for ingest document flexible field access pattern (#134172)
Adds validation to field path parsing to ensure that no square brackets are used in the path when flexible mode is enabled. We plan to introduce array indexing syntax to flexible mode in the future (`a.b[1].c`), but only after it is more widely supported in the ingest node APIs. To ensure that users do not make a habit of using this reserved syntax going forward, we are reserving it to ensure that it remains unused until it is supported.
1 parent 1a0f092 commit 6884dbb

File tree

2 files changed

+152
-21
lines changed

2 files changed

+152
-21
lines changed

server/src/main/java/org/elasticsearch/ingest/IngestDocument.java

Lines changed: 63 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
import java.util.List;
4141
import java.util.Map;
4242
import java.util.Objects;
43+
import java.util.Optional;
4344
import java.util.Set;
4445
import java.util.function.BiConsumer;
4546
import java.util.stream.Collectors;
@@ -201,7 +202,7 @@ public <T> T getFieldValue(String path, Class<T> clazz) {
201202
* or if the field that is found at the provided path is not of the expected type.
202203
*/
203204
public <T> T getFieldValue(String path, Class<T> clazz, boolean ignoreMissing) {
204-
final FieldPath fieldPath = FieldPath.of(path);
205+
final FieldPath fieldPath = FieldPath.of(path, getCurrentAccessPatternSafe());
205206
Object context = fieldPath.initialContext(this);
206207
ResolveResult result = resolve(fieldPath.pathElements, fieldPath.pathElements.length, path, context, getCurrentAccessPatternSafe());
207208
if (result.wasSuccessful) {
@@ -270,7 +271,7 @@ public boolean hasField(String path) {
270271
* @throws IllegalArgumentException if the path is null, empty or invalid.
271272
*/
272273
public boolean hasField(String path, boolean failOutOfRange) {
273-
final FieldPath fieldPath = FieldPath.of(path);
274+
final FieldPath fieldPath = FieldPath.of(path, getCurrentAccessPatternSafe());
274275
Object context = fieldPath.initialContext(this);
275276
int leafKeyIndex = fieldPath.pathElements.length - 1;
276277
int lastContainerIndex = fieldPath.pathElements.length - 2;
@@ -424,7 +425,7 @@ public void removeField(String path) {
424425
* @throws IllegalArgumentException if the path is null, empty, or invalid; or if the field doesn't exist (and ignoreMissing is false).
425426
*/
426427
public void removeField(String path, boolean ignoreMissing) {
427-
final FieldPath fieldPath = FieldPath.of(path);
428+
final FieldPath fieldPath = FieldPath.of(path, getCurrentAccessPatternSafe());
428429
Object context = fieldPath.initialContext(this);
429430
String leafKey = fieldPath.pathElements[fieldPath.pathElements.length - 1];
430431
ResolveResult result = resolve(
@@ -734,7 +735,7 @@ public void setFieldValue(String path, Object value, boolean ignoreEmptyValue) {
734735
}
735736

736737
private void setFieldValue(String path, Object value, boolean append, boolean allowDuplicates) {
737-
final FieldPath fieldPath = FieldPath.of(path);
738+
final FieldPath fieldPath = FieldPath.of(path, getCurrentAccessPatternSafe());
738739
Object context = fieldPath.initialContext(this);
739740
int leafKeyIndex = fieldPath.pathElements.length - 1;
740741
int lastContainerIndex = fieldPath.pathElements.length - 2;
@@ -1155,18 +1156,18 @@ List<String> getPipelineStack() {
11551156
}
11561157

11571158
/**
1158-
* @return The access pattern for any currently executing pipelines, or null if no pipelines are in progress for this doc
1159+
* @return The access pattern for any currently executing pipelines, or empty if no pipelines are in progress for this doc
11591160
*/
1160-
public IngestPipelineFieldAccessPattern getCurrentAccessPattern() {
1161-
return accessPatternStack.peek();
1161+
public Optional<IngestPipelineFieldAccessPattern> getCurrentAccessPattern() {
1162+
return Optional.ofNullable(accessPatternStack.peek());
11621163
}
11631164

11641165
/**
11651166
* @return The access pattern for any currently executing pipelines, or {@link IngestPipelineFieldAccessPattern#CLASSIC} if no
11661167
* pipelines are in progress for this doc for the sake of backwards compatibility
11671168
*/
11681169
private IngestPipelineFieldAccessPattern getCurrentAccessPatternSafe() {
1169-
return Objects.requireNonNullElse(getCurrentAccessPattern(), IngestPipelineFieldAccessPattern.CLASSIC);
1170+
return getCurrentAccessPattern().orElse(IngestPipelineFieldAccessPattern.CLASSIC);
11701171
}
11711172

11721173
/**
@@ -1290,8 +1291,15 @@ public String getFieldName() {
12901291

12911292
private static final class FieldPath {
12921293

1294+
/**
1295+
* A compound cache key for tracking previously parsed field paths
1296+
* @param path The field path as given by the caller
1297+
* @param accessPattern The access pattern used to parse the field path
1298+
*/
1299+
private record CacheKey(String path, IngestPipelineFieldAccessPattern accessPattern) {}
1300+
12931301
private static final int MAX_SIZE = 512;
1294-
private static final Map<String, FieldPath> CACHE = ConcurrentCollections.newConcurrentMapWithAggressiveConcurrency();
1302+
private static final Map<CacheKey, FieldPath> CACHE = ConcurrentCollections.newConcurrentMapWithAggressiveConcurrency();
12951303

12961304
// constructing a new FieldPath requires that we parse a String (e.g. "foo.bar.baz") into an array
12971305
// of path elements (e.g. ["foo", "bar", "baz"]). Calling String#split results in the allocation
@@ -1300,27 +1308,28 @@ private static final class FieldPath {
13001308
// do some processing ourselves on the path and path elements to validate and prepare them.
13011309
// the above CACHE and the below 'FieldPath.of' method allow us to almost always avoid this work.
13021310

1303-
static FieldPath of(String path) {
1311+
static FieldPath of(String path, IngestPipelineFieldAccessPattern accessPattern) {
13041312
if (Strings.isEmpty(path)) {
13051313
throw new IllegalArgumentException("path cannot be null nor empty");
13061314
}
1307-
FieldPath res = CACHE.get(path);
1315+
CacheKey cacheKey = new CacheKey(path, accessPattern);
1316+
FieldPath res = CACHE.get(cacheKey);
13081317
if (res != null) {
13091318
return res;
13101319
}
1311-
res = new FieldPath(path);
1320+
res = new FieldPath(path, accessPattern);
13121321
if (CACHE.size() > MAX_SIZE) {
13131322
CACHE.clear();
13141323
}
1315-
CACHE.put(path, res);
1324+
CACHE.put(cacheKey, res);
13161325
return res;
13171326
}
13181327

13191328
private final String[] pathElements;
13201329
private final boolean useIngestContext;
13211330

13221331
// you shouldn't call this directly, use the FieldPath.of method above instead!
1323-
private FieldPath(String path) {
1332+
private FieldPath(String path, IngestPipelineFieldAccessPattern accessPattern) {
13241333
String newPath;
13251334
if (path.startsWith(INGEST_KEY_PREFIX)) {
13261335
useIngestContext = true;
@@ -1333,10 +1342,47 @@ private FieldPath(String path) {
13331342
newPath = path;
13341343
}
13351344
}
1336-
this.pathElements = newPath.split("\\.");
1337-
if (pathElements.length == 1 && pathElements[0].isEmpty()) {
1338-
throw new IllegalArgumentException("path [" + path + "] is not valid");
1345+
String[] pathParts = newPath.split("\\.");
1346+
this.pathElements = processPathParts(path, pathParts, accessPattern);
1347+
}
1348+
1349+
private static String[] processPathParts(String fullPath, String[] pathParts, IngestPipelineFieldAccessPattern accessPattern) {
1350+
return switch (accessPattern) {
1351+
case CLASSIC -> validateClassicFields(fullPath, pathParts);
1352+
case FLEXIBLE -> parseFlexibleFields(fullPath, pathParts);
1353+
};
1354+
}
1355+
1356+
/**
1357+
* Parses path syntax that is specific to the {@link IngestPipelineFieldAccessPattern#CLASSIC} ingest doc access pattern. Supports
1358+
* syntax like context aware array access.
1359+
* @param fullPath The un-split path to use for error messages
1360+
* @param pathParts The tokenized field path to parse
1361+
* @return An array of Strings
1362+
*/
1363+
private static String[] validateClassicFields(String fullPath, String[] pathParts) {
1364+
for (String pathPart : pathParts) {
1365+
if (pathPart.isEmpty()) {
1366+
throw new IllegalArgumentException("path [" + fullPath + "] is not valid");
1367+
}
1368+
}
1369+
return pathParts;
1370+
}
1371+
1372+
/**
1373+
* Parses path syntax that is specific to the {@link IngestPipelineFieldAccessPattern#FLEXIBLE} ingest doc access pattern. Supports
1374+
* syntax like square bracket array access, which is the only way to index arrays in flexible mode.
1375+
* @param fullPath The un-split path to use for error messages
1376+
* @param pathParts The tokenized field path to parse
1377+
* @return An array of Strings
1378+
*/
1379+
private static String[] parseFlexibleFields(String fullPath, String[] pathParts) {
1380+
for (String pathPart : pathParts) {
1381+
if (pathPart.isEmpty() || pathPart.contains("[") || pathPart.contains("]")) {
1382+
throw new IllegalArgumentException("path [" + fullPath + "] is not valid");
1383+
}
13391384
}
1385+
return pathParts;
13401386
}
13411387

13421388
public Object initialContext(IngestDocument document) {

server/src/test/java/org/elasticsearch/ingest/IngestDocumentTests.java

Lines changed: 89 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,90 @@ private void doWithRandomAccessPattern(Consumer<IngestDocument> action) throws E
189189
doWithAccessPattern(randomFrom(IngestPipelineFieldAccessPattern.values()), action);
190190
}
191191

192+
private void assertPathValid(IngestDocument doc, String path) {
193+
// The fields being checked do not exist, so they all return false when running hasField
194+
assertFalse(doc.hasField(path));
195+
}
196+
197+
private void assertPathInvalid(IngestDocument doc, String path, String errorMessage) {
198+
IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> doc.hasField(path));
199+
assertThat(expected.getMessage(), equalTo(errorMessage));
200+
}
201+
202+
public void testPathParsingLogic() throws Exception {
203+
// Force a blank document for this test
204+
document = new IngestDocument("index", "id", 1, null, null, new HashMap<>());
205+
206+
doWithRandomAccessPattern((doc) -> {
207+
assertPathInvalid(doc, null, "path cannot be null nor empty");
208+
assertPathInvalid(doc, "", "path cannot be null nor empty");
209+
assertPathValid(doc, "a");
210+
assertPathValid(doc, "ab");
211+
assertPathValid(doc, "abc");
212+
assertPathValid(doc, "a.b");
213+
assertPathValid(doc, "a.b.c");
214+
// Trailing empty strings are trimmed by field path parsing logic
215+
assertPathValid(doc, "a.");
216+
assertPathValid(doc, "a..");
217+
assertPathValid(doc, "a...");
218+
// Empty field names are not allowed in the beginning or middle of the path though
219+
assertPathInvalid(doc, ".a.b", "path [.a.b] is not valid");
220+
assertPathInvalid(doc, "a..b", "path [a..b] is not valid");
221+
});
222+
223+
doWithAccessPattern(CLASSIC, (doc) -> {
224+
// Classic allows number fields because they are treated as either field names or array indices depending on context
225+
assertPathValid(doc, "a.0");
226+
// Classic allows square brackets because it is not part of it's syntax
227+
assertPathValid(doc, "a[0]");
228+
assertPathValid(doc, "a[]");
229+
assertPathValid(doc, "a][");
230+
assertPathValid(doc, "[");
231+
assertPathValid(doc, "a[");
232+
assertPathValid(doc, "[a");
233+
assertPathValid(doc, "]");
234+
assertPathValid(doc, "a]");
235+
assertPathValid(doc, "]a");
236+
assertPathValid(doc, "[]");
237+
assertPathValid(doc, "][");
238+
assertPathValid(doc, "[a]");
239+
assertPathValid(doc, "]a[");
240+
assertPathValid(doc, "[]a");
241+
assertPathValid(doc, "][a");
242+
});
243+
244+
doWithAccessPattern(FLEXIBLE, (doc) -> {
245+
// Flexible has specific handling of square brackets
246+
assertPathInvalid(doc, "a[0]", "path [a[0]] is not valid");
247+
assertPathInvalid(doc, "a[]", "path [a[]] is not valid");
248+
assertPathInvalid(doc, "a][", "path [a][] is not valid");
249+
assertPathInvalid(doc, "[", "path [[] is not valid");
250+
assertPathInvalid(doc, "a[", "path [a[] is not valid");
251+
assertPathInvalid(doc, "[a", "path [[a] is not valid");
252+
assertPathInvalid(doc, "]", "path []] is not valid");
253+
assertPathInvalid(doc, "a]", "path [a]] is not valid");
254+
assertPathInvalid(doc, "]a", "path []a] is not valid");
255+
assertPathInvalid(doc, "[]", "path [[]] is not valid");
256+
assertPathInvalid(doc, "][", "path [][] is not valid");
257+
assertPathInvalid(doc, "[a]", "path [[a]] is not valid");
258+
assertPathInvalid(doc, "]a[", "path []a[] is not valid");
259+
assertPathInvalid(doc, "[]a", "path [[]a] is not valid");
260+
assertPathInvalid(doc, "][a", "path [][a] is not valid");
261+
262+
assertPathInvalid(doc, "a[0].b", "path [a[0].b] is not valid");
263+
assertPathInvalid(doc, "a[0].b[1]", "path [a[0].b[1]] is not valid");
264+
assertPathInvalid(doc, "a[0].b[1].c", "path [a[0].b[1].c] is not valid");
265+
assertPathInvalid(doc, "a[0].b[1].c[2]", "path [a[0].b[1].c[2]] is not valid");
266+
assertPathInvalid(doc, "a[0][1].c[2]", "path [a[0][1].c[2]] is not valid");
267+
assertPathInvalid(doc, "a[0].b[1][2]", "path [a[0].b[1][2]] is not valid");
268+
assertPathInvalid(doc, "a[0][1][2]", "path [a[0][1][2]] is not valid");
269+
270+
assertPathInvalid(doc, "a[0][", "path [a[0][] is not valid");
271+
assertPathInvalid(doc, "a[0]]", "path [a[0]]] is not valid");
272+
assertPathInvalid(doc, "a[0]blahblah", "path [a[0]blahblah] is not valid");
273+
});
274+
}
275+
192276
public void testSimpleGetFieldValue() throws Exception {
193277
doWithRandomAccessPattern((doc) -> {
194278
assertThat(doc.getFieldValue("foo", String.class), equalTo("bar"));
@@ -2034,7 +2118,7 @@ public void testNestedAccessPatternPropagation() {
20342118

20352119
// At the end of the test, there should be neither pipeline ids nor access patterns left in the stack.
20362120
assertThat(document.getPipelineStack(), is(empty()));
2037-
assertThat(document.getCurrentAccessPattern(), is(nullValue()));
2121+
assertThat(document.getCurrentAccessPattern().isEmpty(), is(true));
20382122
}
20392123

20402124
/**
@@ -2082,7 +2166,8 @@ void doTestNestedAccessPatternPropagation(int level, int maxCallDepth, IngestDoc
20822166

20832167
// Assert expected state
20842168
assertThat(document.getPipelineStack().getFirst(), is(expectedPipelineId));
2085-
assertThat(document.getCurrentAccessPattern(), is(expectedAccessPattern));
2169+
assertThat(document.getCurrentAccessPattern().isPresent(), is(true));
2170+
assertThat(document.getCurrentAccessPattern().get(), is(expectedAccessPattern));
20862171

20872172
// Randomly recurse: We recurse only one time per level to avoid hogging test time, but we randomize which
20882173
// pipeline to recurse on, eventually requiring a recursion on the last pipeline run if one hasn't happened yet.
@@ -2099,11 +2184,11 @@ void doTestNestedAccessPatternPropagation(int level, int maxCallDepth, IngestDoc
20992184
assertThat(document.getPipelineStack().size(), is(equalTo(level)));
21002185
if (level == 0) {
21012186
// Top level means access pattern should be empty
2102-
assertThat(document.getCurrentAccessPattern(), is(nullValue()));
2187+
assertThat(document.getCurrentAccessPattern().isEmpty(), is(true));
21032188
} else {
21042189
// If we're nested below the top level we should still have an access
21052190
// pattern on the document for the pipeline above us
2106-
assertThat(document.getCurrentAccessPattern(), is(not(nullValue())));
2191+
assertThat(document.getCurrentAccessPattern().isPresent(), is(true));
21072192
}
21082193
}
21092194
logger.debug("LEVEL {}/{}: COMPLETE", level, maxCallDepth);

0 commit comments

Comments
 (0)