Skip to content

Commit d74a55a

Browse files
Allow trailing empty string field names in paths of flattened field (#133611) (#133665)
Flattened objects allow for keys with leading and trailing path separators (the period character .). They also allow field names consisting only of path separators. These field names are then joined by the path separator and whole paths are stored with the associated value as key-value pairs. The whole paths are then separated back into their component fields by splitting on the path separator. There is ambiguity whether a given period is meant as a path separator or part of a path name. Because of this, we assume that all periods are path separators. This means that any two adjacent periods in the whole path have an empty string field name between them. The same is true of leading and trailing periods. This is how we already handle adjacent periods that are at the front or middle of the whole path. But due to how the split function works, this is not how trailing adjacent periods are handled. By default the split function does not return trailing empty strings. By adding a negative limit to split, any trailing empty strings are now returned. Thus trailing adjacent periods are now treated the same as leading periods or periods in the middle of the string. More importantly, if the whole path consists of periods, the resulting array of path components was previously empty, causing an ArrayIndexOutOfBoundsException. With this change, any paths consisting solely of periods will be treated as having empty string field names before and after every period. Fixes #130139 (cherry picked from commit 4661d06) # Conflicts: # server/src/test/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldMapperTests.java
1 parent 84061ac commit d74a55a

File tree

4 files changed

+108
-1
lines changed

4 files changed

+108
-1
lines changed

docs/changelog/133611.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 133611
2+
summary: Allow trailing empty string field names in paths of flattened field
3+
area: Mapping
4+
type: bug
5+
issues:
6+
- 130139

server/src/main/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldSyntheticWriterHelper.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,9 @@ private KeyValue(final String value, final Prefix prefix, final String leaf) {
116116

117117
KeyValue(final BytesRef keyValue) {
118118
this(
119-
FlattenedFieldParser.extractKey(keyValue).utf8ToString().split(PATH_SEPARATOR_PATTERN),
119+
// Splitting with a negative limit includes trailing empty strings.
120+
// This is needed in case the provided path has trailing path separators.
121+
FlattenedFieldParser.extractKey(keyValue).utf8ToString().split(PATH_SEPARATOR_PATTERN, -1),
120122
FlattenedFieldParser.extractValue(keyValue).utf8ToString()
121123
);
122124
}

server/src/test/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldMapperTests.java

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -931,6 +931,56 @@ public void testSyntheticSourceWithEmptyObject() throws IOException {
931931
{"field":{"key1":"foo"}}"""));
932932
}
933933

934+
public void testSyntheticSourceWithMatchesInNestedPath() throws IOException {
935+
DocumentMapper mapper = createSytheticSourceMapperService(
936+
mapping(b -> { b.startObject("field").field("type", "flattened").endObject(); })
937+
).documentMapper();
938+
939+
// This test covers a scenario that previously had a bug.
940+
// Since a.b.c and b.b.d have a matching middle key `b`, and b.b.d starts with a `b`,
941+
// startObject was not called for the first `b` in b.b.d.
942+
// For a full explanation see this comment: https://github.com/elastic/elasticsearch/pull/129600#issuecomment-3024476134
943+
var syntheticSource = syntheticSource(mapper, b -> {
944+
b.startObject("field");
945+
{
946+
b.startObject("a");
947+
{
948+
b.startObject("b").field("c", "1").endObject();
949+
}
950+
b.endObject();
951+
b.startObject("b");
952+
{
953+
b.startObject("b").field("d", "2").endObject();
954+
}
955+
b.endObject();
956+
}
957+
b.endObject();
958+
});
959+
assertThat(syntheticSource, equalTo("""
960+
{"field":{"a":{"b":{"c":"1"}},"b":{"b":{"d":"2"}}}}"""));
961+
}
962+
963+
public void testMultipleDotsInPath() throws IOException {
964+
DocumentMapper mapper = createSytheticSourceMapperService(
965+
mapping(b -> { b.startObject("field").field("type", "flattened").endObject(); })
966+
).documentMapper();
967+
968+
var syntheticSource = syntheticSource(mapper, b -> {
969+
b.startObject("field");
970+
{
971+
b.startObject(".");
972+
{
973+
b.field(".", "bar");
974+
}
975+
b.endObject();
976+
}
977+
b.endObject();
978+
});
979+
// This behavior is weird to say the least. But this is the only reasonable way to interpret the meaning of the path `...`
980+
assertThat(syntheticSource, equalTo("""
981+
{"field":{"":{"":{"":{"":"bar"}}}}}"""));
982+
}
983+
934984
@Override
935985
protected boolean supportsCopyTo() {
936986
return false;

server/src/test/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldSyntheticWriterHelperTests.java

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import java.nio.charset.StandardCharsets;
2222
import java.util.List;
2323
import java.util.stream.Collectors;
24+
import java.util.stream.Stream;
2425

2526
import static org.mockito.Mockito.mock;
2627
import static org.mockito.Mockito.when;
@@ -246,6 +247,54 @@ public void testScalarObjectMismatchInNestedObject() throws IOException {
246247
assertEquals("{\"a\":{\"b\":{\"c\":\"10\",\"c.d\":\"20\"}}}", baos.toString(StandardCharsets.UTF_8));
247248
}
248249

250+
public void testSingleDotPath() throws IOException {
251+
// GIVEN
252+
final SortedSetDocValues dv = mock(SortedSetDocValues.class);
253+
final FlattenedFieldSyntheticWriterHelper writer = new FlattenedFieldSyntheticWriterHelper(new SortedSetSortedKeyedValues(dv));
254+
final ByteArrayOutputStream baos = new ByteArrayOutputStream();
255+
final XContentBuilder builder = new XContentBuilder(XContentType.JSON.xContent(), baos);
256+
final List<byte[]> bytes = Stream.of("." + '\0' + "10").map(x -> x.getBytes(StandardCharsets.UTF_8)).toList();
257+
when(dv.getValueCount()).thenReturn(Long.valueOf(bytes.size()));
258+
when(dv.docValueCount()).thenReturn(bytes.size());
259+
for (int i = 0; i < bytes.size(); i++) {
260+
when(dv.nextOrd()).thenReturn((long) i);
261+
when(dv.lookupOrd(ArgumentMatchers.eq((long) i))).thenReturn(new BytesRef(bytes.get(i), 0, bytes.get(i).length));
262+
}
263+
264+
// WHEN
265+
builder.startObject();
266+
writer.write(builder);
267+
builder.endObject();
268+
builder.flush();
269+
270+
// THEN
271+
assertEquals("{\"\":{\"\":\"10\"}}", baos.toString(StandardCharsets.UTF_8));
272+
}
273+
274+
public void testTrailingDotsPath() throws IOException {
275+
// GIVEN
276+
final SortedSetDocValues dv = mock(SortedSetDocValues.class);
277+
final FlattenedFieldSyntheticWriterHelper writer = new FlattenedFieldSyntheticWriterHelper(new SortedSetSortedKeyedValues(dv));
278+
final ByteArrayOutputStream baos = new ByteArrayOutputStream();
279+
final XContentBuilder builder = new XContentBuilder(XContentType.JSON.xContent(), baos);
280+
final List<byte[]> bytes = Stream.of("cat.." + '\0' + "10").map(x -> x.getBytes(StandardCharsets.UTF_8)).toList();
281+
when(dv.getValueCount()).thenReturn(Long.valueOf(bytes.size()));
282+
when(dv.docValueCount()).thenReturn(bytes.size());
283+
for (int i = 0; i < bytes.size(); i++) {
284+
when(dv.nextOrd()).thenReturn((long) i);
285+
when(dv.lookupOrd(ArgumentMatchers.eq((long) i))).thenReturn(new BytesRef(bytes.get(i), 0, bytes.get(i).length));
286+
}
287+
288+
// WHEN
289+
builder.startObject();
290+
writer.write(builder);
291+
builder.endObject();
292+
builder.flush();
293+
294+
// THEN
295+
assertEquals("{\"cat\":{\"\":{\"\":\"10\"}}}", baos.toString(StandardCharsets.UTF_8));
296+
}
297+
249298
private class SortedSetSortedKeyedValues implements FlattenedFieldSyntheticWriterHelper.SortedKeyedValues {
250299
private final SortedSetDocValues dv;
251300
private int seen = 0;

0 commit comments

Comments
 (0)