Skip to content

Commit 8ff8b82

Browse files
committed
Refactor main loop, improve comments, test, docs
1 parent 408393c commit 8ff8b82

File tree

3 files changed

+53
-25
lines changed

3 files changed

+53
-25
lines changed

docs/reference/elasticsearch/mapping-reference/flattened.md

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -365,7 +365,7 @@ Will become (note the nested objects instead of the "flattened" array):
365365
}
366366
```
367367

368-
Flattened fields allow for a duplicate key to contain both an object and a scalar value.
368+
Flattened fields allow for a key to contain both an object and a scalar value.
369369
For example, consider the following flattened field `flattened`:
370370

371371
```console-result
@@ -384,9 +384,8 @@ For example, consider the following flattened field `flattened`:
384384
Because `"foo.bar": "10"` is implicitly equivalent to `"foo": { "bar": "10" }`,
385385
`"bar"` has both a scalar value `"10"`, and an object value of `{ "baz": "20" }`.
386386

387-
With synthetic source, objects with such duplicate fields will appear
388-
differently in `_source`. For example, if the above field has synthetic
389-
source enabled, the value of `_source` would be:
387+
With synthetic source, to produce a valid JSON output, objects with such fields will appear differently in `_source`.
388+
For example, if the field is defined in an index configured with synthetic source, the value of `_source` would be:
390389

391390
```console-result
392391
{
@@ -398,8 +397,3 @@ source enabled, the value of `_source` would be:
398397
}
399398
}
400399
```
401-
402-
This is because, when constructing the source, the key `"foo.bar"` is eagerly expanding into the key `"foo"` with an object value.
403-
Then `"bar"` is added to the object with the scalar value of `"10"`. Then, because another
404-
`"bar"` cannot be added with an object value, `"bar"` and `"baz"` are collapsed
405-
in the flat key `"bar.baz"` with a scalar value of `"20"`

server/src/main/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldSyntheticWriterHelper.java

Lines changed: 25 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,10 @@ private KeyValue(final String[] key, final String value) {
125125
this(value, new Prefix(key), key[key.length - 1]);
126126
}
127127

128+
private static KeyValue fromBytesRef(final BytesRef keyValue) {
129+
return keyValue == null ? EMPTY : new KeyValue(keyValue);
130+
}
131+
128132
public String leaf() {
129133
return this.leaf;
130134
}
@@ -185,30 +189,36 @@ public void write(final XContentBuilder b) throws IOException {
185189
KeyValue next;
186190

187191
while (curr.equals(KeyValue.EMPTY) == false) {
192+
next = KeyValue.fromBytesRef(sortedKeyedValues.next());
188193
values.add(curr.value());
189-
BytesRef nextValue = sortedKeyedValues.next();
190-
next = nextValue == null ? KeyValue.EMPTY : new KeyValue(nextValue);
191194

195+
// Gather all values with the same path into a list so they can be written to a field together.
196+
while (curr.pathEquals(next)) {
197+
curr = next;
198+
next = KeyValue.fromBytesRef(sortedKeyedValues.next());
199+
values.add(curr.value());
200+
}
201+
202+
// startPrefix is the suffix of the path that is within the currently open object, not including the leaf.
203+
// For example, if the path is foo.bar.baz.qux, and openObjects is [foo], then startPrefix is bar.baz, and leaf is qux.
192204
var startPrefix = curr.prefix.diff(openObjects);
193205
if (startPrefix.parts.isEmpty() == false && startPrefix.parts.getFirst().equals(lastScalarSingleLeaf)) {
194-
// In the currently open object, a previous key with a scalar value is a prefix of the current path. Instead of traversing
195-
// the path and building nested objects, we concatenate the path into a single key and add it as a field. For example:
206+
// We have encountered a key with an object value, which already has a scalar value. Instead of creating an object for the
207+
// key, we concatenate the key and all child keys going down to the current leaf into a single field name. For example:
196208
// Assume the current object contains "foo": 10 and "foo.bar": 20. Since key-value pairs are sorted, "foo" is processed
197209
// first. When writing the field "foo", `lastScalarSingleLeaf` is set to "foo" because it has a scalar value. Next, when
198210
// processing "foo.bar", we check if `lastScalarSingleLeaf` ("foo") is a prefix of "foo.bar". Since it is, this indicates a
199-
// conflict: a scalar value and an object share the same key ("foo"). To disambiguate, we create a flat key "foo.bar"
200-
// with the value 20 in the current object, rather than creating a nested object as usual.
201-
if (curr.pathEquals(next) == false) {
202-
String combinedPath = concatPath(startPrefix, curr.leaf());
203-
writeField(b, values, combinedPath);
204-
}
211+
// conflict: a scalar value and an object share the same key ("foo"). To disambiguate, we create a concatenated key
212+
// "foo.bar" with the value 20 in the current object, rather than creating a nested object as usual.
213+
writeField(b, values, concatPath(startPrefix, curr.leaf()));
205214
} else {
206-
// Traverse down into path, writing object keys to output, and adding to the openObject context.
215+
// Since there is not an existing key in the object that is a prefix of the path, we can traverse down into the path
216+
// and open objects. After opening all objects in the path, write out the field with only the current leaf as the key.
217+
// Finally, save the current leaf in `lastScalarSingleLeaf`, in case there is a future object within the recently opened
218+
// object which has the same key as the current leaf.
207219
startObject(b, startPrefix.parts, openObjects.parts);
208-
if (curr.pathEquals(next) == false) {
209-
lastScalarSingleLeaf = curr.leaf();
210-
writeField(b, values, curr.leaf());
211-
}
220+
writeField(b, values, curr.leaf());
221+
lastScalarSingleLeaf = curr.leaf();
212222
}
213223

214224
int numObjectsToEnd = Prefix.numObjectsToEnd(openObjects.parts, next.prefix.parts);

server/src/test/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldMapperTests.java

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -880,13 +880,37 @@ public void testSyntheticSourceWithScalarObjectMismatch() throws IOException {
880880
b.startObject("key1");
881881
{
882882
b.startObject("key2").field("key3", "bar").endObject();
883+
b.field("key4", "baz");
883884
}
884885
b.endObject();
886+
b.field("key5", "qux");
885887
}
886888
b.endObject();
887889
});
888890
assertThat(syntheticSource, equalTo("""
889-
{"field":{"key1":{"key2":"foo","key2.key3":"bar"}}}"""));
891+
{"field":{"key1":{"key2":"foo","key2.key3":"bar","key4":"baz"},"key5":"qux"}}"""));
892+
}
893+
894+
public void testSyntheticSourceWithScalarObjectMismatchArray() throws IOException {
895+
DocumentMapper mapper = createSytheticSourceMapperService(
896+
mapping(b -> { b.startObject("field").field("type", "flattened").endObject(); })
897+
).documentMapper();
898+
899+
var syntheticSource = syntheticSource(mapper, b -> {
900+
b.startObject("field");
901+
{
902+
b.array("key1.key2", "qux", "foo");
903+
b.startObject("key1");
904+
{
905+
b.field("key2.key3", "baz");
906+
b.field("key2", "bar");
907+
}
908+
b.endObject();
909+
}
910+
b.endObject();
911+
});
912+
assertThat(syntheticSource, equalTo("""
913+
{"field":{"key1":{"key2":["bar","foo","qux"],"key2.key3":"baz"}}}"""));
890914
}
891915

892916
public void testSyntheticSourceWithEmptyObject() throws IOException {

0 commit comments

Comments
 (0)