Skip to content

Commit d9cfc19

Browse files
jordan-powersomricohenn
authored andcommitted
Fix offsets not recording duplicate values (elastic#125354)
Previously, when calculating the offsets, we just compared the values as-is without any loss of precision. However, when the values were saved into doc values and loaded in the doc values loader, they could have lost precision. This meant that values that were not duplicates when calculating the offsets could now be duplicates in the doc values loader. This interfered with the de-duplication logic, causing incorrect values to be returned. My solution is to apply the precision loss before calculating the offsets, so that both the offsets calculation and the SortedNumericDocValues de-duplication see the same values as duplicates.
1 parent 7352f54 commit d9cfc19

File tree

3 files changed

+102
-19
lines changed

3 files changed

+102
-19
lines changed

server/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -438,6 +438,11 @@ public void addFields(LuceneDocument document, String name, Number value, boolea
438438
}
439439
}
440440

441+
@Override
442+
public long toSortableLong(Number value) {
443+
return HalfFloatPoint.halfFloatToSortableShort(value.floatValue());
444+
}
445+
441446
@Override
442447
public IndexFieldData.Builder getFieldDataBuilder(MappedFieldType ft, ValuesSourceType valuesSourceType) {
443448
return new SortedDoublesIndexFieldData.Builder(
@@ -622,6 +627,11 @@ public void addFields(LuceneDocument document, String name, Number value, boolea
622627
}
623628
}
624629

630+
@Override
631+
public long toSortableLong(Number value) {
632+
return NumericUtils.floatToSortableInt(value.floatValue());
633+
}
634+
625635
@Override
626636
public IndexFieldData.Builder getFieldDataBuilder(MappedFieldType ft, ValuesSourceType valuesSourceType) {
627637
return new SortedDoublesIndexFieldData.Builder(
@@ -772,6 +782,11 @@ public void addFields(LuceneDocument document, String name, Number value, boolea
772782
}
773783
}
774784

785+
@Override
786+
public long toSortableLong(Number value) {
787+
return NumericUtils.doubleToSortableLong(value.doubleValue());
788+
}
789+
775790
@Override
776791
public IndexFieldData.Builder getFieldDataBuilder(MappedFieldType ft, ValuesSourceType valuesSourceType) {
777792
return new SortedDoublesIndexFieldData.Builder(
@@ -891,6 +906,11 @@ public void addFields(LuceneDocument document, String name, Number value, boolea
891906
INTEGER.addFields(document, name, value, indexed, docValued, stored);
892907
}
893908

909+
@Override
910+
public long toSortableLong(Number value) {
911+
return INTEGER.toSortableLong(value);
912+
}
913+
894914
@Override
895915
Number valueForSearch(Number value) {
896916
return value.byteValue();
@@ -1009,6 +1029,11 @@ public void addFields(LuceneDocument document, String name, Number value, boolea
10091029
INTEGER.addFields(document, name, value, indexed, docValued, stored);
10101030
}
10111031

1032+
@Override
1033+
public long toSortableLong(Number value) {
1034+
return INTEGER.toSortableLong(value);
1035+
}
1036+
10121037
@Override
10131038
Number valueForSearch(Number value) {
10141039
return value.shortValue();
@@ -1206,6 +1231,11 @@ public void addFields(LuceneDocument document, String name, Number value, boolea
12061231
}
12071232
}
12081233

1234+
@Override
1235+
public long toSortableLong(Number value) {
1236+
return value.intValue();
1237+
}
1238+
12091239
@Override
12101240
public IndexFieldData.Builder getFieldDataBuilder(MappedFieldType ft, ValuesSourceType valuesSourceType) {
12111241
return new SortedNumericIndexFieldData.Builder(
@@ -1358,6 +1388,11 @@ public void addFields(LuceneDocument document, String name, Number value, boolea
13581388
}
13591389
}
13601390

1391+
@Override
1392+
public long toSortableLong(Number value) {
1393+
return value.longValue();
1394+
}
1395+
13611396
@Override
13621397
public IndexFieldData.Builder getFieldDataBuilder(MappedFieldType ft, ValuesSourceType valuesSourceType) {
13631398
return new SortedNumericIndexFieldData.Builder(
@@ -1506,6 +1541,13 @@ public abstract void addFields(
15061541
boolean stored
15071542
);
15081543

1544+
/**
1545+
* For a given {@code Number}, returns the sortable long representation that will be stored in the doc values.
1546+
* @param value number to convert
1547+
* @return sortable long representation
1548+
*/
1549+
public abstract long toSortableLong(Number value);
1550+
15091551
public FieldValues<Number> compile(String fieldName, Script script, ScriptCompiler compiler) {
15101552
// only implemented for long and double fields
15111553
throw new IllegalArgumentException("Unknown parameter [script] for mapper [" + fieldName + "]");
@@ -2140,7 +2182,10 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio
21402182
}
21412183
if (offsetsFieldName != null && context.isImmediateParentAnArray() && context.canAddIgnoredField()) {
21422184
if (value != null) {
2143-
context.getOffSetContext().recordOffset(offsetsFieldName, (Comparable<?>) value);
2185+
// We cannot simply cast value to Comparable<> because we need to also capture the potential loss of precision that occurs
2186+
// when the value is stored into the doc values.
2187+
long sortableLongValue = type.toSortableLong(value);
2188+
context.getOffSetContext().recordOffset(offsetsFieldName, sortableLongValue);
21442189
} else {
21452190
context.getOffSetContext().recordNull(offsetsFieldName);
21462191
}

server/src/test/java/org/elasticsearch/index/mapper/HalfFloatSyntheticSourceNativeArrayIntegrationTests.java

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,32 @@
1313

1414
import org.apache.lucene.sandbox.document.HalfFloatPoint;
1515

16+
import static org.elasticsearch.xcontent.XContentFactory.jsonBuilder;
17+
1618
public class HalfFloatSyntheticSourceNativeArrayIntegrationTests extends NativeArrayIntegrationTestCase {
1719

20+
public void testSynthesizeArray() throws Exception {
21+
var inputArrayValues = new Float[][] { new Float[] { 0.78151345F, 0.6886488F, 0.6882413F } };
22+
var expectedArrayValues = new Float[inputArrayValues.length][inputArrayValues[0].length];
23+
for (int i = 0; i < inputArrayValues.length; i++) {
24+
for (int j = 0; j < inputArrayValues[i].length; j++) {
25+
expectedArrayValues[i][j] = HalfFloatPoint.sortableShortToHalfFloat(
26+
HalfFloatPoint.halfFloatToSortableShort(inputArrayValues[i][j])
27+
);
28+
}
29+
}
30+
31+
var mapping = jsonBuilder().startObject()
32+
.startObject("properties")
33+
.startObject("field")
34+
.field("type", getFieldTypeName())
35+
.endObject()
36+
.endObject()
37+
.endObject();
38+
39+
verifySyntheticArray(inputArrayValues, expectedArrayValues, mapping, "_id");
40+
}
41+
1842
@Override
1943
protected String getFieldTypeName() {
2044
return "half_float";

server/src/test/java/org/elasticsearch/index/mapper/NativeArrayIntegrationTestCase.java

Lines changed: 32 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -259,47 +259,61 @@ protected void verifySyntheticArray(Object[][] arrays) throws IOException {
259259
}
260260

261261
protected void verifySyntheticArray(Object[][] arrays, XContentBuilder mapping, String... expectedStoredFields) throws IOException {
262+
verifySyntheticArray(arrays, arrays, mapping, expectedStoredFields);
263+
}
264+
265+
private XContentBuilder arrayToSource(Object[] array) throws IOException {
266+
var source = jsonBuilder().startObject();
267+
if (array != null) {
268+
source.startArray("field");
269+
for (Object arrayValue : array) {
270+
source.value(arrayValue);
271+
}
272+
source.endArray();
273+
} else {
274+
source.field("field").nullValue();
275+
}
276+
return source.endObject();
277+
}
278+
279+
protected void verifySyntheticArray(
280+
Object[][] inputArrays,
281+
Object[][] expectedArrays,
282+
XContentBuilder mapping,
283+
String... expectedStoredFields
284+
) throws IOException {
285+
assertThat(inputArrays.length, equalTo(expectedArrays.length));
286+
262287
var indexService = createIndex(
263288
"test-index",
264289
Settings.builder().put("index.mapping.source.mode", "synthetic").put("index.mapping.synthetic_source_keep", "arrays").build(),
265290
mapping
266291
);
267-
for (int i = 0; i < arrays.length; i++) {
268-
var array = arrays[i];
269-
292+
for (int i = 0; i < inputArrays.length; i++) {
270293
var indexRequest = new IndexRequest("test-index");
271294
indexRequest.id("my-id-" + i);
272-
var source = jsonBuilder().startObject();
273-
if (array != null) {
274-
source.startArray("field");
275-
for (Object arrayValue : array) {
276-
source.value(arrayValue);
277-
}
278-
source.endArray();
279-
} else {
280-
source.field("field").nullValue();
281-
}
282-
source.endObject();
283-
var expectedSource = Strings.toString(source);
284-
indexRequest.source(source);
295+
var inputSource = arrayToSource(inputArrays[i]);
296+
indexRequest.source(inputSource);
285297
indexRequest.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE);
286298
client().index(indexRequest).actionGet();
287299

300+
var expectedSource = arrayToSource(expectedArrays[i]);
301+
288302
var searchRequest = new SearchRequest("test-index");
289303
searchRequest.source().query(new IdsQueryBuilder().addIds("my-id-" + i));
290304
var searchResponse = client().search(searchRequest).actionGet();
291305
try {
292306
var hit = searchResponse.getHits().getHits()[0];
293307
assertThat(hit.getId(), equalTo("my-id-" + i));
294-
assertThat(hit.getSourceAsString(), equalTo(expectedSource));
308+
assertThat(hit.getSourceAsString(), equalTo(Strings.toString(expectedSource)));
295309
} finally {
296310
searchResponse.decRef();
297311
}
298312
}
299313

300314
try (var searcher = indexService.getShard(0).acquireSearcher(getTestName())) {
301315
var reader = searcher.getDirectoryReader();
302-
for (int i = 0; i < arrays.length; i++) {
316+
for (int i = 0; i < expectedArrays.length; i++) {
303317
var document = reader.storedFields().document(i);
304318
// Verify that there is no ignored source:
305319
Set<String> storedFieldNames = new LinkedHashSet<>(document.getFields().stream().map(IndexableField::name).toList());

0 commit comments

Comments
 (0)