Skip to content

Commit 2c62300

Browse files
salvatorecampagna authored and romseygeek committed
Support all packed value lengths in NumericFieldStats.decodeLong (#15817)
The previous implementation only handled Integer.BYTES (4) and Long.BYTES (8), throwing IllegalArgumentException for other lengths. This broke fields using 2-byte point values such as HalfFloatPoint. Replaces the switch with a generic big-endian decoder that handles any length from 1 to 8 bytes. For point fields wider than Long.BYTES (e.g. InetAddressPoint), getStatsFromPoints returns null to fall through to the DocValuesSkipper path.
1 parent f921a4c commit 2c62300

File tree

4 files changed

+103
-29
lines changed

4 files changed

+103
-29
lines changed

lucene/CHANGES.txt

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,10 @@ API Changes
2020
New Features
2121
---------------------
2222

23-
* GITHUB#15740: Add NumericFieldStats utility for retrieving global numeric field statistics
24-
(min, max, doc count) from index metadata structures. Migrate SortedNumericDocValuesRangeQuery
25-
to use this API, fixing the rewrite optimization for fields with PointValues but no skip index.
23+
* GITHUB#15740, GITHUB#15817: Add NumericFieldStats utility for retrieving global numeric field
24+
statistics (min, max, doc count) from index metadata structures. Migrate
25+
SortedNumericDocValuesRangeQuery to use this API, fixing the rewrite optimization for fields
26+
with PointValues but no skip index. Support all packed value lengths from 1 to 8 bytes.
2627
(Salvatore Campagna)
2728

2829
* GITHUB#15722: Add interface to do prefetch on KnnVectorValues and an example implementation to use prefetch

lucene/core/src/java/org/apache/lucene/search/NumericFieldStats.java

Lines changed: 14 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
import org.apache.lucene.index.LeafReader;
2323
import org.apache.lucene.index.LeafReaderContext;
2424
import org.apache.lucene.index.PointValues;
25-
import org.apache.lucene.util.NumericUtils;
2625

2726
/**
2827
* Utility class for retrieving global numeric field statistics from index metadata structures,
@@ -65,7 +64,10 @@ public static Stats getStats(IndexReader reader, String field) throws IOExceptio
6564
private static Stats getStatsFromPoints(IndexReader reader, String field) throws IOException {
6665
final byte[] minPacked = PointValues.getMinPackedValue(reader, field);
6766
final byte[] maxPacked = PointValues.getMaxPackedValue(reader, field);
68-
if (minPacked == null || maxPacked == null) {
67+
if (minPacked == null
68+
|| maxPacked == null
69+
|| minPacked.length > Long.BYTES
70+
|| maxPacked.length > Long.BYTES) {
6971
return null;
7072
}
7173
final int docCount = PointValues.getDocCount(reader, field);
@@ -101,31 +103,17 @@ private static Stats getStatsFromSkipper(IndexReader reader, String field) throw
101103
}
102104

103105
/**
104-
* Decodes a packed {@code byte[]} point value into a {@code long}. {@link PointValues} stores
105-
* numeric values as big-endian byte arrays with the sign bit flipped for sortable ordering.
106-
* {@code IntField} produces 4-byte arrays and {@code LongField} produces 8-byte arrays, so we
107-
* dispatch on length to call the appropriate {@link NumericUtils} decoder. The {@code int} case
108-
* widens to {@code long} via standard Java sign extension, which preserves the original value.
109-
*
110-
* <p>We return {@code long} unconditionally because the query layer already works with {@code
111-
* long} bounds (e.g. {@code SortedNumericDocValuesRangeQuery} stores its range as {@code long}
112-
* even for {@code IntField} queries). Callers that need the original {@code int} value can safely
113-
* narrow with {@code Math.toIntExact()}, which will never throw for values originating from an
114-
* {@code IntField}.
106+
* Decodes a packed {@code byte[]} point value into a {@code long}. Handles any packed value
107+
* length from 1 to 8 bytes, covering {@code HalfFloatPoint} (2 bytes), {@code IntField} (4
108+
* bytes), {@code LongField} (8 bytes), and any other width that uses the standard sortable
109+
* encoding (big-endian with sign bit flipped).
115110
*/
116111
private static long decodeLong(byte[] packed) {
117-
return switch (packed.length) {
118-
case Integer.BYTES -> NumericUtils.sortableBytesToInt(packed, 0);
119-
case Long.BYTES -> NumericUtils.sortableBytesToLong(packed, 0);
120-
default ->
121-
throw new IllegalArgumentException(
122-
"Unsupported packed value length: "
123-
+ packed.length
124-
+ " (expected "
125-
+ Long.BYTES
126-
+ " or "
127-
+ Integer.BYTES
128-
+ ")");
129-
};
112+
assert packed.length >= 1 && packed.length <= Long.BYTES;
113+
long result = (byte) (packed[0] ^ 0x80);
114+
for (int i = 1; i < packed.length; i++) {
115+
result = (result << 8) | (packed[i] & 0xFF);
116+
}
117+
return result;
130118
}
131119
}

lucene/core/src/test/org/apache/lucene/search/TestNumericFieldStats.java

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,11 @@
1717
package org.apache.lucene.search;
1818

1919
import java.io.IOException;
20+
import java.net.InetAddress;
2021
import org.apache.lucene.document.Document;
2122
import org.apache.lucene.document.Field;
23+
import org.apache.lucene.document.FieldType;
24+
import org.apache.lucene.document.InetAddressPoint;
2225
import org.apache.lucene.document.IntField;
2326
import org.apache.lucene.document.LongField;
2427
import org.apache.lucene.document.SortedNumericDocValuesField;
@@ -30,9 +33,54 @@
3033
import org.apache.lucene.search.NumericFieldStats.Stats;
3134
import org.apache.lucene.store.Directory;
3235
import org.apache.lucene.tests.util.LuceneTestCase;
36+
import org.apache.lucene.util.BytesRef;
3337

3438
public class TestNumericFieldStats extends LuceneTestCase {
3539

40+
private static Field newCustomWidthPoint(String name, int numBytes, long value) {
41+
FieldType type = new FieldType();
42+
type.setDimensions(1, numBytes);
43+
type.freeze();
44+
byte[] packed = new byte[numBytes];
45+
for (int i = numBytes - 1; i >= 0; i--) {
46+
packed[i] = (byte) (value & 0xFF);
47+
value >>= 8;
48+
}
49+
packed[0] ^= (byte) 0x80;
50+
return new Field(name, new BytesRef(packed), type);
51+
}
52+
53+
private static long minValueForWidth(int numBytes) {
54+
return -(1L << (numBytes * 8 - 1));
55+
}
56+
57+
private static long maxValueForWidth(int numBytes) {
58+
return (1L << (numBytes * 8 - 1)) - 1;
59+
}
60+
61+
public void testGetStatsWithAllByteWidths() throws IOException {
62+
for (int numBytes = 1; numBytes <= Long.BYTES; numBytes++) {
63+
long min = minValueForWidth(numBytes);
64+
long max = maxValueForWidth(numBytes);
65+
try (Directory dir = newDirectory();
66+
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
67+
for (long value : new long[] {min, 0, max}) {
68+
final Document doc = new Document();
69+
doc.add(newCustomWidthPoint("field", numBytes, value));
70+
w.addDocument(doc);
71+
}
72+
w.commit();
73+
try (IndexReader reader = DirectoryReader.open(w)) {
74+
final Stats stats = NumericFieldStats.getStats(reader, "field");
75+
assertNotNull("numBytes=" + numBytes, stats);
76+
assertEquals("numBytes=" + numBytes, min, stats.min());
77+
assertEquals("numBytes=" + numBytes, max, stats.max());
78+
assertEquals("numBytes=" + numBytes, 3, stats.docCount());
79+
}
80+
}
81+
}
82+
}
83+
3684
public void testGetStatsWithLongField() throws IOException {
3785
try (Directory dir = newDirectory();
3886
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
@@ -268,4 +316,17 @@ public void testGetStatsWithSegmentsWithAndWithoutField() throws IOException {
268316
}
269317
}
270318
}
319+
320+
public void testGetStatsReturnsNullForWidePointValues() throws Exception {
321+
try (Directory dir = newDirectory();
322+
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
323+
final Document doc = new Document();
324+
doc.add(new InetAddressPoint("field", InetAddress.getByName("192.168.0.1")));
325+
w.addDocument(doc);
326+
w.commit();
327+
try (IndexReader reader = DirectoryReader.open(w)) {
328+
assertNull(NumericFieldStats.getStats(reader, "field"));
329+
}
330+
}
331+
}
271332
}

lucene/sandbox/src/test/org/apache/lucene/sandbox/document/TestHalfFloatPoint.java

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import org.apache.lucene.document.Document;
2121
import org.apache.lucene.index.IndexReader;
2222
import org.apache.lucene.search.IndexSearcher;
23+
import org.apache.lucene.search.NumericFieldStats;
2324
import org.apache.lucene.store.Directory;
2425
import org.apache.lucene.tests.index.RandomIndexWriter;
2526
import org.apache.lucene.tests.util.LuceneTestCase;
@@ -245,6 +246,29 @@ public void testNextUp() {
245246
assertEquals(Float.floatToIntBits(0f), Float.floatToIntBits(HalfFloatPoint.nextUp(-0f)));
246247
}
247248

249+
public void testNumericFieldStats() throws Exception {
250+
Directory dir = newDirectory();
251+
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
252+
Document doc1 = new Document();
253+
doc1.add(new HalfFloatPoint("field", -2f));
254+
writer.addDocument(doc1);
255+
Document doc2 = new Document();
256+
doc2.add(new HalfFloatPoint("field", 1.25f));
257+
writer.addDocument(doc2);
258+
Document doc3 = new Document();
259+
doc3.add(new HalfFloatPoint("field", 100f));
260+
writer.addDocument(doc3);
261+
IndexReader reader = writer.getReader();
262+
NumericFieldStats.Stats stats = NumericFieldStats.getStats(reader, "field");
263+
assertNotNull(stats);
264+
assertTrue(stats.min() < 0);
265+
assertTrue(stats.max() > 0);
266+
assertEquals(3, stats.docCount());
267+
reader.close();
268+
writer.close();
269+
dir.close();
270+
}
271+
248272
public void testNextDown() {
249273
assertEquals(Float.NaN, HalfFloatPoint.nextDown(Float.NaN), 0f);
250274
assertEquals(Float.NEGATIVE_INFINITY, HalfFloatPoint.nextDown(Float.NEGATIVE_INFINITY), 0f);

0 commit comments

Comments
 (0)