Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions server/src/main/java/module-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -465,6 +465,7 @@
org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat,
org.elasticsearch.index.codec.vectors.diskbbq.next.ESNextDiskBBQVectorsFormat,
org.elasticsearch.index.codec.vectors.es93.ES93BinaryQuantizedVectorsFormat,
org.elasticsearch.index.codec.vectors.es93.ES93HnswVectorsFormat,
org.elasticsearch.index.codec.vectors.es93.ES93HnswBinaryQuantizedVectorsFormat;

provides org.apache.lucene.codecs.Codec
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
import org.apache.lucene.codecs.hnsw.FlatFieldVectorsWriter;
import org.apache.lucene.codecs.hnsw.FlatVectorsScorer;
import org.apache.lucene.codecs.hnsw.FlatVectorsWriter;
import org.apache.lucene.codecs.lucene95.OffHeapFloatVectorValues;
import org.apache.lucene.codecs.lucene95.OrdToDocDISIReaderConfiguration;
import org.apache.lucene.index.DocsWithFieldSet;
import org.apache.lucene.index.FieldInfo;
Expand Down Expand Up @@ -250,7 +249,7 @@ public CloseableRandomVectorScorerSupplier mergeOneFieldToIndex(FieldInfo fieldI
final IndexInput finalVectorDataInput = vectorDataInput;
final RandomVectorScorerSupplier randomVectorScorerSupplier = vectorsScorer.getRandomVectorScorerSupplier(
fieldInfo.getVectorSimilarityFunction(),
new OffHeapFloatVectorValues.DenseOffHeapVectorValues(
new OffHeapBFloat16VectorValues.DenseOffHeapVectorValues(
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing Import Statement

Usage of OffHeapBFloat16VectorValues class without visible import statement. Runtime ClassNotFoundException will occur if import missing causing vector indexing failures. Service degradation results from failed vector operations.

import org.elasticsearch.index.codec.vectors.es93.OffHeapBFloat16VectorValues;
Commitable Suggestion
Suggested change
new OffHeapBFloat16VectorValues.DenseOffHeapVectorValues(
import org.elasticsearch.index.codec.vectors.es93.OffHeapBFloat16VectorValues;
Standards
  • ISO-IEC-25010-Reliability-Fault-Tolerance
  • ISO-IEC-25010-Functional-Correctness-Appropriateness

fieldInfo.getVectorDimension(),

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The class OffHeapBFloat16VectorValues is used here, suggesting that the file OffHeapFloatVectorValues is no longer needed and can be removed. Please confirm if this is the case and remove the import if it's no longer used.

docsWithField.cardinality(),
finalVectorDataInput,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.index.codec.vectors.es93;

import org.apache.lucene.codecs.KnnVectorsReader;
import org.apache.lucene.codecs.KnnVectorsWriter;
import org.apache.lucene.codecs.hnsw.FlatVectorsFormat;
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader;
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsWriter;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.elasticsearch.index.codec.vectors.AbstractHnswVectorsFormat;

import java.io.IOException;
import java.util.concurrent.ExecutorService;

public class ES93HnswVectorsFormat extends AbstractHnswVectorsFormat {

static final String NAME = "ES93HnswVectorsFormat";

private final FlatVectorsFormat flatVectorsFormat;

public ES93HnswVectorsFormat() {
super(NAME);
flatVectorsFormat = new ES93GenericFlatVectorsFormat();
}

public ES93HnswVectorsFormat(int maxConn, int beamWidth, boolean bfloat16, boolean useDirectIO) {
super(NAME, maxConn, beamWidth);
flatVectorsFormat = new ES93GenericFlatVectorsFormat(bfloat16, useDirectIO);
}

public ES93HnswVectorsFormat(
int maxConn,
int beamWidth,
boolean bfloat16,
boolean useDirectIO,
int numMergeWorkers,
ExecutorService mergeExec
) {
super(NAME, maxConn, beamWidth, numMergeWorkers, mergeExec);
flatVectorsFormat = new ES93GenericFlatVectorsFormat(bfloat16, useDirectIO);
}
Comment on lines +40 to +50
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Executor Validation Gap

Constructor accepts ExecutorService parameter without null validation before passing to superclass. NullPointerException during vector merging operations if null executor provided. Vector indexing failures result from unhandled null executor state.

Standards
  • ISO-IEC-25010-Reliability-Fault-Tolerance
  • DbC-Preconditions


@Override
protected FlatVectorsFormat flatVectorsFormat() {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Consider renaming this method to getFlatVectorsFormat to follow Java naming conventions for getter methods. This improves readability and consistency.

Suggested change
protected FlatVectorsFormat flatVectorsFormat() {
protected FlatVectorsFormat getFlatVectorsFormat() {
return flatVectorsFormat;
}

return flatVectorsFormat;
}

@Override
public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
return new Lucene99HnswVectorsWriter(state, maxConn, beamWidth, flatVectorsFormat.fieldsWriter(state), numMergeWorkers, mergeExec);
}

@Override
public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException {
return new Lucene99HnswVectorsReader(state, flatVectorsFormat.fieldsReader(state));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,5 @@ org.elasticsearch.index.codec.vectors.es818.ES818HnswBinaryQuantizedVectorsForma
org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat
org.elasticsearch.index.codec.vectors.diskbbq.next.ESNextDiskBBQVectorsFormat
org.elasticsearch.index.codec.vectors.es93.ES93BinaryQuantizedVectorsFormat
org.elasticsearch.index.codec.vectors.es93.ES93HnswVectorsFormat
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggestion: Verify that the fully-qualified name org.elasticsearch.index.codec.vectors.es93.ES93HnswVectorsFormat exactly matches the implementation class (package + class name), that the class is declared public and exposes the expected SPI-compatible constructor/signature, and confirm the class is included in the built JAR so ServiceLoader can load it at runtime. [possible issue]

org.elasticsearch.index.codec.vectors.es93.ES93HnswBinaryQuantizedVectorsFormat
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.index.codec.vectors.es93;

import org.apache.lucene.index.VectorEncoding;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

import static org.hamcrest.Matchers.closeTo;

public class ES93HnswBFloat16VectorsFormatTests extends ES93HnswVectorsFormatTests {

@Override
protected boolean useBFloat16() {
return true;
}

@Override
protected VectorEncoding randomVectorEncoding() {
return VectorEncoding.FLOAT32;
}

@Override
public void testEmptyByteVectorData() throws Exception {
// no bytes
}

@Override
public void testMergingWithDifferentByteKnnFields() throws Exception {
// no bytes
}

@Override
public void testByteVectorScorerIteration() throws Exception {
// no bytes
}

@Override
public void testSortedIndexBytes() throws Exception {
// no bytes
}

@Override
public void testMismatchedFields() throws Exception {
// no bytes
}

@Override
public void testRandomBytes() throws Exception {
// no bytes
}

@Override
public void testRandom() throws Exception {
AssertionError err = expectThrows(AssertionError.class, super::testRandom);

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The test testRandom is expected to throw an AssertionError. It would be helpful to add a comment explaining why this is expected and what kind of assertions are being made within the assertFloatsWithinBounds method. This will improve the readability and maintainability of the test.

assertFloatsWithinBounds(err);
}

@Override
public void testRandomWithUpdatesAndGraph() throws Exception {
AssertionError err = expectThrows(AssertionError.class, super::testRandomWithUpdatesAndGraph);
assertFloatsWithinBounds(err);
}

@Override
public void testSparseVectors() throws Exception {
AssertionError err = expectThrows(AssertionError.class, super::testSparseVectors);
assertFloatsWithinBounds(err);
}

@Override
public void testVectorValuesReportCorrectDocs() throws Exception {
AssertionError err = expectThrows(AssertionError.class, super::testVectorValuesReportCorrectDocs);
assertFloatsWithinBounds(err);
}

private static final Pattern FLOAT_ASSERTION_FAILURE = Pattern.compile(".*expected:<([0-9.-]+)> but was:<([0-9.-]+)>");

private static void assertFloatsWithinBounds(AssertionError error) {
Matcher m = FLOAT_ASSERTION_FAILURE.matcher(error.getMessage());
if (m.matches() == false) {
throw error; // nothing to do with us, just rethrow
}

// numbers just need to be in the same vicinity
double expected = Double.parseDouble(m.group(1));
double actual = Double.parseDouble(m.group(2));
double allowedError = expected * 0.01; // within 1%
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggestion: Use a relative tolerance based on the absolute magnitude and a small minimum epsilon so comparisons don't fail when the expected value is zero or very small. [possible bug]

Suggested change
double allowedError = expected * 0.01; // within 1%
// use absolute magnitude for relative tolerance and enforce a small minimum epsilon to handle expected == 0
double allowedError = Math.max(Math.abs(expected) * 0.01, 1e-6); // within 1% or at least 1e-6
Why Change? ⭐

This change is syntactically valid Java and uses only java.lang.Math methods, which are always available. It preserves the intended relative-tolerance behavior for non-zero expected values (allowedError = 1% of |expected|) while avoiding an overly strict allowedError of 0 when expected == 0 by enforcing a small epsilon (1e-6). The assertion call and surrounding parsing remain unchanged. There are no new imports or external dependencies, and the new computation does not introduce new exceptions for normal numeric inputs, so this is safe to apply and thus verified.

assertThat(error.getMessage(), actual, closeTo(expected, allowedError));
Comment on lines +87 to +97

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The allowed error is calculated as 1% of the expected value. While this might be sufficient, consider making this configurable or using a more robust method to determine the allowed error, especially if the expected values can be very small. A relative error might not be appropriate for values close to zero.

}
Comment on lines +85 to +98
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

Fix tolerance calc and regex in BF16 float assertion helper

  • allowedError must be non-negative; zero expected needs absolute fallback.
  • Regex should handle scientific notation.

Apply:

-    private static final Pattern FLOAT_ASSERTION_FAILURE = Pattern.compile(".*expected:<([0-9.-]+)> but was:<([0-9.-]+)>");
+    private static final Pattern FLOAT_ASSERTION_FAILURE = Pattern.compile(
+        ".*expected:<([-+]?\\d*\\.?\\d+(?:[eE][-+]?\\d+)?)> but was:<([-+]?\\d*\\.?\\d+(?:[eE][-+]?\\d+)?)>.*"
+    );
@@
-        double allowedError = expected * 0.01;  // within 1%
-        assertThat(error.getMessage(), actual, closeTo(expected, allowedError));
+        double allowedError = Math.max(1e-6, Math.abs(expected) * 0.01);  // within 1% or small abs floor
+        assertThat(error.getMessage(), actual, closeTo(expected, allowedError));
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
private static final Pattern FLOAT_ASSERTION_FAILURE = Pattern.compile(".*expected:<([0-9.-]+)> but was:<([0-9.-]+)>");
private static void assertFloatsWithinBounds(AssertionError error) {
Matcher m = FLOAT_ASSERTION_FAILURE.matcher(error.getMessage());
if (m.matches() == false) {
throw error; // nothing to do with us, just rethrow
}
// numbers just need to be in the same vicinity
double expected = Double.parseDouble(m.group(1));
double actual = Double.parseDouble(m.group(2));
double allowedError = expected * 0.01; // within 1%
assertThat(error.getMessage(), actual, closeTo(expected, allowedError));
}
private static final Pattern FLOAT_ASSERTION_FAILURE = Pattern.compile(
".*expected:<([-+]?\\d*\\.?\\d+(?:[eE][-+]?\\d+)?)> but was:<([-+]?\\d*\\.?\\d+(?:[eE][-+]?\\d+)?)>.*"
);
private static void assertFloatsWithinBounds(AssertionError error) {
Matcher m = FLOAT_ASSERTION_FAILURE.matcher(error.getMessage());
if (m.matches() == false) {
throw error; // nothing to do with us, just rethrow
}
// numbers just need to be in the same vicinity
double expected = Double.parseDouble(m.group(1));
double actual = Double.parseDouble(m.group(2));
double allowedError = Math.max(1e-6, Math.abs(expected) * 0.01); // within 1% or small abs floor
assertThat(error.getMessage(), actual, closeTo(expected, allowedError));
}
🤖 Prompt for AI Agents
In
server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93HnswBFloat16VectorsFormatTests.java
around lines 85-98, the float-assertion helper uses a regex that doesn't accept
scientific notation and computes allowedError directly from expected which can
be negative or zero; update the regex to accept decimal and scientific notation
for both groups (e.g. allow digits, optional sign, decimal point and optional
exponent with [eE][+-]?\\d+), and compute allowedError as a non-negative value
using Math.abs(expected) * 0.01 with a small absolute fallback when expected is
zero (e.g. Math.max(Math.abs(expected) * 0.01, 1e-6)); keep parsing with
Double.parseDouble and assert with closeTo using the corrected allowedError.

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.index.codec.vectors.es93;

import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.KnnVectorsReader;
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.KnnFloatVectorField;
import org.apache.lucene.index.CodecReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase;
import org.apache.lucene.tests.util.TestUtil;
import org.apache.lucene.util.SameThreadExecutorService;
import org.elasticsearch.common.logging.LogConfigurator;
import org.elasticsearch.index.codec.vectors.BFloat16;

import java.io.IOException;
import java.util.Locale;

import static java.lang.String.format;
import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.DEFAULT_BEAM_WIDTH;
import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN;
import static org.apache.lucene.index.VectorSimilarityFunction.DOT_PRODUCT;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.oneOf;

public class ES93HnswVectorsFormatTests extends BaseKnnVectorsFormatTestCase {

static {
LogConfigurator.loadLog4jPlugins();
LogConfigurator.configureESLogging(); // native access requires logging to be initialized
}

private KnnVectorsFormat format;

protected boolean useBFloat16() {
return false;
}

@Override
public void setUp() throws Exception {
format = new ES93HnswVectorsFormat(DEFAULT_MAX_CONN, DEFAULT_BEAM_WIDTH, useBFloat16(), random().nextBoolean());
super.setUp();
}

@Override
protected Codec getCodec() {
return TestUtil.alwaysKnnVectorsFormat(format);
}

public void testToString() {
FilterCodec customCodec = new FilterCodec("foo", Codec.getDefault()) {
@Override
public KnnVectorsFormat knnVectorsFormat() {
return new ES93HnswVectorsFormat(10, 20, false, false);
}
};
String expectedPattern = "ES93HnswVectorsFormat(name=ES93HnswVectorsFormat, maxConn=10, beamWidth=20,"
+ " flatVectorFormat=ES93GenericFlatVectorsFormat(name=ES93GenericFlatVectorsFormat,"
+ " format=Lucene99FlatVectorsFormat(name=Lucene99FlatVectorsFormat, flatVectorScorer=%s())))";
var defaultScorer = format(Locale.ROOT, expectedPattern, "DefaultFlatVectorScorer");
var memSegScorer = format(Locale.ROOT, expectedPattern, "Lucene99MemorySegmentFlatVectorsScorer");
assertThat(customCodec.knnVectorsFormat().toString(), is(oneOf(defaultScorer, memSegScorer)));
}

public void testLimits() {
expectThrows(IllegalArgumentException.class, () -> new ES93HnswVectorsFormat(-1, 20, false, false));
expectThrows(IllegalArgumentException.class, () -> new ES93HnswVectorsFormat(0, 20, false, false));
expectThrows(IllegalArgumentException.class, () -> new ES93HnswVectorsFormat(20, 0, false, false));
expectThrows(IllegalArgumentException.class, () -> new ES93HnswVectorsFormat(20, -1, false, false));
expectThrows(IllegalArgumentException.class, () -> new ES93HnswVectorsFormat(512 + 1, 20, false, false));
expectThrows(IllegalArgumentException.class, () -> new ES93HnswVectorsFormat(20, 3201, false, false));
expectThrows(
IllegalArgumentException.class,
() -> new ES93HnswVectorsFormat(20, 100, false, false, 1, new SameThreadExecutorService())
);
}

public void testSimpleOffHeapSize() throws IOException {
float[] vector = randomVector(random().nextInt(12, 500));
try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
Document doc = new Document();
doc.add(new KnnFloatVectorField("f", vector, DOT_PRODUCT));
w.addDocument(doc);
w.commit();
try (IndexReader reader = DirectoryReader.open(w)) {
LeafReader r = getOnlyLeafReader(reader);
if (r instanceof CodecReader codecReader) {
KnnVectorsReader knnVectorsReader = codecReader.getVectorReader();
if (knnVectorsReader instanceof PerFieldKnnVectorsFormat.FieldsReader fieldsReader) {
knnVectorsReader = fieldsReader.getFieldReader("f");
}
var fieldInfo = r.getFieldInfos().fieldInfo("f");
var offHeap = knnVectorsReader.getOffHeapByteSize(fieldInfo);
int bytes = useBFloat16() ? BFloat16.BYTES : Float.BYTES;
assertEquals(vector.length * bytes, (long) offHeap.get("vec"));
assertEquals(1L, (long) offHeap.get("vex"));
assertEquals(2, offHeap.size());
}
}
}
}
}