Skip to content
Open
15 changes: 0 additions & 15 deletions bson/src/main/org/bson/BinaryVector.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,6 @@
import org.bson.diagnostics.Logger;
import org.bson.diagnostics.Loggers;

import static org.bson.assertions.Assertions.isTrueArgument;
import static org.bson.assertions.Assertions.notNull;

/**
* Binary Vectors are densely packed arrays of numbers, all the same type, which are stored and retrieved efficiently using the BSON Binary
* Subtype 9 format. This class supports multiple vector {@link DataType}'s and provides static methods to create vectors.
Expand Down Expand Up @@ -67,16 +64,6 @@ public abstract class BinaryVector {
*/
@Beta(Reason.SERVER)
public static PackedBitBinaryVector packedBitVector(final byte[] data, final byte padding) {
notNull("data", data);
isTrueArgument("Padding must be between 0 and 7 bits. Provided padding: " + padding, padding >= 0 && padding <= 7);
isTrueArgument("Padding must be 0 if vector is empty. Provided padding: " + padding, padding == 0 || data.length > 0);
if (padding > 0) {
int mask = (1 << padding) - 1;
if ((data[data.length - 1] & mask) != 0) {
// JAVA-5848 in version 6.0.0 will convert this logging into an IllegalArgumentException
LOGGER.warn("The last " + padding + " padded bits should be zero in the final byte.");
}
}
return new PackedBitBinaryVector(data, padding);
}

Expand All @@ -93,7 +80,6 @@ public static PackedBitBinaryVector packedBitVector(final byte[] data, final byt
* @return A {@link Int8BinaryVector} instance with the {@link DataType#INT8} data type.
*/
public static Int8BinaryVector int8Vector(final byte[] data) {
notNull("data", data);
return new Int8BinaryVector(data);
}

Expand All @@ -109,7 +95,6 @@ public static Int8BinaryVector int8Vector(final byte[] data) {
* @return A {@link Float32BinaryVector} instance with the {@link DataType#FLOAT32} data type.
*/
public static Float32BinaryVector floatVector(final float[] data) {
notNull("data", data);
return new Float32BinaryVector(data);
}

Expand Down
8 changes: 4 additions & 4 deletions bson/src/main/org/bson/Float32BinaryVector.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

import java.util.Arrays;

import static org.bson.assertions.Assertions.assertNotNull;
import static org.bson.assertions.Assertions.notNull;

/**
* Represents a vector of 32-bit floating-point numbers, where each element in the vector is a float.
Expand All @@ -35,9 +35,9 @@ public final class Float32BinaryVector extends BinaryVector {

private final float[] data;

Float32BinaryVector(final float[] vectorData) {
Float32BinaryVector(final float[] data) {
super(DataType.FLOAT32);
this.data = assertNotNull(vectorData);
this.data = notNull("data", data);
}

/**
Expand All @@ -49,7 +49,7 @@ public final class Float32BinaryVector extends BinaryVector {
* @return the underlying float array representing this {@link Float32BinaryVector} vector.
*/
public float[] getData() {
return assertNotNull(data);
return data;
}

@Override
Expand Down
6 changes: 3 additions & 3 deletions bson/src/main/org/bson/Int8BinaryVector.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import java.util.Arrays;
import java.util.Objects;

import static org.bson.assertions.Assertions.assertNotNull;
import static org.bson.assertions.Assertions.notNull;

/**
* Represents a vector of 8-bit signed integers, where each element in the vector is a byte.
Expand All @@ -38,7 +38,7 @@ public final class Int8BinaryVector extends BinaryVector {

Int8BinaryVector(final byte[] data) {
super(DataType.INT8);
this.data = assertNotNull(data);
this.data = notNull("data", data);
}

/**
Expand All @@ -50,7 +50,7 @@ public final class Int8BinaryVector extends BinaryVector {
* @return the underlying byte array representing this {@link Int8BinaryVector} vector.
*/
public byte[] getData() {
return assertNotNull(data);
return data;
}

@Override
Expand Down
13 changes: 12 additions & 1 deletion bson/src/main/org/bson/PackedBitBinaryVector.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
import java.util.Objects;

import static org.bson.assertions.Assertions.assertNotNull;
import static org.bson.assertions.Assertions.isTrueArgument;
import static org.bson.assertions.Assertions.notNull;

/**
* Represents a packed bit vector, where each element of the vector is represented by a single bit (0 or 1).
Expand All @@ -43,8 +45,17 @@ public final class PackedBitBinaryVector extends BinaryVector {

PackedBitBinaryVector(final byte[] data, final byte padding) {
super(DataType.PACKED_BIT);
this.data = assertNotNull(data);
this.data = notNull("data", data);
this.padding = padding;
isTrueArgument("Padding must be between 0 and 7 bits. Provided padding: " + padding, padding >= 0 && padding <= 7);
isTrueArgument("Padding must be 0 if vector is empty. Provided padding: " + padding, padding == 0 || data.length > 0);
if (padding > 0) {
int mask = (1 << padding) - 1;
if ((data[data.length - 1] & mask) != 0) {
// JAVA-5848 in version 6.0.0 will convert this logging into an IllegalArgumentException
LOGGER.warn("The last " + padding + " padded bits should be zero in the final byte.");
}
}
Comment on lines +50 to +58
Copy link
Member

@vbabanin vbabanin Nov 20, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see the rationale for moving the validation into the constructor. The reason it wasn’t placed there originally is that we had two different validation paths:

  1. For user input, we throw an IllegalArgumentException (isTrueArgument(...)).
  2. For values coming from the server, an invalid state indicates unexpected and corrupted data, which is why those checks used assert instead (isTrue(...)). See: BinaryVectorHelper.java#L97-L98.

Putting the checks in the constructor means we now perform the same validation twice, which was something we tried to avoid for performance considerations initially, given that these checks could be executed frequently. We haven’t measured the exact impact, but it does introduce extra work.

As a side note:
We still haven’t fully aligned on validation boundaries, whether to validate strictly at API entry points (and accept duplication) or centralize deeper in the code. The driver currently uses both approaches in different areas. This might be worth discussing as a team at some point.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've removed from the checks from helper to just be in the constructor - as we have a public API BsonVector.packedBitVector that also requires the validation. This removes the double validation but does put the validation deeper in the code.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I ended up reverting that - as we return different errors depending on the scenarion - decoding data or constructing a new instance.

I think the cost will be minor of the duplicated check, however the public API does also require guarding for invalid data.

Its a slightly unusual scenario - we could add a flag to constructor to remove the extra checks but that would also be setting up a new convention.

}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertThrows;

class BinaryVectorTest {
class BinaryVectorProseTest {

private ListAppender<ILoggingEvent> logWatcher;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,22 +43,34 @@
import static org.bson.internal.vector.BinaryVectorHelper.determineVectorDType;
import static org.junit.Assert.assertThrows;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assumptions.assumeFalse;

/**
* See
* <a href="https://github.com/mongodb/specifications/tree/master/source/bson-binary-vector/tests">JSON-based tests that included in test resources</a>.
*/
class BinaryVectorGenericBsonTest {
class BinaryVectorTest {

private static final List<String> TEST_NAMES_TO_IGNORE = asList(
//It is impossible to provide float inputs for INT8.
// It is impossible to overflow byte with values higher than 127 in the API.
"Overflow Vector INT8",
// It is impossible to underflow byte with values lower than -128 in the API.
"Underflow Vector INT8",
// It is impossible to overflow byte with values higher than 127 in the API.
"Overflow Vector PACKED_BIT",
// It is impossible to underflow byte with values lower than -128 in the API.
"Underflow Vector PACKED_BIT",
//It is impossible to provide float inputs for PACKED_BIT in the API.
// It is impossible to provide float inputs for INT8 in the API.
"INT8 with float inputs",
// It is impossible to provide float inputs for PACKED_BIT in the API.
"Vector with float values PACKED_BIT",
//It is impossible to provide float inputs for INT8.
"Overflow Vector PACKED_BIT");
// It is impossible to provide float inputs with padding for FLOAT32 in the API.
"FLOAT32 with padding",
// It is impossible to provide padding for INT8 in the API.
"INT8 with padding",
// TODO JAVA-5848 in 6.0.0 "Padding specified with no vector data PACKED_BIT" will throw an error (currently logs a warning).
"Padding specified with no vector data PACKED_BIT"
);

@ParameterizedTest(name = "{0}")
@MethodSource("data")
Expand All @@ -72,75 +84,40 @@ void shouldPassAllOutcomes(@SuppressWarnings("unused") final String description,
if (isValidVector) {
runValidTestCase(testKey, testCase);
} else {
runInvalidTestCase(testDescription, testCase);
runInvalidTestCase(testCase);
}
}

private static void runInvalidTestCase(final String testDescription, final BsonDocument testCase) {
private static void runInvalidTestCase(final BsonDocument testCase) {
if (testCase.containsKey("vector")) {
assertValidationException(assertThrows(RuntimeException.class, () -> runInvalidTestCaseVector(testCase)));
}

// TODO JAVA-5848 in 6.0.0 "Padding specified with no vector data PACKED_BIT" will throw an error (currently logs a warning).
if (testCase.containsKey("canonical_bson") && !testDescription.equals("Padding specified with no vector data PACKED_BIT")) {
assertValidationException(assertThrows(RuntimeException.class, () -> runInvalidTestCaseCanonicalBson(testCase)));
runInvalidTestCaseVector(testCase);
}
}

private static void runInvalidTestCaseVector(final BsonDocument testCase) {
BsonArray arrayVector = testCase.getArray("vector");
byte dtypeByte = Byte.decode(testCase.getString("dtype_hex").getValue());
BinaryVector.DataType expectedDType = determineVectorDType(dtypeByte);
switch (expectedDType) {
case INT8:
if (testCase.containsKey("padding")) {
throw new IllegalArgumentException("Int8 is not supported with padding");
}
byte[] expectedVectorData = toByteArray(arrayVector);
BinaryVector.int8Vector(expectedVectorData);
break;
case PACKED_BIT:
byte expectedPadding = (byte) testCase.getInt32("padding", new BsonInt32(0)).getValue();
byte[] expectedVectorPackedBitData = toByteArray(arrayVector);
BinaryVector.packedBitVector(expectedVectorPackedBitData, expectedPadding);
break;
case FLOAT32:
if (testCase.containsKey("padding")) {
throw new IllegalArgumentException("Float32 is not supported with padding");
}
float[] expectedFloatVector = toFloatArray(arrayVector);
BinaryVector.floatVector(expectedFloatVector);
break;
default:
throw new AssertionFailedError("Unsupported vector data type: " + expectedDType);
}
}

private static void runInvalidTestCaseCanonicalBson(final BsonDocument testCase) {
String description = testCase.getString("description").getValue();
byte dtypeByte = Byte.decode(testCase.getString("dtype_hex").getValue());
String canonicalBsonHex = testCase.getString("canonical_bson").getValue().toUpperCase();
byte[] bytes = decodeToDocument(canonicalBsonHex, description).getBinary("vector").getData();

BinaryVector.DataType expectedDType = determineVectorDType(dtypeByte);

switch (expectedDType) {
case INT8:
if (testCase.containsKey("padding")) {
throw new IllegalArgumentException("Int8 is not supported with padding");
}
BinaryVector.int8Vector(bytes);
break;
case PACKED_BIT:
byte expectedPadding = (byte) testCase.getInt32("padding", new BsonInt32(0)).getValue();
BinaryVector.packedBitVector(bytes, expectedPadding);
break;
case FLOAT32:
throw new IllegalArgumentException("Float32 is not supported");
default:
throw new AssertionFailedError("Unsupported vector data type: " + expectedDType);
}

assertThrows(IllegalArgumentException.class, () -> {
switch (expectedDType) {
case INT8:
byte[] expectedVectorData = toByteArray(arrayVector);
BinaryVector.int8Vector(expectedVectorData);
break;
case PACKED_BIT:
byte expectedPadding = (byte) testCase.getInt32("padding", new BsonInt32(0)).getValue();
byte[] expectedVectorPackedBitData = toByteArray(arrayVector);
BinaryVector.packedBitVector(expectedVectorPackedBitData, expectedPadding);
break;
case FLOAT32:
float[] expectedFloatVector = toFloatArray(arrayVector);
BinaryVector.floatVector(expectedFloatVector);
break;
default:
throw new AssertionFailedError("Unsupported vector data type: " + expectedDType);
}
});
}

private static void runValidTestCase(final String testKey, final BsonDocument testCase) {
Expand Down Expand Up @@ -201,14 +178,10 @@ private static void runValidTestCase(final String testKey, final BsonDocument te
description);
break;
default:
throw new IllegalArgumentException("Unsupported vector data type: " + expectedDType);
throw new AssertionFailedError("Unsupported vector data type: " + expectedDType);
}
}

private static void assertValidationException(final RuntimeException runtimeException) {
assertTrue(runtimeException instanceof IllegalArgumentException || runtimeException instanceof IllegalStateException);
}

private static void assertThatVectorCreationResultsInCorrectBinary(final BinaryVector expectedVectorData,
final String testKey,
final BsonDocument actualDecodedDocument,
Expand Down