Skip to content

Commit bd34fae

Browse files
committed
Move float bulk-write into IndexWriter to enforce endianness
1 parent d3906f4 commit bd34fae

File tree

25 files changed

+114
-106
lines changed

25 files changed

+114
-106
lines changed

jvector-base/src/main/java/io/github/jbellis/jvector/disk/IndexWriter.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@
1919
import java.io.Closeable;
2020
import java.io.DataOutput;
2121
import java.io.IOException;
22+
import java.nio.ByteBuffer;
23+
import java.nio.ByteOrder;
24+
import java.nio.FloatBuffer;
2225

2326
/**
2427
* Interface for writing index data.
@@ -30,4 +33,13 @@ public interface IndexWriter extends DataOutput, Closeable {
3033
* @throws IOException if an I/O error occurs
3134
*/
3235
long position() throws IOException;
36+
37+
default void writeFloats(float[] floats, int offset, int count) throws IOException {
38+
FloatBuffer fb = FloatBuffer.wrap(floats, offset, count);
39+
ByteBuffer bb = ByteBuffer.allocate(fb.capacity() * Float.BYTES);
40+
// DataOutput specifies BIG_ENDIAN for float
41+
bb.order(ByteOrder.BIG_ENDIAN).asFloatBuffer().put(fb);
42+
bb.rewind();
43+
write(bb.array());
44+
}
3345
}

jvector-base/src/main/java/io/github/jbellis/jvector/disk/SimpleMappedReader.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import java.io.IOException;
2222
import java.io.RandomAccessFile;
2323
import java.lang.reflect.Field;
24+
import java.nio.ByteOrder;
2425
import java.nio.MappedByteBuffer;
2526
import java.nio.channels.FileChannel;
2627
import java.nio.file.Path;
@@ -73,6 +74,7 @@ public Supplier(Path path) throws IOException {
7374
throw new RuntimeException("SimpleMappedReader doesn't support files above 2GB");
7475
}
7576
this.buffer = raf.getChannel().map(FileChannel.MapMode.READ_ONLY, 0, raf.length());
77+
this.buffer.order(ByteOrder.BIG_ENDIAN);
7678
this.buffer.load();
7779
}
7880
}

jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/feature/Feature.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@
1616

1717
package io.github.jbellis.jvector.graph.disk.feature;
1818

19-
import java.io.DataOutput;
19+
import io.github.jbellis.jvector.disk.IndexWriter;
20+
2021
import java.io.IOException;
2122
import java.util.EnumMap;
2223
import java.util.function.IntFunction;
@@ -35,9 +36,9 @@ default boolean isFused() {
3536

3637
int featureSize();
3738

38-
void writeHeader(DataOutput out) throws IOException;
39+
void writeHeader(IndexWriter out) throws IOException;
3940

40-
default void writeInline(DataOutput out, State state) throws IOException {
41+
default void writeInline(IndexWriter out, State state) throws IOException {
4142
// default no-op
4243
}
4344

jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/feature/FusedFeature.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,10 @@
1616

1717
package io.github.jbellis.jvector.graph.disk.feature;
1818

19+
import io.github.jbellis.jvector.disk.IndexWriter;
1920
import io.github.jbellis.jvector.disk.RandomAccessReader;
2021
import io.github.jbellis.jvector.util.Accountable;
2122

22-
import java.io.DataOutput;
2323
import java.io.IOException;
2424

2525
/**
@@ -33,7 +33,7 @@ default boolean isFused() {
3333
return true;
3434
}
3535

36-
void writeSourceFeature(DataOutput out, State state) throws IOException;
36+
void writeSourceFeature(IndexWriter out, State state) throws IOException;
3737

3838
interface InlineSource extends Accountable {}
3939

jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/feature/FusedPQ.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
package io.github.jbellis.jvector.graph.disk.feature;
1818

19+
import io.github.jbellis.jvector.disk.IndexWriter;
1920
import io.github.jbellis.jvector.disk.RandomAccessReader;
2021
import io.github.jbellis.jvector.graph.ImmutableGraphIndex;
2122
import io.github.jbellis.jvector.graph.disk.CommonHeader;
@@ -31,7 +32,6 @@
3132
import io.github.jbellis.jvector.vector.types.VectorFloat;
3233
import io.github.jbellis.jvector.vector.types.VectorTypeSupport;
3334

34-
import java.io.DataOutput;
3535
import java.io.IOException;
3636
import java.io.UncheckedIOException;
3737
import java.util.function.IntFunction;
@@ -97,14 +97,14 @@ public ScoreFunction.ApproximateScoreFunction approximateScoreFunctionFor(Vector
9797
}
9898

9999
@Override
100-
public void writeHeader(DataOutput out) throws IOException {
100+
public void writeHeader(IndexWriter out) throws IOException {
101101
pq.write(out, OnDiskGraphIndex.CURRENT_VERSION);
102102
}
103103

104104
// this is an awkward fit for the Feature.State design since we need to
105105
// generate the fused set based on the neighbors of the node, not just the node itself
106106
@Override
107-
public void writeInline(DataOutput out, Feature.State state_) throws IOException {
107+
public void writeInline(IndexWriter out, Feature.State state_) throws IOException {
108108
var state = (FusedPQ.State) state_;
109109

110110
var neighbors = state.view.getNeighborsIterator(0, state.nodeId);
@@ -138,7 +138,7 @@ public State(ImmutableGraphIndex.View view, IntFunction<ByteSequence<?>> compres
138138
}
139139

140140
@Override
141-
public void writeSourceFeature(DataOutput out, Feature.State state_) throws IOException {
141+
public void writeSourceFeature(IndexWriter out, Feature.State state_) throws IOException {
142142
var state = (FusedPQ.State) state_;
143143
var compressed = state.compressedVectorFunction.apply(state.nodeId);
144144
var temp = pqCodeScratch.get();

jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/feature/InlineVectors.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,13 @@
1616

1717
package io.github.jbellis.jvector.graph.disk.feature;
1818

19+
import io.github.jbellis.jvector.disk.IndexWriter;
1920
import io.github.jbellis.jvector.disk.RandomAccessReader;
2021
import io.github.jbellis.jvector.graph.disk.CommonHeader;
2122
import io.github.jbellis.jvector.vector.VectorizationProvider;
2223
import io.github.jbellis.jvector.vector.types.VectorFloat;
2324
import io.github.jbellis.jvector.vector.types.VectorTypeSupport;
2425

25-
import java.io.DataOutput;
2626
import java.io.IOException;
2727

2828
/**
@@ -59,12 +59,12 @@ static InlineVectors load(CommonHeader header, RandomAccessReader reader) {
5959
}
6060

6161
@Override
62-
public void writeHeader(DataOutput out) {
62+
public void writeHeader(IndexWriter out) {
6363
// common header contains dimension, which is sufficient
6464
}
6565

6666
@Override
67-
public void writeInline(DataOutput out, Feature.State state) throws IOException {
67+
public void writeInline(IndexWriter out, Feature.State state) throws IOException {
6868
vectorTypeSupport.writeFloatVector(out, ((InlineVectors.State) state).vector);
6969
}
7070

jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/feature/NVQ.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
package io.github.jbellis.jvector.graph.disk.feature;
1818

19+
import io.github.jbellis.jvector.disk.IndexWriter;
1920
import io.github.jbellis.jvector.disk.RandomAccessReader;
2021
import io.github.jbellis.jvector.graph.disk.CommonHeader;
2122
import io.github.jbellis.jvector.graph.disk.OnDiskGraphIndex;
@@ -26,7 +27,6 @@
2627
import io.github.jbellis.jvector.vector.VectorSimilarityFunction;
2728
import io.github.jbellis.jvector.vector.types.VectorFloat;
2829

29-
import java.io.DataOutput;
3030
import java.io.IOException;
3131
import java.io.UncheckedIOException;
3232

@@ -70,12 +70,12 @@ static NVQ load(CommonHeader header, RandomAccessReader reader) {
7070
}
7171

7272
@Override
73-
public void writeHeader(DataOutput out) throws IOException {
73+
public void writeHeader(IndexWriter out) throws IOException {
7474
nvq.write(out, OnDiskGraphIndex.CURRENT_VERSION);
7575
}
7676

7777
@Override
78-
public void writeInline(DataOutput out, Feature.State state_) throws IOException {
78+
public void writeInline(IndexWriter out, Feature.State state_) throws IOException {
7979
var state = (NVQ.State) state_;
8080
state.vector.write(out);
8181
}

jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/feature/SeparatedFeature.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,13 @@
1616

1717
package io.github.jbellis.jvector.graph.disk.feature;
1818

19-
import java.io.DataOutput;
19+
import io.github.jbellis.jvector.disk.IndexWriter;
20+
2021
import java.io.IOException;
2122

2223
public interface SeparatedFeature extends Feature {
2324
void setOffset(long offset);
2425
long getOffset();
2526

26-
void writeSeparately(DataOutput out, State state) throws IOException;
27+
void writeSeparately(IndexWriter out, State state) throws IOException;
2728
}

jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/feature/SeparatedNVQ.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
package io.github.jbellis.jvector.graph.disk.feature;
1818

19+
import io.github.jbellis.jvector.disk.IndexWriter;
1920
import io.github.jbellis.jvector.disk.RandomAccessReader;
2021
import io.github.jbellis.jvector.graph.disk.CommonHeader;
2122
import io.github.jbellis.jvector.graph.disk.OnDiskGraphIndex;
@@ -25,7 +26,6 @@
2526
import io.github.jbellis.jvector.vector.VectorSimilarityFunction;
2627
import io.github.jbellis.jvector.vector.types.VectorFloat;
2728

28-
import java.io.DataOutput;
2929
import java.io.IOException;
3030
import java.io.UncheckedIOException;
3131

@@ -68,13 +68,13 @@ public int featureSize() {
6868
}
6969

7070
@Override
71-
public void writeHeader(DataOutput out) throws IOException {
71+
public void writeHeader(IndexWriter out) throws IOException {
7272
nvq.write(out, OnDiskGraphIndex.CURRENT_VERSION);
7373
out.writeLong(offset);
7474
}
7575

7676
@Override
77-
public void writeSeparately(DataOutput out, State state_) throws IOException {
77+
public void writeSeparately(IndexWriter out, State state_) throws IOException {
7878
var state = (NVQ.State) state_;
7979
if (state.vector != null) {
8080
state.vector.write(out);

jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/feature/SeparatedVectors.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,12 @@
1616

1717
package io.github.jbellis.jvector.graph.disk.feature;
1818

19+
import io.github.jbellis.jvector.disk.IndexWriter;
1920
import io.github.jbellis.jvector.disk.RandomAccessReader;
2021
import io.github.jbellis.jvector.graph.disk.CommonHeader;
2122
import io.github.jbellis.jvector.vector.VectorizationProvider;
2223
import io.github.jbellis.jvector.vector.types.VectorTypeSupport;
2324

24-
import java.io.DataOutput;
2525
import java.io.IOException;
2626
import java.io.UncheckedIOException;
2727

@@ -61,12 +61,12 @@ public int featureSize() {
6161
}
6262

6363
@Override
64-
public void writeHeader(DataOutput out) throws IOException {
64+
public void writeHeader(IndexWriter out) throws IOException {
6565
out.writeLong(offset);
6666
}
6767

6868
@Override
69-
public void writeSeparately(DataOutput out, State state_) throws IOException {
69+
public void writeSeparately(IndexWriter out, State state_) throws IOException {
7070
var state = (InlineVectors.State) state_;
7171
if (state.vector != null) {
7272
vectorTypeSupport.writeFloatVector(out, state.vector);

0 commit comments

Comments
 (0)