Skip to content

Commit f82977e

Browse files
authored
Merge pull request #7 from SentriusLLC/copilot/fix-6
Fix VectorIndexFooter build issues with defensive programming and validation
2 parents 9dcad5e + 6944b5a commit f82977e

File tree

5 files changed

+60
-12
lines changed

5 files changed

+60
-12
lines changed

core/src/main/java/org/apache/accumulo/core/file/rfile/VectorCompression.java

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,8 @@ public static CompressedVector compress8Bit(float[] vector) {
6666
byte[] quantized = new byte[vector.length];
6767
float scale = 255.0f / range;
6868
for (int i = 0; i < vector.length; i++) {
69-
int quantizedValue = Math.round((vector[i] - min) * scale) - 128;
70-
quantized[i] = (byte) Math.max(-128, Math.min(127, quantizedValue));
69+
int quantizedValue = Math.round((vector[i] - min) * scale);
70+
quantized[i] = (byte) Math.max(0, Math.min(255, quantizedValue));
7171
}
7272

7373
return new CompressedVector(quantized, min, max, COMPRESSION_QUANTIZED_8BIT);
@@ -107,8 +107,8 @@ public static CompressedVector compress16Bit(float[] vector) {
107107
ByteBuffer buffer = ByteBuffer.allocate(vector.length * 2);
108108
float scale = 65535.0f / range;
109109
for (float v : vector) {
110-
int quantizedValue = Math.round((v - min) * scale) - 32768;
111-
short shortValue = (short) Math.max(-32768, Math.min(32767, quantizedValue));
110+
int quantizedValue = Math.round((v - min) * scale);
111+
short shortValue = (short) Math.max(0, Math.min(65535, quantizedValue));
112112
buffer.putShort(shortValue);
113113
}
114114

@@ -160,7 +160,7 @@ private static float[] decompress8Bit(CompressedVector compressed) {
160160

161161
float scale = range / 255.0f;
162162
for (int i = 0; i < data.length; i++) {
163-
int unsignedByte = (data[i] & 0xFF) + 128;
163+
int unsignedByte = data[i] & 0xFF;
164164
result[i] = min + (unsignedByte * scale);
165165
}
166166

@@ -184,7 +184,7 @@ private static float[] decompress16Bit(CompressedVector compressed) {
184184

185185
float scale = range / 65535.0f;
186186
for (int i = 0; i < result.length; i++) {
187-
int unsignedShort = (buffer.getShort() & 0xFFFF) + 32768;
187+
int unsignedShort = buffer.getShort() & 0xFFFF;
188188
result[i] = min + (unsignedShort * scale);
189189
}
190190

core/src/main/java/org/apache/accumulo/core/file/rfile/VectorIndexFooter.java

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -221,11 +221,22 @@ private float[][] performKMeansClustering(List<float[]> points, int k) {
221221
k = Math.min(k, points.size()); // Can't have more clusters than points
222222
int dimension = points.get(0).length;
223223

224+
// Validate that all points have the same dimension
225+
for (float[] point : points) {
226+
if (point.length != dimension) {
227+
throw new IllegalArgumentException("All points must have the same dimension: expected "
228+
+ dimension + ", got " + point.length);
229+
}
230+
}
231+
224232
// Initialize centroids randomly
225233
float[][] centroids = new float[k][dimension];
226234
for (int i = 0; i < k; i++) {
227235
// Use point i as initial centroid (simple initialization)
228-
System.arraycopy(points.get(i * points.size() / k), 0, centroids[i], 0, dimension);
236+
int pointIndex = (i * points.size()) / k;
237+
// Ensure we don't go out of bounds
238+
pointIndex = Math.min(pointIndex, points.size() - 1);
239+
System.arraycopy(points.get(pointIndex), 0, centroids[i], 0, dimension);
229240
}
230241

231242
// K-means iterations (simplified - normally would do multiple iterations)
@@ -302,6 +313,10 @@ private int[] findTopKNearestClusters(float[] point, float[][] centroids, int k)
302313
}
303314

304315
private float euclideanDistance(float[] a, float[] b) {
316+
if (a.length != b.length) {
317+
throw new IllegalArgumentException(
318+
"Vector dimensions must match: " + a.length + " != " + b.length);
319+
}
305320
float sum = 0.0f;
306321
for (int i = 0; i < a.length; i++) {
307322
float diff = a[i] - b[i];

core/src/main/java/org/apache/accumulo/core/file/rfile/VectorIterator.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -504,4 +504,13 @@ public void setVectorIndex(VectorIndex vectorIndex) {
504504
this.vectorIndex = vectorIndex;
505505
}
506506

507+
/**
508+
* Sets the vector index footer for this iterator, enabling advanced indexing capabilities.
509+
*
510+
* @param indexFooter the vector index footer containing hierarchical indexing structures
511+
*/
512+
public void setVectorIndexFooter(VectorIndexFooter indexFooter) {
513+
this.indexFooter = indexFooter;
514+
}
515+
507516
}

core/src/test/java/org/apache/accumulo/core/file/rfile/ProductionVectorStoreExampleTest.java

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,17 +23,24 @@
2323
import java.util.HashMap;
2424
import java.util.List;
2525
import java.util.Map;
26+
import java.util.Random;
2627

2728
import org.apache.accumulo.core.data.Key;
2829
import org.apache.accumulo.core.data.Value;
2930

31+
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
32+
3033
/**
3134
* Comprehensive example demonstrating production-ready vector store features including: - Metadata
3235
* integration for per-vector categories - Compression for storage efficiency - Batching/staging for
3336
* performance - Advanced indexing for scalability - Vector chunking for large embeddings
3437
*/
38+
@SuppressFBWarnings(value = "PREDICTABLE_RANDOM",
39+
justification = "This class is an example/demo, not security-sensitive production code.")
3540
public class ProductionVectorStoreExampleTest {
3641

42+
static Random rand = new Random(1234);
43+
3744
public static void main(String[] args) {
3845
System.out.println("=== Production Vector Store Capabilities ===\n");
3946

@@ -72,9 +79,9 @@ public static void demonstrateCategoryIntegration() {
7279
iteratorOptions.put(VectorIterator.TOP_K_OPTION, "5");
7380

7481
System.out.println("User with category filter = internal can access:");
75-
System.out.println(" Public vectors (always available)");
76-
System.out.println(" Internal vectors (category matches)");
77-
System.out.println(" Restricted vectors (not included in filter)");
82+
System.out.println(" + Public vectors (always available)");
83+
System.out.println(" + Internal vectors (category matches)");
84+
System.out.println(" - Restricted vectors (not included in filter)");
7885

7986
System.out.println();
8087
}
@@ -172,7 +179,7 @@ public static void demonstrateVectorChunking() {
172179

173180
float[] largeEmbedding = new float[1024];
174181
for (int i = 0; i < largeEmbedding.length; i++) {
175-
largeEmbedding[i] = (float) (Math.random() * 2.0 - 1.0);
182+
largeEmbedding[i] = (float) (rand.nextFloat() * 2.0 - 1.0);
176183
}
177184

178185
int chunkSize = 256;
@@ -191,7 +198,7 @@ private static List<VectorBuffer.VectorBlock.VectorEntry> createSampleVectorBloc
191198
List<VectorBuffer.VectorBlock.VectorEntry> entries = new ArrayList<>();
192199
for (int i = 0; i < count; i++) {
193200
Key key = new Key(prefix + "_" + i, "embedding", "vector", System.currentTimeMillis());
194-
float[] vector = {(float) Math.random(), (float) Math.random(), (float) Math.random()};
201+
float[] vector = {rand.nextFloat(), rand.nextFloat(), rand.nextFloat()};
195202
byte[] category = "public".getBytes();
196203
entries.add(new VectorBuffer.VectorBlock.VectorEntry(key, vector, category));
197204
}

core/src/test/java/org/apache/accumulo/core/file/rfile/VectorIndexFooterTest.java

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,4 +118,21 @@ public void testEmptyIndexBehavior() {
118118

119119
assertTrue(candidates.isEmpty());
120120
}
121+
122+
@Test
123+
public void testDimensionValidation() {
124+
VectorIndexFooter footer =
125+
new VectorIndexFooter(2, VectorIndexFooter.IndexingType.HIERARCHICAL);
126+
127+
// Create centroids with mismatched dimensions
128+
List<float[]> centroids = Arrays.asList(new float[] {1.0f, 0.0f}, // 2D
129+
new float[] {0.0f, 1.0f, 0.0f}); // 3D - this should cause an exception
130+
131+
try {
132+
footer.buildHierarchicalIndex(centroids, 2);
133+
assertTrue(false, "Expected IllegalArgumentException for mismatched dimensions");
134+
} catch (IllegalArgumentException e) {
135+
assertTrue(e.getMessage().contains("All points must have the same dimension"));
136+
}
137+
}
121138
}

0 commit comments

Comments
 (0)