Summary of this patch:
  * BQVectorUtils: the existing packAsBinary is renamed to packAsBinaryLegacy, and a new packAsBinary is added
    that packs eight 0/1 ints into each output byte with an unrolled loop, plus a tail loop for leftover values.
  * PackAsBinaryBenchmark: new JMH benchmark that runs both implementations over the same 1000 random vectors.
  * BQVectorUtilsTests: new randomized test asserting that both implementations produce the same bytes.

diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/PackAsBinaryBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/PackAsBinaryBenchmark.java
new file mode 100644
index 0000000000000..babfcfcc84745
--- /dev/null
+++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/PackAsBinaryBenchmark.java
@@ -0,0 +1,85 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+package org.elasticsearch.benchmark.vector;
+
+import org.elasticsearch.common.logging.LogConfigurator;
+import org.elasticsearch.index.codec.vectors.BQVectorUtils;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.infra.Blackhole;
+
+import java.io.IOException;
+import java.util.Random;
+import java.util.concurrent.TimeUnit;
+
+@BenchmarkMode(Mode.Throughput)
+@OutputTimeUnit(TimeUnit.MILLISECONDS)
+@State(Scope.Benchmark)
+// first iteration is complete garbage, so make sure we really warmup
+@Warmup(iterations = 4, time = 1)
+// real iterations. not useful to spend tons of time here, better to fork more
+@Measurement(iterations = 5, time = 1)
+// engage some noise reduction
+@Fork(value = 1)
+public class PackAsBinaryBenchmark { // JMH throughput comparison of packAsBinary vs packAsBinaryLegacy
+
+    static {
+        LogConfigurator.configureESLogging(); // native access requires logging to be initialized
+    }
+
+    @Param({ "384", "782", "1024" }) // 782 is not a multiple of 8, so it also exercises the partial-byte tail
+    int dims;
+
+    int length;
+
+    int numVectors = 1000;
+
+    int[][] qVectors;
+    byte[] packed; // single output buffer, overwritten by every pack call
+
+    @Setup
+    public void setup() throws IOException {
+        Random random = new Random(123); // fixed seed: every run packs the same vectors
+
+        this.length = BQVectorUtils.discretize(dims, 64) / 8; // dims rounded up to a multiple of 64 bits, in bytes
+        this.packed = new byte[length];
+
+        qVectors = new int[numVectors][dims];
+        for (int[] qVector : qVectors) {
+            for (int i = 0; i < dims; i++) {
+                qVector[i] = random.nextInt(2); // 0 or 1, the only values the pack methods accept
+            }
+        }
+    }
+
+    @Benchmark
+    public void packAsBinary(Blackhole bh) {
+        for (int i = 0; i < numVectors; i++) {
+            BQVectorUtils.packAsBinary(qVectors[i], packed);
+            bh.consume(packed); // hand the result to JMH so the call is not optimized away
+        }
+    }
+
+    @Benchmark
+    public void packAsBinaryLegacy(Blackhole bh) {
+        for (int i = 0; i < numVectors; i++) {
+            BQVectorUtils.packAsBinaryLegacy(qVectors[i], packed);
+            bh.consume(packed); // hand the result to JMH so the call is not optimized away
+        }
+    }
+}
diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/BQVectorUtils.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/BQVectorUtils.java
index 75d0d2aa93a4d..f2ef2b05541f8 100644
--- a/server/src/main/java/org/elasticsearch/index/codec/vectors/BQVectorUtils.java
+++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/BQVectorUtils.java
@@ -40,7 +40,7 @@ public static boolean isUnitVector(float[] v) {
         return Math.abs(l1norm - 1.0d) <= EPSILON;
     }
 
-    public static void packAsBinary(int[] vector, byte[] packed) {
+    public static void packAsBinaryLegacy(int[] vector, byte[] packed) { // previous implementation, kept for comparison
         for (int i = 0; i < vector.length;) {
             byte result = 0;
             for (int j = 7; j >= 0 && i < vector.length; j--) {
@@ -54,6 +54,34 @@ public static void packAsBinary(int[] vector, byte[] packed) {
         }
     }
 
+    public static void packAsBinary(int[] vector, byte[] packed) { // packs 0/1 ints most-significant-bit first, 8 per byte
+        int limit = vector.length - 7; // i < limit keeps vector[i + 7] in bounds
+        int i = 0;
+        int index = 0;
+        for (; i < limit; i += 8, index++) { // full groups: 8 input values -> 1 output byte
+            assert vector[i] == 0 || vector[i] == 1;
+            assert vector[i + 1] == 0 || vector[i + 1] == 1;
+            assert vector[i + 2] == 0 || vector[i + 2] == 1;
+            assert vector[i + 3] == 0 || vector[i + 3] == 1;
+            assert vector[i + 4] == 0 || vector[i + 4] == 1;
+            assert vector[i + 5] == 0 || vector[i + 5] == 1;
+            assert vector[i + 6] == 0 || vector[i + 6] == 1;
+            assert vector[i + 7] == 0 || vector[i + 7] == 1;
+            int result = vector[i] << 7 | (vector[i + 1] << 6) | (vector[i + 2] << 5) | (vector[i + 3] << 4) | (vector[i + 4] << 3)
+                | (vector[i + 5] << 2) | (vector[i + 6] << 1) | (vector[i + 7]);
+            packed[index] = (byte) result; // unmasked, unlike the tail: relies on the asserted 0/1 precondition
+        }
+        if (i == vector.length) { // length is a multiple of 8: nothing left for a partial byte
+            return;
+        }
+        byte result = 0; // unused low bits of the final byte stay 0
+        for (int j = 7; j >= 0 && i < vector.length; i++, j--) { // leftover values fill the last byte from bit 7 down
+            assert vector[i] == 0 || vector[i] == 1;
+            result |= (byte) ((vector[i] & 1) << j);
+        }
+        packed[index] = result;
+    }
+
     public static int discretize(int value, int bucket) {
         return ((value + (bucket - 1)) / bucket) * bucket;
     }
diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/BQVectorUtilsTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/BQVectorUtilsTests.java
index 35349cd054dfd..b2e3ac85609b8 100644
--- a/server/src/test/java/org/elasticsearch/index/codec/vectors/BQVectorUtilsTests.java
+++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/BQVectorUtilsTests.java
@@ -64,6 +64,20 @@ public void testPackAsBinary() {
         assertArrayEquals(new byte[] { (byte) 0b11001010, (byte) 0b11100110 }, packed);
     }
 
+    public void testPackAsBinaryDuel() { // new packAsBinary must reproduce the legacy output on random 0/1 input
+        int dims = random().nextInt(16, 2049);
+        int[] toPack = new int[dims];
+        for (int i = 0; i < dims; i++) {
+            toPack[i] = random().nextInt(2);
+        }
+        int length = BQVectorUtils.discretize(dims, 64) / 8; // same buffer sizing as the benchmark: dims padded to 64 bits
+        byte[] packed = new byte[length];
+        byte[] packedLegacy = new byte[length];
+        BQVectorUtils.packAsBinaryLegacy(toPack, packedLegacy); // reference result
+        BQVectorUtils.packAsBinary(toPack, packed);
+        assertArrayEquals(packedLegacy, packed); // both implementations must agree byte-for-byte
+    }
+
     public void testPadFloat() {
         assertArrayEquals(new float[] { 1, 2, 3, 4 }, BQVectorUtils.pad(new float[] { 1, 2, 3, 4 }, 4), DELTA);
         assertArrayEquals(new float[] { 1, 2, 3, 4 }, BQVectorUtils.pad(new float[] { 1, 2, 3, 4 }, 3), DELTA);