|
public class BQSpaceUtils {

    /** Number of bits per quantized query component (half-byte quantization, values 0 to 15). */
    public static final short B_QUERY = 4;

    /**
     * Copied from Lucene, replace with Lucene's implementation sometime after Lucene 10.
     *
     * <p>Transposes the query vector into a byte array allowing for efficient bitwise operations
     * with the index bit vectors. The query bits are organized as four consecutive bit planes, each
     * occupying one quarter of {@code quantQueryByte}: the first (least significant) bit of every
     * dimension is stored in the first quarter, and the second, third, and fourth bits are stored in
     * the second, third, and fourth quarters, respectively. Within a plane, eight consecutive
     * dimensions are packed into one byte, earliest dimension in the most significant bit. This
     * allows for direct bitwise comparisons with the stored index vectors through summing the
     * bitwise results with the relative required bit shifts.
     *
     * <p>If {@code q.length} is not a multiple of 8, the unused low bits of the last byte written in
     * each plane are zero. Bytes of a plane beyond {@code ceil(q.length / 8)} are not written.
     *
     * @param q the query vector, assumed to be half-byte quantized with values between 0 and 15
     * @param quantQueryByte the byte array to store the transposed query vector; its length must be
     *     a multiple of 4 and each quarter must hold at least {@code ceil(q.length / 8)} bytes
     */
    public static void transposeHalfByte(byte[] q, byte[] quantQueryByte) {
        // The plane offsets below are derived from the buffer length, so the buffer must split
        // evenly into four planes that are each large enough for the packed query bits.
        assert quantQueryByte.length % 4 == 0
            : "quantQueryByte length must be a multiple of 4, got " + quantQueryByte.length;
        assert quantQueryByte.length / 4 >= (q.length + 7) / 8
            : "quantQueryByte of length " + quantQueryByte.length + " is too small for " + q.length + " dimensions";

        for (int i = 0; i < q.length;) {
            int lowerByte = 0;
            int lowerMiddleByte = 0;
            int upperMiddleByte = 0;
            int upperByte = 0;
            // Pack up to eight components into one byte per plane, most significant bit first.
            for (int j = 7; j >= 0 && i < q.length; j--, i++) {
                final int value = q[i];
                // Validate every component, not only the first one of each group of eight.
                assert value >= 0 && value <= 15 : "query component out of half-byte range at index " + i + ": " + value;
                lowerByte |= (value & 1) << j;
                lowerMiddleByte |= ((value >> 1) & 1) << j;
                upperMiddleByte |= ((value >> 2) & 1) << j;
                upperByte |= ((value >> 3) & 1) << j;
            }
            // i now points one past the group just consumed; round up to find that group's byte slot.
            final int index = ((i + 7) / 8) - 1;
            quantQueryByte[index] = (byte) lowerByte;
            quantQueryByte[index + quantQueryByte.length / 4] = (byte) lowerMiddleByte;
            quantQueryByte[index + quantQueryByte.length / 2] = (byte) upperMiddleByte;
            quantQueryByte[index + 3 * quantQueryByte.length / 4] = (byte) upperByte;
        }
    }
}
0 commit comments