Skip to content

Commit 4704ca2

Browse files
committed
Add hardcoded Vector and Blocks for Dense Vector
1 parent 70ada6e commit 4704ca2

13 files changed

+1988
-1
lines changed
Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
8+
package org.elasticsearch.compute.data;
9+
10+
// begin generated imports
11+
import org.apache.lucene.util.RamUsageEstimator;
12+
import org.elasticsearch.common.unit.ByteSizeValue;
13+
import org.elasticsearch.core.ReleasableIterator;
14+
// end generated imports
15+
16+
/**
17+
* Vector implementation that stores a constant float[] value.
18+
* This class is generated. Edit {@code X-ConstantVector.java.st} instead.
19+
*/
20+
final class ConstantDenseVectorVector extends AbstractVector implements DenseVectorVector {
21+
22+
static final long RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(ConstantDenseVectorVector.class);
23+
24+
private final float[] value;
25+
26+
ConstantDenseVectorVector(float[] value, int positionCount, BlockFactory blockFactory) {
27+
super(positionCount, blockFactory);
28+
this.value = value;
29+
}
30+
31+
@Override
32+
public float[] getDenseVector(int position) {
33+
return value;
34+
}
35+
36+
@Override
37+
public DenseVectorBlock asBlock() {
38+
return new DenseVectorVectorBlock(this);
39+
}
40+
41+
@Override
42+
public DenseVectorVector filter(int... positions) {
43+
return blockFactory().newConstantDenseVectorVector(value, positions.length);
44+
}
45+
46+
@Override
47+
public DenseVectorBlock keepMask(BooleanVector mask) {
48+
if (getPositionCount() == 0) {
49+
incRef();
50+
return new DenseVectorVectorBlock(this);
51+
}
52+
if (mask.isConstant()) {
53+
if (mask.getBoolean(0)) {
54+
incRef();
55+
return new DenseVectorVectorBlock(this);
56+
}
57+
return (DenseVectorBlock) blockFactory().newConstantNullBlock(getPositionCount());
58+
}
59+
try (DenseVectorBlock.Builder builder = blockFactory().newDenseVectorBlockBuilder(getPositionCount())) {
60+
// TODO if X-ArrayBlock used BooleanVector for it's null mask then we could shuffle references here.
61+
for (int p = 0; p < getPositionCount(); p++) {
62+
if (mask.getBoolean(p)) {
63+
builder.appendDenseVector(value);
64+
} else {
65+
builder.appendNull();
66+
}
67+
}
68+
return builder.build();
69+
}
70+
}
71+
72+
@Override
73+
public ReleasableIterator<DenseVectorBlock> lookup(IntBlock positions, ByteSizeValue targetBlockSize) {
74+
if (positions.getPositionCount() == 0) {
75+
return ReleasableIterator.empty();
76+
}
77+
IntVector positionsVector = positions.asVector();
78+
if (positionsVector == null) {
79+
return new DenseVectorLookup(asBlock(), positions, targetBlockSize);
80+
}
81+
int min = positionsVector.min();
82+
if (min < 0) {
83+
throw new IllegalArgumentException("invalid position [" + min + "]");
84+
}
85+
if (min > getPositionCount()) {
86+
return ReleasableIterator.single(
87+
(DenseVectorBlock) positions.blockFactory().newConstantNullBlock(positions.getPositionCount())
88+
);
89+
}
90+
if (positionsVector.max() < getPositionCount()) {
91+
return ReleasableIterator.single(positions.blockFactory().newConstantDenseVectorBlockWith(value, positions.getPositionCount()));
92+
}
93+
return new DenseVectorLookup(asBlock(), positions, targetBlockSize);
94+
}
95+
96+
@Override
97+
public ElementType elementType() {
98+
return ElementType.DENSE_VECTOR;
99+
}
100+
101+
@Override
102+
public boolean isConstant() {
103+
return true;
104+
}
105+
106+
@Override
107+
public DenseVectorVector deepCopy(BlockFactory blockFactory) {
108+
return blockFactory.newConstantDenseVectorVector(value, getPositionCount());
109+
}
110+
111+
@Override
112+
public long ramBytesUsed() {
113+
return RAM_BYTES_USED;
114+
}
115+
116+
@Override
117+
public boolean equals(Object obj) {
118+
if (obj instanceof DenseVectorVector that) {
119+
return DenseVectorVector.equals(this, that);
120+
}
121+
return false;
122+
}
123+
124+
@Override
125+
public int hashCode() {
126+
return DenseVectorVector.hash(this);
127+
}
128+
129+
@Override
130+
public int dimensions() {
131+
return value == null ? 0 : value.length;
132+
}
133+
134+
public String toString() {
135+
return getClass().getSimpleName() + "[positions=" + getPositionCount() + ", value=" + value + ']';
136+
}
137+
}
Lines changed: 250 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,250 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
8+
package org.elasticsearch.compute.data;
9+
10+
// begin generated imports
11+
import org.apache.lucene.util.RamUsageEstimator;
12+
import org.elasticsearch.common.io.stream.StreamOutput;
13+
import org.elasticsearch.common.unit.ByteSizeValue;
14+
import org.elasticsearch.core.ReleasableIterator;
15+
import org.elasticsearch.core.Releasables;
16+
17+
import java.io.IOException;
18+
import java.util.BitSet;
19+
// end generated imports
20+
21+
/**
22+
* Block implementation that stores values in a {@link DenseVectorArrayVector}.
23+
* This class is generated. Edit {@code X-ArrayBlock.java.st} instead.
24+
*/
25+
public final class DenseVectorArrayBlock extends AbstractArrayBlock implements DenseVectorBlock {
26+
27+
static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(DenseVectorArrayBlock.class);
28+
29+
private final DenseVectorArrayVector vector;
30+
31+
DenseVectorArrayBlock(
32+
float[][] values,
33+
int positionCount,
34+
int[] firstValueIndexes,
35+
BitSet nulls,
36+
MvOrdering mvOrdering,
37+
BlockFactory blockFactory
38+
) {
39+
this(
40+
new DenseVectorArrayVector(values, firstValueIndexes == null ? positionCount : firstValueIndexes[positionCount], blockFactory),
41+
positionCount,
42+
firstValueIndexes,
43+
nulls,
44+
mvOrdering
45+
);
46+
}
47+
48+
private DenseVectorArrayBlock(
49+
DenseVectorArrayVector vector, // stylecheck
50+
int positionCount,
51+
int[] firstValueIndexes,
52+
BitSet nulls,
53+
MvOrdering mvOrdering
54+
) {
55+
super(positionCount, firstValueIndexes, nulls, mvOrdering);
56+
this.vector = vector;
57+
assert firstValueIndexes == null
58+
? vector.getPositionCount() == getPositionCount()
59+
: firstValueIndexes[getPositionCount()] == vector.getPositionCount();
60+
}
61+
62+
static DenseVectorArrayBlock readArrayBlock(BlockFactory blockFactory, BlockStreamInput in) throws IOException {
63+
final SubFields sub = new SubFields(blockFactory, in);
64+
int dimensions = in.readInt();
65+
DenseVectorArrayVector vector = null;
66+
boolean success = false;
67+
try {
68+
vector = DenseVectorArrayVector.readArrayVector(sub.vectorPositions(), dimensions, in, blockFactory);
69+
var block = new DenseVectorArrayBlock(vector, sub.positionCount, sub.firstValueIndexes, sub.nullsMask, sub.mvOrdering);
70+
blockFactory.adjustBreaker(block.ramBytesUsed() - vector.ramBytesUsed() - sub.bytesReserved);
71+
success = true;
72+
return block;
73+
} finally {
74+
if (success == false) {
75+
Releasables.close(vector);
76+
blockFactory.adjustBreaker(-sub.bytesReserved);
77+
}
78+
}
79+
}
80+
81+
void writeArrayBlock(StreamOutput out) throws IOException {
82+
writeSubFields(out);
83+
out.writeInt(vector.dimensions());
84+
vector.writeArrayVector(vector.getPositionCount(), out);
85+
}
86+
87+
@Override
88+
public DenseVectorVector asVector() {
89+
return null;
90+
}
91+
92+
@Override
93+
public float[] getDenseVector(int valueIndex) {
94+
return vector.getDenseVector(valueIndex);
95+
}
96+
97+
@Override
98+
public DenseVectorBlock filter(int... positions) {
99+
try (var builder = blockFactory().newDenseVectorBlockBuilder(positions.length)) {
100+
for (int pos : positions) {
101+
if (isNull(pos)) {
102+
builder.appendNull();
103+
continue;
104+
}
105+
int valueCount = getValueCount(pos);
106+
int first = getFirstValueIndex(pos);
107+
if (valueCount == 1) {
108+
builder.appendDenseVector(getDenseVector(first));
109+
} else {
110+
builder.beginPositionEntry();
111+
for (int c = 0; c < valueCount; c++) {
112+
builder.appendDenseVector(getDenseVector(first + c));
113+
}
114+
builder.endPositionEntry();
115+
}
116+
}
117+
return builder.mvOrdering(mvOrdering()).build();
118+
}
119+
}
120+
121+
@Override
122+
public DenseVectorBlock keepMask(BooleanVector mask) {
123+
if (getPositionCount() == 0) {
124+
incRef();
125+
return this;
126+
}
127+
if (mask.isConstant()) {
128+
if (mask.getBoolean(0)) {
129+
incRef();
130+
return this;
131+
}
132+
return (DenseVectorBlock) blockFactory().newConstantNullBlock(getPositionCount());
133+
}
134+
try (DenseVectorBlock.Builder builder = blockFactory().newDenseVectorBlockBuilder(getPositionCount())) {
135+
// TODO if X-ArrayBlock used BooleanVector for it's null mask then we could shuffle references here.
136+
for (int p = 0; p < getPositionCount(); p++) {
137+
if (false == mask.getBoolean(p)) {
138+
builder.appendNull();
139+
continue;
140+
}
141+
int valueCount = getValueCount(p);
142+
if (valueCount == 0) {
143+
builder.appendNull();
144+
continue;
145+
}
146+
int start = getFirstValueIndex(p);
147+
if (valueCount == 1) {
148+
builder.appendDenseVector(getDenseVector(start));
149+
continue;
150+
}
151+
int end = start + valueCount;
152+
builder.beginPositionEntry();
153+
for (int i = start; i < end; i++) {
154+
builder.appendDenseVector(getDenseVector(i));
155+
}
156+
builder.endPositionEntry();
157+
}
158+
return builder.build();
159+
}
160+
}
161+
162+
@Override
163+
public ReleasableIterator<DenseVectorBlock> lookup(IntBlock positions, ByteSizeValue targetBlockSize) {
164+
return new DenseVectorLookup(this, positions, targetBlockSize);
165+
}
166+
167+
@Override
168+
public ElementType elementType() {
169+
return ElementType.DENSE_VECTOR;
170+
}
171+
172+
@Override
173+
public DenseVectorBlock expand() {
174+
if (firstValueIndexes == null) {
175+
incRef();
176+
return this;
177+
}
178+
if (nullsMask == null) {
179+
vector.incRef();
180+
return vector.asBlock();
181+
}
182+
183+
// The following line is correct because positions with multi-values are never null.
184+
int expandedPositionCount = vector.getPositionCount();
185+
long bitSetRamUsedEstimate = Math.max(nullsMask.size(), BlockRamUsageEstimator.sizeOfBitSet(expandedPositionCount));
186+
blockFactory().adjustBreaker(bitSetRamUsedEstimate);
187+
188+
DenseVectorArrayBlock expanded = new DenseVectorArrayBlock(
189+
vector,
190+
expandedPositionCount,
191+
null,
192+
shiftNullsToExpandedPositions(),
193+
MvOrdering.DEDUPLICATED_AND_SORTED_ASCENDING
194+
);
195+
blockFactory().adjustBreaker(expanded.ramBytesUsedOnlyBlock() - bitSetRamUsedEstimate);
196+
// We need to incRef after adjusting any breakers, otherwise we might leak the vector if the breaker trips.
197+
vector.incRef();
198+
return expanded;
199+
}
200+
201+
private long ramBytesUsedOnlyBlock() {
202+
return BASE_RAM_BYTES_USED + BlockRamUsageEstimator.sizeOf(firstValueIndexes) + BlockRamUsageEstimator.sizeOfBitSet(nullsMask);
203+
}
204+
205+
@Override
206+
public long ramBytesUsed() {
207+
return ramBytesUsedOnlyBlock() + vector.ramBytesUsed();
208+
}
209+
210+
@Override
211+
public boolean equals(Object obj) {
212+
if (obj instanceof DenseVectorBlock that) {
213+
return DenseVectorBlock.equals(this, that);
214+
}
215+
return false;
216+
}
217+
218+
@Override
219+
public int hashCode() {
220+
return DenseVectorBlock.hash(this);
221+
}
222+
223+
@Override
224+
public String toString() {
225+
return getClass().getSimpleName()
226+
+ "[positions="
227+
+ getPositionCount()
228+
+ ", mvOrdering="
229+
+ mvOrdering()
230+
+ ", vector="
231+
+ vector
232+
+ ']';
233+
}
234+
235+
@Override
236+
public void allowPassingToDifferentDriver() {
237+
vector.allowPassingToDifferentDriver();
238+
}
239+
240+
@Override
241+
public BlockFactory blockFactory() {
242+
return vector.blockFactory();
243+
}
244+
245+
@Override
246+
public void closeInternal() {
247+
blockFactory().adjustBreaker(-ramBytesUsedOnlyBlock());
248+
Releasables.closeExpectNoException(vector);
249+
}
250+
}

0 commit comments

Comments
 (0)