Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/133397.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 133397
summary: Push down loading of singleton dense double based field types to the …
area: "Compute Engine, Codec"
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
import org.apache.lucene.util.packed.PackedInts;
import org.elasticsearch.core.IOUtils;
import org.elasticsearch.index.codec.tsdb.TSDBDocValuesEncoder;
import org.elasticsearch.index.mapper.BlockDocValuesReader;
import org.elasticsearch.index.mapper.BlockLoader;

import java.io.IOException;
Expand Down Expand Up @@ -1371,6 +1372,19 @@ public BlockLoader.Block tryRead(BlockLoader.BlockFactory factory, BlockLoader.D
}
}

@Override
public BlockLoader.Block tryReadDoubles(
BlockLoader.BlockFactory factory,
BlockLoader.Docs docs,
int offset,
BlockDocValuesReader.ToDouble toDouble
) throws IOException {
try (BlockLoader.SingletonLongBuilder builder = factory.singletonLongs(docs.count() - offset)) {
builder.setToDouble(toDouble);
return tryRead(builder, docs, offset);
}
}

@Override
BlockLoader.Block tryRead(BlockLoader.SingletonLongBuilder builder, BlockLoader.Docs docs, int offset) throws IOException {
final int docsCount = docs.count();
Expand Down Expand Up @@ -1768,6 +1782,11 @@ public BlockLoader.Builder endPositionEntry() {
throw new UnsupportedOperationException();
}

@Override
public void setToDouble(BlockDocValuesReader.ToDouble toDouble) {
throw new UnsupportedOperationException();
}

@Override
public void close() {}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,12 @@ private static class SingletonDoubles extends BlockDocValuesReader {

@Override
public BlockLoader.Block read(BlockFactory factory, Docs docs, int offset, boolean nullsFiltered) throws IOException {
if (docValues instanceof BlockLoader.OptionalColumnAtATimeReader direct) {
BlockLoader.Block result = direct.tryReadDoubles(factory, docs, offset, toDouble);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we have an OverLong vector or block, I think we don't need to introduce tryReadDoubles. Instead, we can just read longs from the codec and perform the conversion here. This would significantly reduce the complexity in the codec. We can add a method to BlockLoaderFactory that converts a block to doubles with a ToDouble function.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a good way to convert a BlockLoader.Block to a DoubleVector or similar? I think I tried this approach first but hit this incompatibility issue.

if (result != null) {
return result;
}
}
try (BlockLoader.DoubleBuilder builder = factory.doublesFromDocValues(docs.count() - offset)) {
for (int i = offset; i < docs.count(); i++) {
int doc = docs.get(i);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,16 @@ interface OptionalColumnAtATimeReader {
*/
@Nullable
BlockLoader.Block tryRead(BlockFactory factory, Docs docs, int offset) throws IOException;

/**
* Specialization for doubles.
* Returns {@code null} if unable to load values as doubles.
*/
@Nullable
default BlockLoader.Block tryReadDoubles(BlockFactory factory, Docs docs, int offset, BlockDocValuesReader.ToDouble toDouble)
throws IOException {
return null;
}
}

interface RowStrideReader extends Reader {
Expand Down Expand Up @@ -537,6 +547,7 @@ interface IntBuilder extends Builder {
* Specialized builder for collecting dense arrays of long values.
*/
interface SingletonLongBuilder extends Builder {
void setToDouble(BlockDocValuesReader.ToDouble toDouble);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think adding a new method to BlockFactory: toDoubles(SingletonLongBuilder longBuilder, BlockDocValuesReader.ToDouble toDouble) would be more manageable

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah I was thinking about this initially, but it would require creating a subclass of DoubleArrayVector wrapping the created LongArrayVector, but that's marked as final. is there a better approach?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see, that will be DoubleOverLongVector.


SingletonLongBuilder appendLong(long value);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -202,12 +202,19 @@ public LongsBuilder appendLong(long value) {
@Override
public BlockLoader.SingletonLongBuilder singletonLongs(int expectedCount) {
final long[] values = new long[expectedCount];

return new BlockLoader.SingletonLongBuilder() {

private int count;
private BlockDocValuesReader.ToDouble toDouble = null;

@Override
public BlockLoader.Block build() {
if (toDouble != null) {
return new TestBlock(
Arrays.stream(values).mapToDouble(toDouble::convert).boxed().collect(Collectors.toUnmodifiableList())
);
}
return new TestBlock(Arrays.stream(values).boxed().collect(Collectors.toUnmodifiableList()));
}

Expand Down Expand Up @@ -239,6 +246,11 @@ public BlockLoader.Builder endPositionEntry() {
throw new UnsupportedOperationException();
}

@Override
public void setToDouble(BlockDocValuesReader.ToDouble toDouble) {
this.toDouble = toDouble;
}

@Override
public void close() {}
};
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,12 @@ public LongVector newLongArrayVector(long[] values, int positionCount, long preA
return b;
}

public DoubleVector newDoubleOverLongArrayVector(long[] values, BlockUtils.ToDouble toDouble, int positions) {
var v = new DoubleOverLongArrayVector(values, toDouble, positions, this);
adjustBreaker(v.ramBytesUsed());
return v;
}

public final LongBlock newConstantLongBlockWith(long value, int positions) {
return newConstantLongBlockWith(value, positions, 0L);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,14 @@ private static Block constantBlock(BlockFactory blockFactory, ElementType type,
*/
public record Doc(int shard, int segment, int doc) {}

/**
* Functional interface to convert a long value to a double.
*/
@FunctionalInterface
public interface ToDouble {
double convert(long v);
}

/**
* Read all values from a positions into a java object. This is not fast
* but fine to call in the "fold" path.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.compute.data;

// begin generated imports
import org.apache.lucene.util.RamUsageEstimator;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.core.ReleasableIterator;

import java.util.stream.Collectors;
import java.util.stream.IntStream;
// end generated imports

/**
* Vector implementation that converts long to double values on-the-fly.
*/
final class DoubleOverLongArrayVector extends AbstractVector implements DoubleVector {

static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(DoubleOverLongArrayVector.class)
// TODO: remove these extra bytes once `asBlock` returns a block with a separate reference to the vector.
+ RamUsageEstimator.shallowSizeOfInstance(DoubleVectorBlock.class)
// TODO: remove this if/when we account for memory used by Pages
+ Block.PAGE_MEM_OVERHEAD_PER_BLOCK;

private final long[] values;
private final BlockUtils.ToDouble toDouble;

DoubleOverLongArrayVector(long[] values, BlockUtils.ToDouble toDouble, int positionCount, BlockFactory blockFactory) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be DoubleOverLongVector, where we delegate to a LongVector instead?

super(positionCount, blockFactory);
this.values = values;
this.toDouble = toDouble;
}

@Override
public DoubleBlock asBlock() {
return new DoubleVectorBlock(this);
}

@Override
public double getDouble(int position) {
return toDouble.convert(values[position]);
}

@Override
public ElementType elementType() {
return ElementType.DOUBLE;
}

@Override
public boolean isConstant() {
return false;
}

@Override
public DoubleVector filter(int... positions) {
try (DoubleVector.Builder builder = blockFactory().newDoubleVectorBuilder(positions.length)) {
for (int pos : positions) {
builder.appendDouble(toDouble.convert((values[pos])));
}
return builder.build();
}
}

@Override
public DoubleBlock keepMask(BooleanVector mask) {
if (getPositionCount() == 0) {
incRef();
return new DoubleVectorBlock(this);
}
if (mask.isConstant()) {
if (mask.getBoolean(0)) {
incRef();
return new DoubleVectorBlock(this);
}
return (DoubleBlock) blockFactory().newConstantNullBlock(getPositionCount());
}
try (DoubleBlock.Builder builder = blockFactory().newDoubleBlockBuilder(getPositionCount())) {
// TODO if X-ArrayBlock used BooleanVector for it's null mask then we could shuffle references here.
for (int p = 0; p < getPositionCount(); p++) {
if (mask.getBoolean(p)) {
builder.appendDouble(getDouble(p));
} else {
builder.appendNull();
}
}
return builder.build();
}
}

@Override
public ReleasableIterator<DoubleBlock> lookup(IntBlock positions, ByteSizeValue targetBlockSize) {
return new DoubleLookup(asBlock(), positions, targetBlockSize);
}

private static long ramBytesEstimated(long[] values) {
return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(values);
}

@Override
public long ramBytesUsed() {
return ramBytesEstimated(values);
}

@Override
public boolean equals(Object obj) {
if (obj instanceof DoubleVector that) {
return DoubleVector.equals(this, that);
}
return false;
}

@Override
public int hashCode() {
return DoubleVector.hash(this);
}

@Override
public String toString() {
String valuesString = IntStream.range(0, getPositionCount())
.limit(10)
.mapToObj(n -> String.valueOf(toDouble.convert(values[n])))
.collect(Collectors.joining(", ", "[", getPositionCount() > 10 ? ", ...]" : "]"));
return getClass().getSimpleName() + "[positions=" + getPositionCount() + ", values=" + valuesString + ']';
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ public sealed interface $Type$Vector extends Vector permits Constant$Type$Vector
ConstantNullVector {
$elseif(double)$
public sealed interface $Type$Vector extends Vector permits Constant$Type$Vector, $Type$ArrayVector, $Type$BigArrayVector,
ConstantNullVector {
DoubleOverLongArrayVector, ConstantNullVector {
$else$
public sealed interface $Type$Vector extends Vector permits Constant$Type$Vector, $Type$ArrayVector, $Type$BigArrayVector, ConstantNullVector {
$endif$
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import org.elasticsearch.compute.data.Block;
import org.elasticsearch.compute.data.BlockFactory;
import org.elasticsearch.core.Releasable;
import org.elasticsearch.index.mapper.BlockDocValuesReader;
import org.elasticsearch.index.mapper.BlockLoader;

/**
Expand All @@ -22,6 +23,7 @@ public final class SingletonLongBuilder implements BlockLoader.SingletonLongBuil
private final BlockFactory blockFactory;

private int count;
private BlockDocValuesReader.ToDouble toDouble;

public SingletonLongBuilder(int expectedCount, BlockFactory blockFactory) {
this.blockFactory = blockFactory;
Expand Down Expand Up @@ -65,7 +67,10 @@ public long estimatedBytes() {

@Override
public Block build() {
return blockFactory.newLongArrayVector(values, count, 0L).asBlock();
if (toDouble != null) {
return blockFactory.newDoubleOverLongArrayVector(values, toDouble::convert, count).asBlock();
}
return blockFactory.newLongArrayVector(values, count).asBlock();
}

@Override
Expand All @@ -81,6 +86,11 @@ public BlockLoader.SingletonLongBuilder appendLongs(long[] values, int from, int
return this;
}

@Override
public void setToDouble(BlockDocValuesReader.ToDouble toDouble) {
this.toDouble = toDouble;
}

@Override
public void close() {
blockFactory.adjustBreaker(-valuesSize(values.length));
Expand Down