From 8889ab519b185015b2d5b6f2b4402e968b54b2ac Mon Sep 17 00:00:00 2001 From: cdelgado Date: Tue, 2 Sep 2025 11:43:28 +0200 Subject: [PATCH 01/25] First attempt at ToDenseVector function --- .../ToDenseVectorFromBooleanEvaluator.java | 132 +++++++++++++++ .../ToDenseVectorFromDoubleEvaluator.java | 132 +++++++++++++++ .../ToDenseVectorFromIntEvaluator.java | 132 +++++++++++++++ .../ToDenseVectorFromLongEvaluator.java | 132 +++++++++++++++ .../ToDenseVectorFromStringEvaluator.java | 150 ++++++++++++++++++ ...oDenseVectorFromUnsignedLongEvaluator.java | 132 +++++++++++++++ .../esql/expression/ExpressionWritables.java | 2 + .../scalar/convert/ToDenseVector.java | 120 ++++++++++++++ .../scalar/convert/ToDenseVectorTests.java | 105 ++++++++++++ 9 files changed, 1037 insertions(+) create mode 100644 x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorFromBooleanEvaluator.java create mode 100644 x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorFromDoubleEvaluator.java create mode 100644 x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorFromIntEvaluator.java create mode 100644 x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorFromLongEvaluator.java create mode 100644 x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorFromStringEvaluator.java create mode 100644 x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorFromUnsignedLongEvaluator.java create mode 100644 x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVector.java create mode 100644 x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorTests.java diff --git a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorFromBooleanEvaluator.java b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorFromBooleanEvaluator.java new file mode 100644 index 0000000000000..582fcdb070a3b --- /dev/null +++ b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorFromBooleanEvaluator.java @@ -0,0 +1,132 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License +// 2.0; you may not use this file except in compliance with the Elastic License +// 2.0. +package org.elasticsearch.xpack.esql.expression.function.scalar.convert; + +import java.lang.Override; +import java.lang.String; +import org.apache.lucene.util.RamUsageEstimator; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BooleanBlock; +import org.elasticsearch.compute.data.BooleanVector; +import org.elasticsearch.compute.data.FloatBlock; +import org.elasticsearch.compute.data.Vector; +import org.elasticsearch.compute.operator.DriverContext; +import org.elasticsearch.compute.operator.EvalOperator; +import org.elasticsearch.core.Releasables; +import org.elasticsearch.xpack.esql.core.tree.Source; + +/** + * {@link EvalOperator.ExpressionEvaluator} implementation for {@link ToDenseVector}. + * This class is generated. Edit {@code ConvertEvaluatorImplementer} instead. + */ +public final class ToDenseVectorFromBooleanEvaluator extends AbstractConvertFunction.AbstractEvaluator { + private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(ToDenseVectorFromBooleanEvaluator.class); + + private final EvalOperator.ExpressionEvaluator bool; + + public ToDenseVectorFromBooleanEvaluator(Source source, EvalOperator.ExpressionEvaluator bool, + DriverContext driverContext) { + super(driverContext, source); + this.bool = bool; + } + + @Override + public EvalOperator.ExpressionEvaluator next() { + return bool; + } + + @Override + public Block evalVector(Vector v) { + BooleanVector vector = (BooleanVector) v; + int positionCount = v.getPositionCount(); + if (vector.isConstant()) { + return driverContext.blockFactory().newConstantFloatBlockWith(evalValue(vector, 0), positionCount); + } + try (FloatBlock.Builder builder = driverContext.blockFactory().newFloatBlockBuilder(positionCount)) { + for (int p = 0; p < positionCount; p++) { + builder.appendFloat(evalValue(vector, p)); + } + return builder.build(); + } + } + + private float evalValue(BooleanVector container, int index) { + boolean value = container.getBoolean(index); + return ToDenseVector.fromBoolean(value); + } + + @Override + public Block evalBlock(Block b) { + BooleanBlock block = (BooleanBlock) b; + int positionCount = block.getPositionCount(); + try (FloatBlock.Builder builder = driverContext.blockFactory().newFloatBlockBuilder(positionCount)) { + for (int p = 0; p < positionCount; p++) { + int valueCount = block.getValueCount(p); + int start = block.getFirstValueIndex(p); + int end = start + valueCount; + boolean positionOpened = false; + boolean valuesAppended = false; + for (int i = start; i < end; i++) { + float value = evalValue(block, i); + if (positionOpened == false && valueCount > 1) { + builder.beginPositionEntry(); + positionOpened = true; + } + builder.appendFloat(value); + valuesAppended = true; + } + if (valuesAppended == false) { + builder.appendNull(); + } else if (positionOpened) { + builder.endPositionEntry(); + } + } + return builder.build(); + } + } + + private float evalValue(BooleanBlock container, int index) { + boolean value = container.getBoolean(index); + return ToDenseVector.fromBoolean(value); + } + + @Override + public String toString() { + return "ToDenseVectorFromBooleanEvaluator[" + "bool=" + bool + "]"; + } + + @Override + public void close() { + Releasables.closeExpectNoException(bool); + } + + @Override + public long baseRamBytesUsed() { + long baseRamBytesUsed = BASE_RAM_BYTES_USED; + baseRamBytesUsed += bool.baseRamBytesUsed(); + return baseRamBytesUsed; + } + + public static class Factory implements EvalOperator.ExpressionEvaluator.Factory { + private final Source source; + + private final EvalOperator.ExpressionEvaluator.Factory bool; + + public Factory(Source source, EvalOperator.ExpressionEvaluator.Factory bool) { + this.source = source; + this.bool = bool; + } + + @Override + public ToDenseVectorFromBooleanEvaluator get(DriverContext context) { + return new ToDenseVectorFromBooleanEvaluator(source, bool.get(context), context); + } + + @Override + public String toString() { + return "ToDenseVectorFromBooleanEvaluator[" + "bool=" + bool + "]"; + } + } +} diff --git a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorFromDoubleEvaluator.java b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorFromDoubleEvaluator.java new file mode 100644 index 0000000000000..a5fe8c25610ed --- /dev/null +++ b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorFromDoubleEvaluator.java @@ -0,0 +1,132 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License +// 2.0; you may not use this file except in compliance with the Elastic License +// 2.0. +package org.elasticsearch.xpack.esql.expression.function.scalar.convert; + +import java.lang.Override; +import java.lang.String; +import org.apache.lucene.util.RamUsageEstimator; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.DoubleBlock; +import org.elasticsearch.compute.data.DoubleVector; +import org.elasticsearch.compute.data.FloatBlock; +import org.elasticsearch.compute.data.Vector; +import org.elasticsearch.compute.operator.DriverContext; +import org.elasticsearch.compute.operator.EvalOperator; +import org.elasticsearch.core.Releasables; +import org.elasticsearch.xpack.esql.core.tree.Source; + +/** + * {@link EvalOperator.ExpressionEvaluator} implementation for {@link ToDenseVector}. + * This class is generated. Edit {@code ConvertEvaluatorImplementer} instead. + */ +public final class ToDenseVectorFromDoubleEvaluator extends AbstractConvertFunction.AbstractEvaluator { + private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(ToDenseVectorFromDoubleEvaluator.class); + + private final EvalOperator.ExpressionEvaluator d; + + public ToDenseVectorFromDoubleEvaluator(Source source, EvalOperator.ExpressionEvaluator d, + DriverContext driverContext) { + super(driverContext, source); + this.d = d; + } + + @Override + public EvalOperator.ExpressionEvaluator next() { + return d; + } + + @Override + public Block evalVector(Vector v) { + DoubleVector vector = (DoubleVector) v; + int positionCount = v.getPositionCount(); + if (vector.isConstant()) { + return driverContext.blockFactory().newConstantFloatBlockWith(evalValue(vector, 0), positionCount); + } + try (FloatBlock.Builder builder = driverContext.blockFactory().newFloatBlockBuilder(positionCount)) { + for (int p = 0; p < positionCount; p++) { + builder.appendFloat(evalValue(vector, p)); + } + return builder.build(); + } + } + + private float evalValue(DoubleVector container, int index) { + double value = container.getDouble(index); + return ToDenseVector.fromDouble(value); + } + + @Override + public Block evalBlock(Block b) { + DoubleBlock block = (DoubleBlock) b; + int positionCount = block.getPositionCount(); + try (FloatBlock.Builder builder = driverContext.blockFactory().newFloatBlockBuilder(positionCount)) { + for (int p = 0; p < positionCount; p++) { + int valueCount = block.getValueCount(p); + int start = block.getFirstValueIndex(p); + int end = start + valueCount; + boolean positionOpened = false; + boolean valuesAppended = false; + for (int i = start; i < end; i++) { + float value = evalValue(block, i); + if (positionOpened == false && valueCount > 1) { + builder.beginPositionEntry(); + positionOpened = true; + } + builder.appendFloat(value); + valuesAppended = true; + } + if (valuesAppended == false) { + builder.appendNull(); + } else if (positionOpened) { + builder.endPositionEntry(); + } + } + return builder.build(); + } + } + + private float evalValue(DoubleBlock container, int index) { + double value = container.getDouble(index); + return ToDenseVector.fromDouble(value); + } + + @Override + public String toString() { + return "ToDenseVectorFromDoubleEvaluator[" + "d=" + d + "]"; + } + + @Override + public void close() { + Releasables.closeExpectNoException(d); + } + + @Override + public long baseRamBytesUsed() { + long baseRamBytesUsed = BASE_RAM_BYTES_USED; + baseRamBytesUsed += d.baseRamBytesUsed(); + return baseRamBytesUsed; + } + + public static class Factory implements EvalOperator.ExpressionEvaluator.Factory { + private final Source source; + + private final EvalOperator.ExpressionEvaluator.Factory d; + + public Factory(Source source, EvalOperator.ExpressionEvaluator.Factory d) { + this.source = source; + this.d = d; + } + + @Override + public ToDenseVectorFromDoubleEvaluator get(DriverContext context) { + return new ToDenseVectorFromDoubleEvaluator(source, d.get(context), context); + } + + @Override + public String toString() { + return "ToDenseVectorFromDoubleEvaluator[" + "d=" + d + "]"; + } + } +} diff --git a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorFromIntEvaluator.java b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorFromIntEvaluator.java new file mode 100644 index 0000000000000..ff6ccb6e86917 --- /dev/null +++ b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorFromIntEvaluator.java @@ -0,0 +1,132 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License +// 2.0; you may not use this file except in compliance with the Elastic License +// 2.0. +package org.elasticsearch.xpack.esql.expression.function.scalar.convert; + +import java.lang.Override; +import java.lang.String; +import org.apache.lucene.util.RamUsageEstimator; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.FloatBlock; +import org.elasticsearch.compute.data.IntBlock; +import org.elasticsearch.compute.data.IntVector; +import org.elasticsearch.compute.data.Vector; +import org.elasticsearch.compute.operator.DriverContext; +import org.elasticsearch.compute.operator.EvalOperator; +import org.elasticsearch.core.Releasables; +import org.elasticsearch.xpack.esql.core.tree.Source; + +/** + * {@link EvalOperator.ExpressionEvaluator} implementation for {@link ToDenseVector}. + * This class is generated. Edit {@code ConvertEvaluatorImplementer} instead. + */ +public final class ToDenseVectorFromIntEvaluator extends AbstractConvertFunction.AbstractEvaluator { + private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(ToDenseVectorFromIntEvaluator.class); + + private final EvalOperator.ExpressionEvaluator i; + + public ToDenseVectorFromIntEvaluator(Source source, EvalOperator.ExpressionEvaluator i, + DriverContext driverContext) { + super(driverContext, source); + this.i = i; + } + + @Override + public EvalOperator.ExpressionEvaluator next() { + return i; + } + + @Override + public Block evalVector(Vector v) { + IntVector vector = (IntVector) v; + int positionCount = v.getPositionCount(); + if (vector.isConstant()) { + return driverContext.blockFactory().newConstantFloatBlockWith(evalValue(vector, 0), positionCount); + } + try (FloatBlock.Builder builder = driverContext.blockFactory().newFloatBlockBuilder(positionCount)) { + for (int p = 0; p < positionCount; p++) { + builder.appendFloat(evalValue(vector, p)); + } + return builder.build(); + } + } + + private float evalValue(IntVector container, int index) { + int value = container.getInt(index); + return ToDenseVector.fromInt(value); + } + + @Override + public Block evalBlock(Block b) { + IntBlock block = (IntBlock) b; + int positionCount = block.getPositionCount(); + try (FloatBlock.Builder builder = driverContext.blockFactory().newFloatBlockBuilder(positionCount)) { + for (int p = 0; p < positionCount; p++) { + int valueCount = block.getValueCount(p); + int start = block.getFirstValueIndex(p); + int end = start + valueCount; + boolean positionOpened = false; + boolean valuesAppended = false; + for (int i = start; i < end; i++) { + float value = evalValue(block, i); + if (positionOpened == false && valueCount > 1) { + builder.beginPositionEntry(); + positionOpened = true; + } + builder.appendFloat(value); + valuesAppended = true; + } + if (valuesAppended == false) { + builder.appendNull(); + } else if (positionOpened) { + builder.endPositionEntry(); + } + } + return builder.build(); + } + } + + private float evalValue(IntBlock container, int index) { + int value = container.getInt(index); + return ToDenseVector.fromInt(value); + } + + @Override + public String toString() { + return "ToDenseVectorFromIntEvaluator[" + "i=" + i + "]"; + } + + @Override + public void close() { + Releasables.closeExpectNoException(i); + } + + @Override + public long baseRamBytesUsed() { + long baseRamBytesUsed = BASE_RAM_BYTES_USED; + baseRamBytesUsed += i.baseRamBytesUsed(); + return baseRamBytesUsed; + } + + public static class Factory implements EvalOperator.ExpressionEvaluator.Factory { + private final Source source; + + private final EvalOperator.ExpressionEvaluator.Factory i; + + public Factory(Source source, EvalOperator.ExpressionEvaluator.Factory i) { + this.source = source; + this.i = i; + } + + @Override + public ToDenseVectorFromIntEvaluator get(DriverContext context) { + return new ToDenseVectorFromIntEvaluator(source, i.get(context), context); + } + + @Override + public String toString() { + return "ToDenseVectorFromIntEvaluator[" + "i=" + i + "]"; + } + } +} diff --git a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorFromLongEvaluator.java b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorFromLongEvaluator.java new file mode 100644 index 0000000000000..4ca69984ff540 --- /dev/null +++ b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorFromLongEvaluator.java @@ -0,0 +1,132 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License +// 2.0; you may not use this file except in compliance with the Elastic License +// 2.0. +package org.elasticsearch.xpack.esql.expression.function.scalar.convert; + +import java.lang.Override; +import java.lang.String; +import org.apache.lucene.util.RamUsageEstimator; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.FloatBlock; +import org.elasticsearch.compute.data.LongBlock; +import org.elasticsearch.compute.data.LongVector; +import org.elasticsearch.compute.data.Vector; +import org.elasticsearch.compute.operator.DriverContext; +import org.elasticsearch.compute.operator.EvalOperator; +import org.elasticsearch.core.Releasables; +import org.elasticsearch.xpack.esql.core.tree.Source; + +/** + * {@link EvalOperator.ExpressionEvaluator} implementation for {@link ToDenseVector}. + * This class is generated. Edit {@code ConvertEvaluatorImplementer} instead. + */ +public final class ToDenseVectorFromLongEvaluator extends AbstractConvertFunction.AbstractEvaluator { + private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(ToDenseVectorFromLongEvaluator.class); + + private final EvalOperator.ExpressionEvaluator l; + + public ToDenseVectorFromLongEvaluator(Source source, EvalOperator.ExpressionEvaluator l, + DriverContext driverContext) { + super(driverContext, source); + this.l = l; + } + + @Override + public EvalOperator.ExpressionEvaluator next() { + return l; + } + + @Override + public Block evalVector(Vector v) { + LongVector vector = (LongVector) v; + int positionCount = v.getPositionCount(); + if (vector.isConstant()) { + return driverContext.blockFactory().newConstantFloatBlockWith(evalValue(vector, 0), positionCount); + } + try (FloatBlock.Builder builder = driverContext.blockFactory().newFloatBlockBuilder(positionCount)) { + for (int p = 0; p < positionCount; p++) { + builder.appendFloat(evalValue(vector, p)); + } + return builder.build(); + } + } + + private float evalValue(LongVector container, int index) { + long value = container.getLong(index); + return ToDenseVector.fromLong(value); + } + + @Override + public Block evalBlock(Block b) { + LongBlock block = (LongBlock) b; + int positionCount = block.getPositionCount(); + try (FloatBlock.Builder builder = driverContext.blockFactory().newFloatBlockBuilder(positionCount)) { + for (int p = 0; p < positionCount; p++) { + int valueCount = block.getValueCount(p); + int start = block.getFirstValueIndex(p); + int end = start + valueCount; + boolean positionOpened = false; + boolean valuesAppended = false; + for (int i = start; i < end; i++) { + float value = evalValue(block, i); + if (positionOpened == false && valueCount > 1) { + builder.beginPositionEntry(); + positionOpened = true; + } + builder.appendFloat(value); + valuesAppended = true; + } + if (valuesAppended == false) { + builder.appendNull(); + } else if (positionOpened) { + builder.endPositionEntry(); + } + } + return builder.build(); + } + } + + private float evalValue(LongBlock container, int index) { + long value = container.getLong(index); + return ToDenseVector.fromLong(value); + } + + @Override + public String toString() { + return "ToDenseVectorFromLongEvaluator[" + "l=" + l + "]"; + } + + @Override + public void close() { + Releasables.closeExpectNoException(l); + } + + @Override + public long baseRamBytesUsed() { + long baseRamBytesUsed = BASE_RAM_BYTES_USED; + baseRamBytesUsed += l.baseRamBytesUsed(); + return baseRamBytesUsed; + } + + public static class Factory implements EvalOperator.ExpressionEvaluator.Factory { + private final Source source; + + private final EvalOperator.ExpressionEvaluator.Factory l; + + public Factory(Source source, EvalOperator.ExpressionEvaluator.Factory l) { + this.source = source; + this.l = l; + } + + @Override + public ToDenseVectorFromLongEvaluator get(DriverContext context) { + return new ToDenseVectorFromLongEvaluator(source, l.get(context), context); + } + + @Override + public String toString() { + return "ToDenseVectorFromLongEvaluator[" + "l=" + l + "]"; + } + } +} diff --git a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorFromStringEvaluator.java b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorFromStringEvaluator.java new file mode 100644 index 0000000000000..f53fbe12221df --- /dev/null +++ b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorFromStringEvaluator.java @@ -0,0 +1,150 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License +// 2.0; you may not use this file except in compliance with the Elastic License +// 2.0. +package org.elasticsearch.xpack.esql.expression.function.scalar.convert; + +import java.lang.Override; +import java.lang.String; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.RamUsageEstimator; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BytesRefBlock; +import org.elasticsearch.compute.data.BytesRefVector; +import org.elasticsearch.compute.data.FloatBlock; +import org.elasticsearch.compute.data.Vector; +import org.elasticsearch.compute.operator.DriverContext; +import org.elasticsearch.compute.operator.EvalOperator; +import org.elasticsearch.core.Releasables; +import org.elasticsearch.xpack.esql.core.InvalidArgumentException; +import org.elasticsearch.xpack.esql.core.tree.Source; + +/** + * {@link EvalOperator.ExpressionEvaluator} implementation for {@link ToDenseVector}. + * This class is generated. Edit {@code ConvertEvaluatorImplementer} instead. + */ +public final class ToDenseVectorFromStringEvaluator extends AbstractConvertFunction.AbstractEvaluator { + private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(ToDenseVectorFromStringEvaluator.class); + + private final EvalOperator.ExpressionEvaluator in; + + public ToDenseVectorFromStringEvaluator(Source source, EvalOperator.ExpressionEvaluator in, + DriverContext driverContext) { + super(driverContext, source); + this.in = in; + } + + @Override + public EvalOperator.ExpressionEvaluator next() { + return in; + } + + @Override + public Block evalVector(Vector v) { + BytesRefVector vector = (BytesRefVector) v; + int positionCount = v.getPositionCount(); + BytesRef scratchPad = new BytesRef(); + if (vector.isConstant()) { + try { + return driverContext.blockFactory().newConstantFloatBlockWith(evalValue(vector, 0, scratchPad), positionCount); + } catch (InvalidArgumentException e) { + registerException(e); + return driverContext.blockFactory().newConstantNullBlock(positionCount); + } + } + try (FloatBlock.Builder builder = driverContext.blockFactory().newFloatBlockBuilder(positionCount)) { + for (int p = 0; p < positionCount; p++) { + try { + builder.appendFloat(evalValue(vector, p, scratchPad)); + } catch (InvalidArgumentException e) { + registerException(e); + builder.appendNull(); + } + } + return builder.build(); + } + } + + private float evalValue(BytesRefVector container, int index, BytesRef scratchPad) { + BytesRef value = container.getBytesRef(index, scratchPad); + return ToDenseVector.fromString(value); + } + + @Override + public Block evalBlock(Block b) { + BytesRefBlock block = (BytesRefBlock) b; + int positionCount = block.getPositionCount(); + try (FloatBlock.Builder builder = driverContext.blockFactory().newFloatBlockBuilder(positionCount)) { + BytesRef scratchPad = new BytesRef(); + for (int p = 0; p < positionCount; p++) { + int valueCount = block.getValueCount(p); + int start = block.getFirstValueIndex(p); + int end = start + valueCount; + boolean positionOpened = false; + boolean valuesAppended = false; + for (int i = start; i < end; i++) { + try { + float value = evalValue(block, i, scratchPad); + if (positionOpened == false && valueCount > 1) { + builder.beginPositionEntry(); + positionOpened = true; + } + builder.appendFloat(value); + valuesAppended = true; + } catch (InvalidArgumentException e) { + registerException(e); + } + } + if (valuesAppended == false) { + builder.appendNull(); + } else if (positionOpened) { + builder.endPositionEntry(); + } + } + return builder.build(); + } + } + + private float evalValue(BytesRefBlock container, int index, BytesRef scratchPad) { + BytesRef value = container.getBytesRef(index, scratchPad); + return ToDenseVector.fromString(value); + } + + @Override + public String toString() { + return "ToDenseVectorFromStringEvaluator[" + "in=" + in + "]"; + } + + @Override + public void close() { + Releasables.closeExpectNoException(in); + } + + @Override + public long baseRamBytesUsed() { + long baseRamBytesUsed = BASE_RAM_BYTES_USED; + baseRamBytesUsed += in.baseRamBytesUsed(); + return baseRamBytesUsed; + } + + public static class Factory implements EvalOperator.ExpressionEvaluator.Factory { + private final Source source; + + private final EvalOperator.ExpressionEvaluator.Factory in; + + public Factory(Source source, EvalOperator.ExpressionEvaluator.Factory in) { + this.source = source; + this.in = in; + } + + @Override + public ToDenseVectorFromStringEvaluator get(DriverContext context) { + return new ToDenseVectorFromStringEvaluator(source, in.get(context), context); + } + + @Override + public String toString() { + return "ToDenseVectorFromStringEvaluator[" + "in=" + in + "]"; + } + } +} diff --git a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorFromUnsignedLongEvaluator.java b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorFromUnsignedLongEvaluator.java new file mode 100644 index 0000000000000..412522b47a560 --- /dev/null +++ b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorFromUnsignedLongEvaluator.java @@ -0,0 +1,132 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License +// 2.0; you may not use this file except in compliance with the Elastic License +// 2.0. +package org.elasticsearch.xpack.esql.expression.function.scalar.convert; + +import java.lang.Override; +import java.lang.String; +import org.apache.lucene.util.RamUsageEstimator; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.FloatBlock; +import org.elasticsearch.compute.data.LongBlock; +import org.elasticsearch.compute.data.LongVector; +import org.elasticsearch.compute.data.Vector; +import org.elasticsearch.compute.operator.DriverContext; +import org.elasticsearch.compute.operator.EvalOperator; +import org.elasticsearch.core.Releasables; +import org.elasticsearch.xpack.esql.core.tree.Source; + +/** + * {@link EvalOperator.ExpressionEvaluator} implementation for {@link ToDenseVector}. + * This class is generated. Edit {@code ConvertEvaluatorImplementer} instead. + */ +public final class ToDenseVectorFromUnsignedLongEvaluator extends AbstractConvertFunction.AbstractEvaluator { + private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(ToDenseVectorFromUnsignedLongEvaluator.class); + + private final EvalOperator.ExpressionEvaluator l; + + public ToDenseVectorFromUnsignedLongEvaluator(Source source, EvalOperator.ExpressionEvaluator l, + DriverContext driverContext) { + super(driverContext, source); + this.l = l; + } + + @Override + public EvalOperator.ExpressionEvaluator next() { + return l; + } + + @Override + public Block evalVector(Vector v) { + LongVector vector = (LongVector) v; + int positionCount = v.getPositionCount(); + if (vector.isConstant()) { + return driverContext.blockFactory().newConstantFloatBlockWith(evalValue(vector, 0), positionCount); + } + try (FloatBlock.Builder builder = driverContext.blockFactory().newFloatBlockBuilder(positionCount)) { + for (int p = 0; p < positionCount; p++) { + builder.appendFloat(evalValue(vector, p)); + } + return builder.build(); + } + } + + private float evalValue(LongVector container, int index) { + long value = container.getLong(index); + return ToDenseVector.fromUnsignedLong(value); + } + + @Override + public Block evalBlock(Block b) { + LongBlock block = (LongBlock) b; + int positionCount = block.getPositionCount(); + try (FloatBlock.Builder builder = driverContext.blockFactory().newFloatBlockBuilder(positionCount)) { + for (int p = 0; p < positionCount; p++) { + int valueCount = block.getValueCount(p); + int start = block.getFirstValueIndex(p); + int end = start + valueCount; + boolean positionOpened = false; + boolean valuesAppended = false; + for (int i = start; i < end; i++) { + float value = evalValue(block, i); + if (positionOpened == false && valueCount > 1) { + builder.beginPositionEntry(); + positionOpened = true; + } + builder.appendFloat(value); + valuesAppended = true; + } + if (valuesAppended == false) { + builder.appendNull(); + } else if (positionOpened) { + builder.endPositionEntry(); + } + } + return builder.build(); + } + } + + private float evalValue(LongBlock container, int index) { + long value = container.getLong(index); + return ToDenseVector.fromUnsignedLong(value); + } + + @Override + public String toString() { + return "ToDenseVectorFromUnsignedLongEvaluator[" + "l=" + l + "]"; + } + + @Override + public void close() { + Releasables.closeExpectNoException(l); + } + + @Override + public long baseRamBytesUsed() { + long baseRamBytesUsed = BASE_RAM_BYTES_USED; + baseRamBytesUsed += l.baseRamBytesUsed(); + return baseRamBytesUsed; + } + + public static class Factory implements EvalOperator.ExpressionEvaluator.Factory { + private final Source source; + + private final EvalOperator.ExpressionEvaluator.Factory l; + + public Factory(Source source, EvalOperator.ExpressionEvaluator.Factory l) { + this.source = source; + this.l = l; + } + + @Override + public ToDenseVectorFromUnsignedLongEvaluator get(DriverContext context) { + return new ToDenseVectorFromUnsignedLongEvaluator(source, l.get(context), context); + } + + @Override + public String toString() { + return "ToDenseVectorFromUnsignedLongEvaluator[" + "l=" + l + "]"; + } + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/ExpressionWritables.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/ExpressionWritables.java index 8866337e8f15c..52856a78aa5d5 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/ExpressionWritables.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/ExpressionWritables.java @@ -22,6 +22,7 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDateNanos; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDatetime; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDegrees; +import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDenseVector; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDouble; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToGeoPoint; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToGeoShape; @@ -204,6 +205,7 @@ public static List unaryScalars() { entries.add(ToDatetime.ENTRY); entries.add(ToDateNanos.ENTRY); entries.add(ToDegrees.ENTRY); + entries.add(ToDenseVector.ENTRY); entries.add(ToDouble.ENTRY); entries.add(ToGeoShape.ENTRY); entries.add(ToCartesianShape.ENTRY); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVector.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVector.java new file mode 100644 index 0000000000000..11b4f777976cf --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVector.java @@ -0,0 +1,120 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression.function.scalar.convert; + +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.compute.ann.ConvertEvaluator; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.tree.NodeInfo; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; +import org.elasticsearch.xpack.esql.expression.function.Param; + +import java.io.IOException; +import java.util.List; +import java.util.Map; + +import static org.elasticsearch.xpack.esql.core.type.DataType.DENSE_VECTOR; +import static org.elasticsearch.xpack.esql.core.type.DataType.DOUBLE; +import static org.elasticsearch.xpack.esql.core.type.DataType.INTEGER; +import static org.elasticsearch.xpack.esql.core.type.DataType.KEYWORD; +import static org.elasticsearch.xpack.esql.core.type.DataType.LONG; +import static org.elasticsearch.xpack.esql.core.type.DataType.TEXT; +import static org.elasticsearch.xpack.esql.core.type.DataType.UNSIGNED_LONG; +import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.stringToDouble; +import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.unsignedLongToDouble; + +public class ToDenseVector extends AbstractConvertFunction { + public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry( + Expression.class, + "ToDenseVector", + ToDenseVector::new + ); + + private static final Map EVALUATORS = Map.ofEntries( + Map.entry(DENSE_VECTOR, (source, fieldEval) -> fieldEval), + Map.entry(KEYWORD, ToDenseVectorFromStringEvaluator.Factory::new), + Map.entry(TEXT, ToDenseVectorFromStringEvaluator.Factory::new), + Map.entry(UNSIGNED_LONG, ToDenseVectorFromUnsignedLongEvaluator.Factory::new), + Map.entry(LONG, ToDenseVectorFromLongEvaluator.Factory::new), + Map.entry(INTEGER, ToDenseVectorFromIntEvaluator.Factory::new), + Map.entry(DOUBLE, ToDenseVectorFromDoubleEvaluator.Factory::new) + ); + + @FunctionInfo( + returnType = "dense_vector", + description = "Converts a multi-valued input of numbers or strings to a dense_vector." + ) + public ToDenseVector( + Source source, + @Param( + name = "field", + type = { "keyword", "text", "double", "long", "unsigned_long", "integer", "float" }, + description = "Input multi-valued column or an expression." + ) Expression field + ) { + super(source, field); + } + + private ToDenseVector(StreamInput in) throws IOException { + super(in); + } + + @Override + public String getWriteableName() { + return ENTRY.name; + } + + @Override + protected Map factories() { + return EVALUATORS; + } + + @Override + public DataType dataType() { + return DENSE_VECTOR; + } + + @Override + public Expression replaceChildren(List newChildren) { + return new ToDenseVector(source(), newChildren.get(0)); + } + + @Override + protected NodeInfo info() { + return NodeInfo.create(this, ToDenseVector::new, field()); + } + + @ConvertEvaluator(extraName = "FromString", warnExceptions = { org.elasticsearch.xpack.esql.core.InvalidArgumentException.class }) + static float fromString(BytesRef in) { + return (float) stringToDouble(in.utf8ToString()); + } + + @ConvertEvaluator(extraName = "FromUnsignedLong") + static float fromUnsignedLong(long l) { + return (float) unsignedLongToDouble(l); + } + + @ConvertEvaluator(extraName = "FromLong") + static float fromLong(long l) { + return l; + } + + @ConvertEvaluator(extraName = "FromInt") + static float fromInt(int i) { + return i; + } + + @ConvertEvaluator(extraName = "FromDouble") + static float fromDouble(double d) { + return (float) d; + } +} diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorTests.java new file mode 100644 index 0000000000000..6319da5e0f080 --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorTests.java @@ -0,0 +1,105 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression.function.scalar.convert; + +import com.carrotsearch.randomizedtesting.annotations.Name; +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.expression.function.AbstractScalarFunctionTestCase; +import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier; + +import java.util.ArrayList; +import java.util.List; +import java.util.function.Supplier; + +import static org.hamcrest.Matchers.equalTo; + +public class ToDenseVectorTests extends AbstractScalarFunctionTestCase { + + public ToDenseVectorTests(@Name("TestCase") Supplier testCaseSupplier) { + this.testCase = testCaseSupplier.get(); + } + + @ParametersFactory + public static Iterable parameters() { + List suppliers = new ArrayList<>(); + + suppliers.add( + new TestCaseSupplier( + "int", + List.of(DataType.INTEGER), + () -> new TestCaseSupplier.TestCase( + List.of( + new TestCaseSupplier.TypedData( + 1, + DataType.INTEGER, + "int" + ) + ), + evaluatorName("Int", "i"), + DataType.DENSE_VECTOR, + equalTo(1.0f) + ) + ) + ); + + // Multi-valued inputs + suppliers.add( + new TestCaseSupplier( + "mv_long", + List.of(DataType.LONG), + () -> new TestCaseSupplier.TestCase( + List.of( + new TestCaseSupplier.TypedData( + List.of(1L, 2L, 3L), + DataType.LONG, + "mv_long" + ) + ), + evaluatorName("Long", "l"), + DataType.DENSE_VECTOR, + equalTo(List.of(1.0f, 2.0f, 3.0f)) + ) + ) + ); + + suppliers.add( + new TestCaseSupplier( + "mv_string", + List.of(DataType.KEYWORD), + () -> new TestCaseSupplier.TestCase( + List.of( + new TestCaseSupplier.TypedData( + List.of(new BytesRef("1.1"), new BytesRef("2.2")), + DataType.KEYWORD, + "mv_string" + ) + ), + evaluatorName("String", "in"), + DataType.DENSE_VECTOR, + equalTo(List.of(1.1f, 2.2f)) + ) + ) + ); + + return parameterSuppliersFromTypedDataWithDefaultChecksNoErrors(true, suppliers); + } + + private static String evaluatorName(String inner, String next) { + String read = "Attribute[channel=0]"; + return "ToDenseVectorFrom" + inner + "Evaluator[" + next + "=" + read + "]"; + } + + @Override + protected Expression build(Source source, List args) { + return new ToDenseVector(source, args.get(0)); + } +} From 7383dc98f019400179033fe555a35a749a2275dc Mon Sep 17 00:00:00 2001 From: cdelgado Date: Tue, 2 Sep 2025 14:11:23 +0200 Subject: [PATCH 02/25] Add tests for casting using ToDenseVector --- .../src/main/resources/knn-function.csv-spec | 29 ++++++- .../vector-cosine-similarity.csv-spec | 31 +++++++- .../resources/vector-dot-product.csv-spec | 30 +++++++- .../main/resources/vector-hamming.csv-spec | 32 ++++++-- .../main/resources/vector-l1-norm.csv-spec | 32 ++++++-- .../main/resources/vector-l2-norm.csv-spec | 32 ++++++-- .../main/resources/vector-magnitude.csv-spec | 12 +++ .../xpack/esql/action/EsqlCapabilities.java | 7 +- .../xpack/esql/analysis/Analyzer.java | 30 +++++--- .../xpack/esql/analysis/AnalyzerTests.java | 76 +++++++++++++++++-- 10 files changed, 267 insertions(+), 44 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec index 2cad34e324fda..caf19eb6825d9 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec @@ -1,7 +1,3 @@ -# TODO Most tests explicitly set k. Until knn function uses LIMIT as k, we need to explicitly set it to all values -# in the dataset to avoid test failures due to docs allocation in different shards, which can impact results for a -# top-n query at the shard level - knnSearch required_capability: knn_function_v3 @@ -306,3 +302,28 @@ c: long | primary: boolean 41 | false 9 | true ; + +knnWithCasting +required_capability: knn_function_v3 +required_capability: to_dense_vector_function + +from colors metadata _score +| eval query = [0, 120, 0] +| where knn(rgb_vector, query, 10) +| sort _score desc, color asc +| keep color, rgb_vector +| limit 10 +; + +color:text | rgb_vector:dense_vector +green | [0.0, 128.0, 0.0] +black | [0.0, 0.0, 0.0] +olive | [128.0, 128.0, 0.0] +teal | [0.0, 128.0, 128.0] +lime | [0.0, 255.0, 0.0] +sienna | [160.0, 82.0, 45.0] +maroon | [128.0, 0.0, 0.0] +navy | [0.0, 0.0, 128.0] +gray | [128.0, 128.0, 128.0] +chartreuse | [127.0, 255.0, 0.0] +; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-cosine-similarity.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-cosine-similarity.csv-spec index 46d80609a06bf..b856ccd5f172e 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-cosine-similarity.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-cosine-similarity.csv-spec @@ -90,17 +90,40 @@ total_null:long 59 ; -# TODO Need to implement a conversion function to convert a non-foldable row to a dense_vector -similarityWithRow-Ignore +similarityWithRow required_capability: cosine_vector_similarity_function +required_capability: to_dense_vector_function row vector = [1, 2, 3] | eval similarity = round(v_cosine(vector, [0, 1, 2]), 3) +; + +vector: integer | similarity:double +[1, 2, 3] | 0.978 +; + +similarityWithVectorField +required_capability: cosine_vector_similarity_function +required_capability: to_dense_vector_function + +from colors +| where color != "black" +| eval query = [0, 255, 255] +| eval similarity = v_cosine(rgb_vector, query) | sort similarity desc, color asc | limit 10 | keep color, similarity ; -similarity:double -0.978 +color:text | similarity:double +cyan | 1.0 +teal | 1.0 +turquoise | 0.9890533685684204 +aqua marine | 0.964962363243103 +azure | 0.916246771812439 +lavender | 0.9136701822280884 +mint cream | 0.9122757911682129 +honeydew | 0.9122424125671387 +gainsboro | 0.9082483053207397 +gray | 0.9082483053207397 ; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-dot-product.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-dot-product.csv-spec index b6d32b5ae651b..d9d3d655625be 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-dot-product.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-dot-product.csv-spec @@ -88,17 +88,39 @@ total_null:long ; -# TODO Need to implement a conversion function to convert a non-foldable row to a dense_vector -similarityWithRow-Ignore +similarityWithRow required_capability: dot_product_vector_similarity_function +required_capability: to_dense_vector_function row vector = [1, 2, 3] | eval similarity = round(v_dot_product(vector, [0, 1, 2]), 3) +; + +vector: integer | similarity:double +[1, 2, 3] | 4.5 +; + +similarityWithVectorField +required_capability: dot_product_vector_similarity_function +required_capability: to_dense_vector_function + +from colors +| eval query = [0, 255, 255] +| eval similarity = v_dot_product(rgb_vector, query) | sort similarity desc, color asc | limit 10 | keep color, similarity ; -similarity:double -0.978 +color:text | similarity:double +azure | 65025.5 +cyan | 65025.5 +white | 65025.5 +mint cream | 64388.0 +snow | 63750.5 +honeydew | 63113.0 +ivory | 63113.0 +sea shell | 61583.0 +lavender | 61200.5 +old lace | 60563.0 ; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-hamming.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-hamming.csv-spec index a7e8815139567..9f372f104a689 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-hamming.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-hamming.csv-spec @@ -87,17 +87,39 @@ total_null:long 59 ; -# TODO Need to implement a conversion function to convert a non-foldable row to a dense_vector -similarityWithRow-Ignore +similarityWithRow required_capability: hamming_vector_similarity_function +required_capability: to_dense_vector_function row vector = [1, 2, 3] | eval similarity = round(v_hamming(vector, [0, 1, 2]), 3) +; + +vector: integer | similarity:double +[1, 2, 3] | 4.0 +; + +similarityWithVectorField +required_capability: hamming_vector_similarity_function +required_capability: to_dense_vector_function + +from colors +| eval query = [0, 255, 255] +| eval similarity = v_hamming(rgb_vector, query) | sort similarity desc, color asc | limit 10 | keep color, similarity ; - -similarity:double -0.978 + +color:text | similarity:double +red | 24.0 +orange | 20.0 +gold | 18.0 +indigo | 18.0 +bisque | 17.0 +maroon | 17.0 +pink | 17.0 +salmon | 17.0 +black | 16.0 +firebrick | 16.0 ; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-l1-norm.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-l1-norm.csv-spec index 53f550dd4fe1f..8c9ba8406267a 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-l1-norm.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-l1-norm.csv-spec @@ -87,17 +87,39 @@ total_null:long 59 ; -# TODO Need to implement a conversion function to convert a non-foldable row to a dense_vector -similarityWithRow-Ignore +similarityWithRow required_capability: l1_norm_vector_similarity_function +required_capability: to_dense_vector_function row vector = [1, 2, 3] | eval similarity = round(v_l1_norm(vector, [0, 1, 2]), 3) +; + +vector: integer | similarity:double +[1, 2, 3] | 3.0 +; + +similarityWithVectorField +required_capability: l1_norm_vector_similarity_function +required_capability: to_dense_vector_function + +from colors +| eval query = [0, 255, 255] +| eval similarity = v_l1_norm(rgb_vector, query) | sort similarity desc, color asc | limit 10 | keep color, similarity ; - -similarity:double -0.978 + +color:text | similarity:double +red | 765.0 +crimson | 650.0 +maroon | 638.0 +firebrick | 620.0 +orange | 600.0 +tomato | 595.0 +brown | 591.0 +chocolate | 585.0 +coral | 558.0 +gold | 550.0 ; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-l2-norm.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-l2-norm.csv-spec index 03a094ed93cad..0d1e0cf7d2835 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-l2-norm.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-l2-norm.csv-spec @@ -87,17 +87,39 @@ total_null:long 59 ; -# TODO Need to implement a conversion function to convert a non-foldable row to a dense_vector -similarityWithRow-Ignore +similarityWithRow required_capability: l2_norm_vector_similarity_function +required_capability: to_dense_vector_function row vector = [1, 2, 3] | eval similarity = round(v_l2_norm(vector, [0, 1, 2]), 3) +; + +vector: integer | similarity:double +[1, 2, 3] | 1.732 +; + +similarityWithVectorField +required_capability: l2_norm_vector_similarity_function +required_capability: to_dense_vector_function + +from colors +| eval query = [0, 255, 255] +| eval similarity = v_l2_norm(rgb_vector, query) | sort similarity desc, color asc | limit 10 | keep color, similarity ; - -similarity:double -0.978 + +color:text | similarity:double +red | 441.6729431152344 +maroon | 382.6669616699219 +crimson | 376.36419677734375 +orange | 371.68536376953125 +gold | 362.8360595703125 +black | 360.62445068359375 +magenta | 360.62445068359375 +yellow | 360.62445068359375 +firebrick | 359.67486572265625 +tomato | 351.0227966308594 ; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-magnitude.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-magnitude.csv-spec index c670cb9ec678e..58c2c9df9ea24 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-magnitude.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-magnitude.csv-spec @@ -85,3 +85,15 @@ row a = 1 magnitude:double null ; + +magnitudeWithRow +required_capability: magnitude_scalar_vector_function +required_capability: to_dense_vector_function + +row vector = [1, 2, 3] +| eval magnitude = round(v_magnitude(vector), 3) +; + +vector: integer | magnitude:double +[1, 2, 3] | 3.742 +; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index dc9f6f3b3978d..bf7c51053f96e 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -1416,7 +1416,12 @@ public enum Cap { /** * URL encoding function. */ - URL_ENCODE(Build.current().isSnapshot()); + URL_ENCODE(Build.current().isSnapshot()), + + /** + * TO_DENSE_VECTOR function. + */ + TO_DENSE_VECTOR_FUNCTION(Build.current().isSnapshot()); private final boolean enabled; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java index 50a061ab0d6ef..4a496ebf80ec0 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java @@ -76,6 +76,7 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.convert.FromAggregateMetricDouble; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToAggregateMetricDouble; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDateNanos; +import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDenseVector; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDouble; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToInteger; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToLong; @@ -168,6 +169,7 @@ import static org.elasticsearch.xpack.esql.core.type.DataType.TIME_DURATION; import static org.elasticsearch.xpack.esql.core.type.DataType.UNSUPPORTED; import static org.elasticsearch.xpack.esql.core.type.DataType.VERSION; +import static org.elasticsearch.xpack.esql.core.type.DataType.isString; import static org.elasticsearch.xpack.esql.core.type.DataType.isTemporalAmount; import static org.elasticsearch.xpack.esql.telemetry.FeatureMetric.LIMIT; import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.maybeParseTemporalAmount; @@ -1668,18 +1670,24 @@ private static Expression processVectorFunction(org.elasticsearch.xpack.esql.cor List args = vectorFunction.arguments(); List newArgs = new ArrayList<>(); for (Expression arg : args) { - if (arg.resolved() && arg.dataType().isNumeric() && arg.foldable()) { - Object folded = arg.fold(FoldContext.small() /* TODO remove me */); - if (folded instanceof List) { - // Convert to floats so blocks are created accordingly - List floatVector; - if (arg.dataType() == FLOAT) { - floatVector = (List) folded; - } else { - floatVector = ((List) folded).stream().map(Number::floatValue).collect(Collectors.toList()); + if (arg.resolved()) { + if (arg.foldable() && arg.dataType().isNumeric()) { + Object folded = arg.fold(FoldContext.small() /* TODO remove me */); + if (folded instanceof List) { + // Convert to floats so blocks are created accordingly + List floatVector; + if (arg.dataType() == FLOAT) { + floatVector = (List) folded; + } else { + floatVector = ((List) folded).stream().map(Number::floatValue).collect(Collectors.toList()); + } + Literal denseVector = new Literal(arg.source(), floatVector, DataType.DENSE_VECTOR); + newArgs.add(denseVector); + continue; } - Literal denseVector = new Literal(arg.source(), floatVector, DataType.DENSE_VECTOR); - newArgs.add(denseVector); + } else if (arg.dataType().isNumeric() || isString(arg.dataType())) { + // add casting function + newArgs.add(new ToDenseVector(arg.source(), arg)); continue; } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java index f26c14db41604..257f27abbe79b 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java @@ -57,6 +57,7 @@ import org.elasticsearch.xpack.esql.expression.function.grouping.TBucket; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDateNanos; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDatetime; +import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDenseVector; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToInteger; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToLong; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToString; @@ -2352,20 +2353,36 @@ public void testDenseVectorImplicitCastingKnn() { assumeTrue("dense_vector capability not available", EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()); checkDenseVectorCastingKnn("float_vector"); + checkDenseVectorCastingKnn("byte_vector"); + checkDenseVectorEvalCastingKnn("float_vector"); + checkDenseVectorEvalCastingKnn("byte_vector"); } private static void checkDenseVectorCastingKnn(String fieldName) { var plan = analyze(String.format(Locale.ROOT, """ - from test | where knn(%s, [0.342, 0.164, 0.234], 10) + from test | where knn(%s, [0, 1, 2], 10) """, fieldName), "mapping-dense_vector.json"); var limit = as(plan, Limit.class); var filter = as(limit.child(), Filter.class); var knn = as(filter.condition(), Knn.class); - var field = knn.field(); var queryVector = as(knn.query(), Literal.class); assertEquals(DataType.DENSE_VECTOR, queryVector.dataType()); - assertThat(queryVector.value(), equalTo(List.of(0.342f, 0.164f, 0.234f))); + assertThat(queryVector.value(), equalTo(List.of(0.0f, 1.0f, 2.0f))); + } + + private static void checkDenseVectorEvalCastingKnn(String fieldName) { + var plan = analyze(String.format(Locale.ROOT, """ + from test | eval query = [0, 1, 2] | where knn(%s, query, 10) + """, fieldName), "mapping-dense_vector.json"); + + var limit = as(plan, Limit.class); + var filter = as(limit.child(), Filter.class); + var knn = as(filter.condition(), Knn.class); + var queryVector = as(knn.query(), ToDenseVector.class); + assertEquals(DataType.DENSE_VECTOR, queryVector.dataType()); + var refAttr = as(queryVector.children().get(0), ReferenceAttribute.class); + assertThat(refAttr.name(), is("query")); } public void testDenseVectorImplicitCastingSimilarityFunctions() { @@ -2411,6 +2428,56 @@ private void checkDenseVectorImplicitCastingSimilarityFunction(String similarity from test | eval similarity = %s """, similarityFunction), "mapping-dense_vector.json"); + var limit = as(plan, Limit.class); + var eval = as(limit.child(), Eval.class); + var alias = as(eval.fields().get(0), Alias.class); + assertEquals("similarity", alias.name()); + var similarity = as(alias.child(), VectorSimilarityFunction.class); + var left = as(similarity.left(), FieldAttribute.class); + var queryVector = as(similarity.right(), ToDenseVector.class); + assertEquals(DataType.DENSE_VECTOR, queryVector.dataType()); + var refAttr = as(queryVector.children().get(0), ReferenceAttribute.class); + assertThat(refAttr.name(), is("query")); + } + + public void testDenseVectorEvalCastingSimilarityFunctions() { + if (EsqlCapabilities.Cap.COSINE_VECTOR_SIMILARITY_FUNCTION.isEnabled()) { + checkDenseVectorEvalCastingSimilarityFunction( + "v_cosine(float_vector, query)" + ); + checkDenseVectorEvalCastingSimilarityFunction("v_cosine(byte_vector, query)"); + } + if (EsqlCapabilities.Cap.DOT_PRODUCT_VECTOR_SIMILARITY_FUNCTION.isEnabled()) { + checkDenseVectorEvalCastingSimilarityFunction( + "v_dot_product(float_vector, query)" + ); + checkDenseVectorEvalCastingSimilarityFunction("v_dot_product(byte_vector, query)"); + } + if (EsqlCapabilities.Cap.L1_NORM_VECTOR_SIMILARITY_FUNCTION.isEnabled()) { + checkDenseVectorEvalCastingSimilarityFunction( + "v_l1_norm(float_vector, query)" + ); + checkDenseVectorEvalCastingSimilarityFunction("v_l1_norm(byte_vector, query)"); + } + if (EsqlCapabilities.Cap.L2_NORM_VECTOR_SIMILARITY_FUNCTION.isEnabled()) { + checkDenseVectorEvalCastingSimilarityFunction( + "v_l2_norm(float_vector, query)" + ); + checkDenseVectorEvalCastingSimilarityFunction("v_l2_norm(float_vector, query)"); + } + if (EsqlCapabilities.Cap.HAMMING_VECTOR_SIMILARITY_FUNCTION.isEnabled()) { + checkDenseVectorEvalCastingSimilarityFunction( + "v_hamming(byte_vector, query)" + ); + checkDenseVectorEvalCastingSimilarityFunction("v_hamming(byte_vector, query)"); + } + } + + private void checkDenseVectorEvalCastingSimilarityFunction(String similarityFunction) { + var plan = analyze(String.format(Locale.ROOT, """ + from test | eval query = [0.342, 0.164, 0.234] | eval similarity = %s + """, similarityFunction), "mapping-dense_vector.json"); + var limit = as(plan, Limit.class); var eval = as(limit.child(), Eval.class); var alias = as(eval.fields().get(0), Alias.class); @@ -2418,9 +2485,8 @@ private void checkDenseVectorImplicitCastingSimilarityFunction(String similarity var similarity = as(alias.child(), VectorSimilarityFunction.class); var left = as(similarity.left(), FieldAttribute.class); assertThat(List.of("float_vector", "byte_vector"), hasItem(left.name())); - var right = as(similarity.right(), Literal.class); + var right = as(similarity.right(), ToDenseVector.class); assertThat(right.dataType(), is(DENSE_VECTOR)); - assertThat(right.value(), equalTo(expectedElems)); } public void testNoDenseVectorFailsSimilarityFunction() { From 59782e75b427cde1514fb8eda3090692715b3922 Mon Sep 17 00:00:00 2001 From: cdelgado Date: Tue, 2 Sep 2025 17:32:28 +0200 Subject: [PATCH 03/25] Improve tests, remove unsigned long --- ...oDenseVectorFromUnsignedLongEvaluator.java | 132 ------------------ .../scalar/convert/ToDenseVector.java | 10 +- .../scalar/convert/ToDenseVectorTests.java | 62 ++++---- 3 files changed, 36 insertions(+), 168 deletions(-) delete mode 100644 x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorFromUnsignedLongEvaluator.java diff --git a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorFromUnsignedLongEvaluator.java b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorFromUnsignedLongEvaluator.java deleted file mode 100644 index 412522b47a560..0000000000000 --- a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorFromUnsignedLongEvaluator.java +++ /dev/null @@ -1,132 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License -// 2.0; you may not use this file except in compliance with the Elastic License -// 2.0. -package org.elasticsearch.xpack.esql.expression.function.scalar.convert; - -import java.lang.Override; -import java.lang.String; -import org.apache.lucene.util.RamUsageEstimator; -import org.elasticsearch.compute.data.Block; -import org.elasticsearch.compute.data.FloatBlock; -import org.elasticsearch.compute.data.LongBlock; -import org.elasticsearch.compute.data.LongVector; -import org.elasticsearch.compute.data.Vector; -import org.elasticsearch.compute.operator.DriverContext; -import org.elasticsearch.compute.operator.EvalOperator; -import org.elasticsearch.core.Releasables; -import org.elasticsearch.xpack.esql.core.tree.Source; - -/** - * {@link EvalOperator.ExpressionEvaluator} implementation for {@link ToDenseVector}. - * This class is generated. Edit {@code ConvertEvaluatorImplementer} instead. - */ -public final class ToDenseVectorFromUnsignedLongEvaluator extends AbstractConvertFunction.AbstractEvaluator { - private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(ToDenseVectorFromUnsignedLongEvaluator.class); - - private final EvalOperator.ExpressionEvaluator l; - - public ToDenseVectorFromUnsignedLongEvaluator(Source source, EvalOperator.ExpressionEvaluator l, - DriverContext driverContext) { - super(driverContext, source); - this.l = l; - } - - @Override - public EvalOperator.ExpressionEvaluator next() { - return l; - } - - @Override - public Block evalVector(Vector v) { - LongVector vector = (LongVector) v; - int positionCount = v.getPositionCount(); - if (vector.isConstant()) { - return driverContext.blockFactory().newConstantFloatBlockWith(evalValue(vector, 0), positionCount); - } - try (FloatBlock.Builder builder = driverContext.blockFactory().newFloatBlockBuilder(positionCount)) { - for (int p = 0; p < positionCount; p++) { - builder.appendFloat(evalValue(vector, p)); - } - return builder.build(); - } - } - - private float evalValue(LongVector container, int index) { - long value = container.getLong(index); - return ToDenseVector.fromUnsignedLong(value); - } - - @Override - public Block evalBlock(Block b) { - LongBlock block = (LongBlock) b; - int positionCount = block.getPositionCount(); - try (FloatBlock.Builder builder = driverContext.blockFactory().newFloatBlockBuilder(positionCount)) { - for (int p = 0; p < positionCount; p++) { - int valueCount = block.getValueCount(p); - int start = block.getFirstValueIndex(p); - int end = start + valueCount; - boolean positionOpened = false; - boolean valuesAppended = false; - for (int i = start; i < end; i++) { - float value = evalValue(block, i); - if (positionOpened == false && valueCount > 1) { - builder.beginPositionEntry(); - positionOpened = true; - } - builder.appendFloat(value); - valuesAppended = true; - } - if (valuesAppended == false) { - builder.appendNull(); - } else if (positionOpened) { - builder.endPositionEntry(); - } - } - return builder.build(); - } - } - - private float evalValue(LongBlock container, int index) { - long value = container.getLong(index); - return ToDenseVector.fromUnsignedLong(value); - } - - @Override - public String toString() { - return "ToDenseVectorFromUnsignedLongEvaluator[" + "l=" + l + "]"; - } - - @Override - public void close() { - Releasables.closeExpectNoException(l); - } - - @Override - public long baseRamBytesUsed() { - long baseRamBytesUsed = BASE_RAM_BYTES_USED; - baseRamBytesUsed += l.baseRamBytesUsed(); - return baseRamBytesUsed; - } - - public static class Factory implements EvalOperator.ExpressionEvaluator.Factory { - private final Source source; - - private final EvalOperator.ExpressionEvaluator.Factory l; - - public Factory(Source source, EvalOperator.ExpressionEvaluator.Factory l) { - this.source = source; - this.l = l; - } - - @Override - public ToDenseVectorFromUnsignedLongEvaluator get(DriverContext context) { - return new ToDenseVectorFromUnsignedLongEvaluator(source, l.get(context), context); - } - - @Override - public String toString() { - return "ToDenseVectorFromUnsignedLongEvaluator[" + "l=" + l + "]"; - } - } -} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVector.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVector.java index 11b4f777976cf..749d4ae4a135f 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVector.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVector.java @@ -28,9 +28,7 @@ import static org.elasticsearch.xpack.esql.core.type.DataType.KEYWORD; import static org.elasticsearch.xpack.esql.core.type.DataType.LONG; import static org.elasticsearch.xpack.esql.core.type.DataType.TEXT; -import static org.elasticsearch.xpack.esql.core.type.DataType.UNSIGNED_LONG; import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.stringToDouble; -import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.unsignedLongToDouble; public class ToDenseVector extends AbstractConvertFunction { public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry( @@ -43,7 +41,6 @@ public class ToDenseVector extends AbstractConvertFunction { Map.entry(DENSE_VECTOR, (source, fieldEval) -> fieldEval), Map.entry(KEYWORD, ToDenseVectorFromStringEvaluator.Factory::new), Map.entry(TEXT, ToDenseVectorFromStringEvaluator.Factory::new), - Map.entry(UNSIGNED_LONG, ToDenseVectorFromUnsignedLongEvaluator.Factory::new), Map.entry(LONG, ToDenseVectorFromLongEvaluator.Factory::new), Map.entry(INTEGER, ToDenseVectorFromIntEvaluator.Factory::new), Map.entry(DOUBLE, ToDenseVectorFromDoubleEvaluator.Factory::new) @@ -57,7 +54,7 @@ public ToDenseVector( Source source, @Param( name = "field", - type = { "keyword", "text", "double", "long", "unsigned_long", "integer", "float" }, + type = { "keyword", "text", "double", "long", "integer" }, description = "Input multi-valued column or an expression." ) Expression field ) { @@ -98,11 +95,6 @@ static float fromString(BytesRef in) { return (float) stringToDouble(in.utf8ToString()); } - @ConvertEvaluator(extraName = "FromUnsignedLong") - static float fromUnsignedLong(long l) { - return (float) unsignedLongToDouble(l); - } - @ConvertEvaluator(extraName = "FromLong") static float fromLong(long l) { return l; diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorTests.java index 6319da5e0f080..0aabf9540bf47 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorTests.java @@ -9,7 +9,9 @@ import com.carrotsearch.randomizedtesting.annotations.Name; import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; + import org.apache.lucene.util.BytesRef; +import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; @@ -17,6 +19,7 @@ import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.function.Supplier; @@ -36,41 +39,46 @@ public static Iterable parameters() { new TestCaseSupplier( "int", List.of(DataType.INTEGER), - () -> new TestCaseSupplier.TestCase( - List.of( - new TestCaseSupplier.TypedData( - 1, - DataType.INTEGER, - "int" - ) - ), - evaluatorName("Int", "i"), - DataType.DENSE_VECTOR, - equalTo(1.0f) - ) + () -> { + List data = Arrays.asList(randomArray(1, 10, Integer[]::new, ESTestCase::randomInt)); + return new TestCaseSupplier.TestCase( + List.of( + new TestCaseSupplier.TypedData( + data, + DataType.INTEGER, + "int" + ) + ), + evaluatorName("Int", "i"), + DataType.DENSE_VECTOR, + equalTo(data.stream().map(Number::floatValue).toList()) + ); + } ) ); - // Multi-valued inputs suppliers.add( new TestCaseSupplier( - "mv_long", + "long", List.of(DataType.LONG), - () -> new TestCaseSupplier.TestCase( - List.of( - new TestCaseSupplier.TypedData( - List.of(1L, 2L, 3L), - DataType.LONG, - "mv_long" - ) - ), - evaluatorName("Long", "l"), - DataType.DENSE_VECTOR, - equalTo(List.of(1.0f, 2.0f, 3.0f)) - ) + () -> { + List data = Arrays.asList(randomArray(1, 10, Long[]::new, ESTestCase::randomLong)); + return new TestCaseSupplier.TestCase( + List.of( + new TestCaseSupplier.TypedData( + data, + DataType.LONG, + "long" + ) + ), + evaluatorName("Long", "l"), + DataType.DENSE_VECTOR, + equalTo(data.stream().map(Number::floatValue).toList()) + ); + } ) ); - + suppliers.add( new TestCaseSupplier( "mv_string", From 5a9a8c1b28a8181c20aba40d73eb85a5002ae0f6 Mon Sep 17 00:00:00 2001 From: cdelgado Date: Tue, 2 Sep 2025 18:32:47 +0200 Subject: [PATCH 04/25] Add docs and examples, remove string conversion --- .../src/main/resources/dense_vector.csv-spec | 43 +++++++++++++++++++ .../function/EsqlFunctionRegistry.java | 2 + .../scalar/convert/ToDenseVector.java | 19 +++----- .../scalar/convert/ToDenseVectorTests.java | 20 --------- 4 files changed, 50 insertions(+), 34 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dense_vector.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dense_vector.csv-spec index 077565b8b8997..de1e218768c58 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dense_vector.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dense_vector.csv-spec @@ -45,3 +45,46 @@ id:l | new_vector:dense_vector 2 | [9.0, 8.0, 7.0] 3 | [0.054, 0.032, 0.012] ; + +convertIntsToDenseVector +required_capability: dense_vector_field_type +required_capability: to_dense_vector_function + +// tag::to_dense_vector-ints[] +row ints = [1, 2, 3] +| eval vector = to_dense_vector(ints) +| keep vector +// end::to_dense_vector-ints[] +; + +// tag::to_dense_vector-ints-result[] +vector:dense_vector +[1.0, 2.0, 3.0] +// end::to_dense_vector-ints-result[] +; + +convertLongsToDenseVector +required_capability: dense_vector_field_type +required_capability: to_dense_vector_function + +row longs = [5013792, 2147483647, 501379200000] +| eval vector = to_dense_vector(longs) +| keep vector +; + +vector:dense_vector +[5013792.0, 2147483647.0, 501379200000.0] +; + +convertDoublesToDenseVector +required_capability: dense_vector_field_type +required_capability: to_dense_vector_function + +row doubles = [123.4, 567.8, 901.2] +| eval vector = to_dense_vector(doubles) +| keep vector +; + +vector:dense_vector +[123.4, 567.8, 901.2] +; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java index 0df4b4c035918..3da7002e6e2b3 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java @@ -69,6 +69,7 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDatePeriod; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDatetime; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDegrees; +import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDenseVector; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDouble; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToGeoPoint; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToGeoShape; @@ -505,6 +506,7 @@ private static FunctionDefinition[][] snapshotFunctions() { def(Score.class, uni(Score::new), Score.NAME), def(Term.class, bi(Term::new), "term"), def(Knn.class, quad(Knn::new), "knn"), + def(ToDenseVector.class, ToDenseVector::new, "to_dense_vector"), def(ToGeohash.class, ToGeohash::new, "to_geohash"), def(ToGeotile.class, ToGeotile::new, "to_geotile"), def(ToGeohex.class, ToGeohex::new, "to_geohex"), diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVector.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVector.java index 749d4ae4a135f..bf8d2d6741b69 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVector.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVector.java @@ -7,7 +7,6 @@ package org.elasticsearch.xpack.esql.expression.function.scalar.convert; -import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.compute.ann.ConvertEvaluator; @@ -15,6 +14,7 @@ import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.expression.function.Example; import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; import org.elasticsearch.xpack.esql.expression.function.Param; @@ -25,10 +25,7 @@ import static org.elasticsearch.xpack.esql.core.type.DataType.DENSE_VECTOR; import static org.elasticsearch.xpack.esql.core.type.DataType.DOUBLE; import static org.elasticsearch.xpack.esql.core.type.DataType.INTEGER; -import static org.elasticsearch.xpack.esql.core.type.DataType.KEYWORD; import static org.elasticsearch.xpack.esql.core.type.DataType.LONG; -import static org.elasticsearch.xpack.esql.core.type.DataType.TEXT; -import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.stringToDouble; public class ToDenseVector extends AbstractConvertFunction { public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry( @@ -39,8 +36,6 @@ public class ToDenseVector extends AbstractConvertFunction { private static final Map EVALUATORS = Map.ofEntries( Map.entry(DENSE_VECTOR, (source, fieldEval) -> fieldEval), - Map.entry(KEYWORD, ToDenseVectorFromStringEvaluator.Factory::new), - Map.entry(TEXT, ToDenseVectorFromStringEvaluator.Factory::new), Map.entry(LONG, ToDenseVectorFromLongEvaluator.Factory::new), Map.entry(INTEGER, ToDenseVectorFromIntEvaluator.Factory::new), Map.entry(DOUBLE, ToDenseVectorFromDoubleEvaluator.Factory::new) @@ -48,14 +43,15 @@ public class ToDenseVector extends AbstractConvertFunction { @FunctionInfo( returnType = "dense_vector", - description = "Converts a multi-valued input of numbers or strings to a dense_vector." + description = "Converts a multi-valued input of numbers to a dense_vector.", + examples = @Example(file = "dense_vector", tag = "to_dense_vector-ints") ) public ToDenseVector( Source source, @Param( name = "field", - type = { "keyword", "text", "double", "long", "integer" }, - description = "Input multi-valued column or an expression." + type = {"double", "long", "integer"}, + description = "multi-valued input of numbers to convert." ) Expression field ) { super(source, field); @@ -90,11 +86,6 @@ protected NodeInfo info() { return NodeInfo.create(this, ToDenseVector::new, field()); } - @ConvertEvaluator(extraName = "FromString", warnExceptions = { org.elasticsearch.xpack.esql.core.InvalidArgumentException.class }) - static float fromString(BytesRef in) { - return (float) stringToDouble(in.utf8ToString()); - } - @ConvertEvaluator(extraName = "FromLong") static float fromLong(long l) { return l; diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorTests.java index 0aabf9540bf47..7552069a8c5a4 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorTests.java @@ -10,7 +10,6 @@ import com.carrotsearch.randomizedtesting.annotations.Name; import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; -import org.apache.lucene.util.BytesRef; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.tree.Source; @@ -79,25 +78,6 @@ public static Iterable parameters() { ) ); - suppliers.add( - new TestCaseSupplier( - "mv_string", - List.of(DataType.KEYWORD), - () -> new TestCaseSupplier.TestCase( - List.of( - new TestCaseSupplier.TypedData( - List.of(new BytesRef("1.1"), new BytesRef("2.2")), - DataType.KEYWORD, - "mv_string" - ) - ), - evaluatorName("String", "in"), - DataType.DENSE_VECTOR, - equalTo(List.of(1.1f, 2.2f)) - ) - ) - ); - return parameterSuppliersFromTypedDataWithDefaultChecksNoErrors(true, suppliers); } From 1fd0cc0289f6efc6e6fdfe9ca0c19224f56c64c7 Mon Sep 17 00:00:00 2001 From: cdelgado Date: Tue, 2 Sep 2025 18:43:51 +0200 Subject: [PATCH 05/25] Fix analyzer and tests --- .../org/elasticsearch/xpack/esql/analysis/Analyzer.java | 7 +++---- .../elasticsearch/xpack/esql/analysis/AnalyzerTests.java | 8 ++++---- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java index 4a496ebf80ec0..f2f394b78abca 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java @@ -169,7 +169,6 @@ import static org.elasticsearch.xpack.esql.core.type.DataType.TIME_DURATION; import static org.elasticsearch.xpack.esql.core.type.DataType.UNSUPPORTED; import static org.elasticsearch.xpack.esql.core.type.DataType.VERSION; -import static org.elasticsearch.xpack.esql.core.type.DataType.isString; import static org.elasticsearch.xpack.esql.core.type.DataType.isTemporalAmount; import static org.elasticsearch.xpack.esql.telemetry.FeatureMetric.LIMIT; import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.maybeParseTemporalAmount; @@ -1670,8 +1669,8 @@ private static Expression processVectorFunction(org.elasticsearch.xpack.esql.cor List args = vectorFunction.arguments(); List newArgs = new ArrayList<>(); for (Expression arg : args) { - if (arg.resolved()) { - if (arg.foldable() && arg.dataType().isNumeric()) { + if (arg.resolved() && arg.dataType().isNumeric()) { + if (arg.foldable()) { Object folded = arg.fold(FoldContext.small() /* TODO remove me */); if (folded instanceof List) { // Convert to floats so blocks are created accordingly @@ -1685,7 +1684,7 @@ private static Expression processVectorFunction(org.elasticsearch.xpack.esql.cor newArgs.add(denseVector); continue; } - } else if (arg.dataType().isNumeric() || isString(arg.dataType())) { + } else { // add casting function newArgs.add(new ToDenseVector(arg.source(), arg)); continue; diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java index 257f27abbe79b..1245248849cd3 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java @@ -2434,10 +2434,10 @@ private void checkDenseVectorImplicitCastingSimilarityFunction(String similarity assertEquals("similarity", alias.name()); var similarity = as(alias.child(), VectorSimilarityFunction.class); var left = as(similarity.left(), FieldAttribute.class); - var queryVector = as(similarity.right(), ToDenseVector.class); - assertEquals(DataType.DENSE_VECTOR, queryVector.dataType()); - var refAttr = as(queryVector.children().get(0), ReferenceAttribute.class); - assertThat(refAttr.name(), is("query")); + assertThat(List.of("float_vector", "byte_vector"), hasItem(left.name())); + var right = as(similarity.right(), Literal.class); + assertThat(right.dataType(), is(DENSE_VECTOR)); + assertThat(right.value(), equalTo(expectedElems)); } public void testDenseVectorEvalCastingSimilarityFunctions() { From 83331f3e90737325bc482776e103cdfca1eada9b Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Tue, 2 Sep 2025 16:49:05 +0000 Subject: [PATCH 06/25] [CI] Auto commit changes from spotless --- .../scalar/convert/ToDenseVector.java | 2 +- .../xpack/esql/analysis/AnalyzerTests.java | 20 ++----- .../scalar/convert/ToDenseVectorTests.java | 60 ++++++------------- 3 files changed, 24 insertions(+), 58 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVector.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVector.java index bf8d2d6741b69..45096da59465c 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVector.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVector.java @@ -50,7 +50,7 @@ public ToDenseVector( Source source, @Param( name = "field", - type = {"double", "long", "integer"}, + type = { "double", "long", "integer" }, description = "multi-valued input of numbers to convert." ) Expression field ) { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java index 1245248849cd3..6ae82349f8bc1 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java @@ -2442,33 +2442,23 @@ private void checkDenseVectorImplicitCastingSimilarityFunction(String similarity public void testDenseVectorEvalCastingSimilarityFunctions() { if (EsqlCapabilities.Cap.COSINE_VECTOR_SIMILARITY_FUNCTION.isEnabled()) { - checkDenseVectorEvalCastingSimilarityFunction( - "v_cosine(float_vector, query)" - ); + checkDenseVectorEvalCastingSimilarityFunction("v_cosine(float_vector, query)"); checkDenseVectorEvalCastingSimilarityFunction("v_cosine(byte_vector, query)"); } if (EsqlCapabilities.Cap.DOT_PRODUCT_VECTOR_SIMILARITY_FUNCTION.isEnabled()) { - checkDenseVectorEvalCastingSimilarityFunction( - "v_dot_product(float_vector, query)" - ); + checkDenseVectorEvalCastingSimilarityFunction("v_dot_product(float_vector, query)"); checkDenseVectorEvalCastingSimilarityFunction("v_dot_product(byte_vector, query)"); } if (EsqlCapabilities.Cap.L1_NORM_VECTOR_SIMILARITY_FUNCTION.isEnabled()) { - checkDenseVectorEvalCastingSimilarityFunction( - "v_l1_norm(float_vector, query)" - ); + checkDenseVectorEvalCastingSimilarityFunction("v_l1_norm(float_vector, query)"); checkDenseVectorEvalCastingSimilarityFunction("v_l1_norm(byte_vector, query)"); } if (EsqlCapabilities.Cap.L2_NORM_VECTOR_SIMILARITY_FUNCTION.isEnabled()) { - checkDenseVectorEvalCastingSimilarityFunction( - "v_l2_norm(float_vector, query)" - ); + checkDenseVectorEvalCastingSimilarityFunction("v_l2_norm(float_vector, query)"); checkDenseVectorEvalCastingSimilarityFunction("v_l2_norm(float_vector, query)"); } if (EsqlCapabilities.Cap.HAMMING_VECTOR_SIMILARITY_FUNCTION.isEnabled()) { - checkDenseVectorEvalCastingSimilarityFunction( - "v_hamming(byte_vector, query)" - ); + checkDenseVectorEvalCastingSimilarityFunction("v_hamming(byte_vector, query)"); checkDenseVectorEvalCastingSimilarityFunction("v_hamming(byte_vector, query)"); } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorTests.java index 7552069a8c5a4..702fe976a5b59 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorTests.java @@ -34,49 +34,25 @@ public ToDenseVectorTests(@Name("TestCase") Supplier public static Iterable parameters() { List suppliers = new ArrayList<>(); - suppliers.add( - new TestCaseSupplier( - "int", - List.of(DataType.INTEGER), - () -> { - List data = Arrays.asList(randomArray(1, 10, Integer[]::new, ESTestCase::randomInt)); - return new TestCaseSupplier.TestCase( - List.of( - new TestCaseSupplier.TypedData( - data, - DataType.INTEGER, - "int" - ) - ), - evaluatorName("Int", "i"), - DataType.DENSE_VECTOR, - equalTo(data.stream().map(Number::floatValue).toList()) - ); - } - ) - ); + suppliers.add(new TestCaseSupplier("int", List.of(DataType.INTEGER), () -> { + List data = Arrays.asList(randomArray(1, 10, Integer[]::new, ESTestCase::randomInt)); + return new TestCaseSupplier.TestCase( + List.of(new TestCaseSupplier.TypedData(data, DataType.INTEGER, "int")), + evaluatorName("Int", "i"), + DataType.DENSE_VECTOR, + equalTo(data.stream().map(Number::floatValue).toList()) + ); + })); - suppliers.add( - new TestCaseSupplier( - "long", - List.of(DataType.LONG), - () -> { - List data = Arrays.asList(randomArray(1, 10, Long[]::new, ESTestCase::randomLong)); - return new TestCaseSupplier.TestCase( - List.of( - new TestCaseSupplier.TypedData( - data, - DataType.LONG, - "long" - ) - ), - evaluatorName("Long", "l"), - DataType.DENSE_VECTOR, - equalTo(data.stream().map(Number::floatValue).toList()) - ); - } - ) - ); + suppliers.add(new TestCaseSupplier("long", List.of(DataType.LONG), () -> { + List data = Arrays.asList(randomArray(1, 10, Long[]::new, ESTestCase::randomLong)); + return new TestCaseSupplier.TestCase( + List.of(new TestCaseSupplier.TypedData(data, DataType.LONG, "long")), + evaluatorName("Long", "l"), + DataType.DENSE_VECTOR, + equalTo(data.stream().map(Number::floatValue).toList()) + ); + })); return parameterSuppliersFromTypedDataWithDefaultChecksNoErrors(true, suppliers); } From ada9dae86ff0d35d573021e564d859d06d7536ea Mon Sep 17 00:00:00 2001 From: cdelgado Date: Wed, 3 Sep 2025 09:30:55 +0200 Subject: [PATCH 07/25] Create vectorArgumentsCount() on VectorFunction to determine what params to cast --- .../org/elasticsearch/xpack/esql/analysis/Analyzer.java | 7 +++++-- .../xpack/esql/expression/function/vector/Knn.java | 5 +++++ .../xpack/esql/expression/function/vector/Magnitude.java | 5 +++++ .../esql/expression/function/vector/VectorFunction.java | 9 ++++++++- .../function/vector/VectorSimilarityFunction.java | 5 +++++ 5 files changed, 28 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java index f2f394b78abca..264b8e4b83847 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java @@ -1668,8 +1668,11 @@ private static Expression castStringLiteral(Expression from, DataType target) { private static Expression processVectorFunction(org.elasticsearch.xpack.esql.core.expression.function.Function vectorFunction) { List args = vectorFunction.arguments(); List newArgs = new ArrayList<>(); - for (Expression arg : args) { - if (arg.resolved() && arg.dataType().isNumeric()) { + // Only the first vector arguments are vectors and considered for casting + int vectorArgsCount = ((VectorFunction)vectorFunction).vectorArgumentsCount(); + for (int i = 0; i < args.size(); i++) { + Expression arg = args.get(i); + if (i < vectorArgsCount && arg.resolved() && arg.dataType().isNumeric()) { if (arg.foldable()) { Object folded = arg.fold(FoldContext.small() /* TODO remove me */); if (folded instanceof List) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java index 0b64fb43909df..5f97214583424 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java @@ -203,6 +203,11 @@ public DataType dataType() { return DataType.BOOLEAN; } + @Override + public int vectorArgumentsCount() { + return 2; + } + @Override protected TypeResolution resolveParams() { return resolveField().and(resolveQuery()).and(resolveK()).and(Options.resolve(options(), source(), FOURTH, ALLOWED_OPTIONS)); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Magnitude.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Magnitude.java index f629de35af7b8..5f302f7349703 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Magnitude.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Magnitude.java @@ -96,6 +96,11 @@ protected TypeResolution resolveType() { return isType(field(), dt -> dt == DENSE_VECTOR, sourceText(), TypeResolutions.ParamOrdinal.FIRST, "dense_vector"); } + @Override + public int vectorArgumentsCount() { + return 1; + } + /** * Functional interface for evaluating the scalar value of the underlying float array. */ diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorFunction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorFunction.java index dc0be7a29fee0..224f82794d851 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorFunction.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorFunction.java @@ -12,4 +12,11 @@ * from multi values to dense_vector field types, so parameters are actually * processed as dense_vectors in vector functions */ -public interface VectorFunction {} +public interface VectorFunction { + + /** + * Number of arguments that should be treated as vectors. The first vectorArgumentsCount() arguments will be implicitly casted as + * dense_vector according to the value returned of this method + */ + int vectorArgumentsCount(); +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorSimilarityFunction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorSimilarityFunction.java index 1e9727398d458..d6b3ae759c362 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorSimilarityFunction.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorSimilarityFunction.java @@ -77,6 +77,11 @@ public Object fold(FoldContext ctx) { return EvaluatorMapper.super.fold(source(), ctx); } + @Override + public int vectorArgumentsCount() { + return 2; + } + @Override public final EvalOperator.ExpressionEvaluator.Factory toEvaluator(EvaluatorMapper.ToEvaluator toEvaluator) { return new SimilarityEvaluatorFactory( From 43c0d9a8992805a0ddb8b45ea2e62e8c7ff059bd Mon Sep 17 00:00:00 2001 From: cdelgado Date: Wed, 3 Sep 2025 09:31:02 +0200 Subject: [PATCH 08/25] Add double tests --- .../scalar/convert/ToDenseVectorTests.java | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorTests.java index 7552069a8c5a4..74e9e34e234a5 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorTests.java @@ -78,6 +78,28 @@ public static Iterable parameters() { ) ); + suppliers.add( + new TestCaseSupplier( + "double", + List.of(DataType.DOUBLE), + () -> { + List data = Arrays.asList(randomArray(1, 10, Double[]::new, ESTestCase::randomDouble)); + return new TestCaseSupplier.TestCase( + List.of( + new TestCaseSupplier.TypedData( + data, + DataType.DOUBLE, + "double" + ) + ), + evaluatorName("Double", "d"), + DataType.DENSE_VECTOR, + equalTo(data.stream().map(Number::floatValue).toList()) + ); + } + ) + ); + return parameterSuppliersFromTypedDataWithDefaultChecksNoErrors(true, suppliers); } From 8e7247abd07f13170872e3509850f90d270f1ac9 Mon Sep 17 00:00:00 2001 From: cdelgado Date: Wed, 3 Sep 2025 14:24:26 +0200 Subject: [PATCH 09/25] Remove unnecessary converters --- .../src/main/resources/dense_vector.csv-spec | 13 ++ .../src/main/resources/knn-function.csv-spec | 25 +++ .../ToDenseVectorFromBooleanEvaluator.java | 132 --------------- .../ToDenseVectorFromStringEvaluator.java | 150 ------------------ .../xpack/esql/analysis/AnalyzerTests.java | 15 ++ .../scalar/convert/ToDenseVectorTests.java | 30 ++++ .../optimizer/LogicalPlanOptimizerTests.java | 12 ++ 7 files changed, 95 insertions(+), 282 deletions(-) delete mode 100644 x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorFromBooleanEvaluator.java delete mode 100644 x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorFromStringEvaluator.java diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dense_vector.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dense_vector.csv-spec index de1e218768c58..c57576009d845 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dense_vector.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dense_vector.csv-spec @@ -88,3 +88,16 @@ row doubles = [123.4, 567.8, 901.2] vector:dense_vector [123.4, 567.8, 901.2] ; + +convertHexStringToDenseVector +required_capability: dense_vector_field_type +required_capability: to_dense_vector_function + +row hex_str = "0102030405060708090a0b0c0d0e0f" +| eval vector = to_dense_vector(hex_str) +| keep vector +; + +vector:dense_vector + [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0] +; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec index caf19eb6825d9..1cd66e2d6946c 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec @@ -327,3 +327,28 @@ navy | [0.0, 0.0, 128.0] gray | [128.0, 128.0, 128.0] chartreuse | [127.0, 255.0, 0.0] ; + +knnWithHexStringCasting +required_capability: knn_function_v3 +required_capability: to_dense_vector_function + +from colors metadata _score +| eval query = [0, 120, 0] +| where knn(rgb_vector, "007800", 10) +| sort _score desc, color asc +| keep color, rgb_vector +| limit 10 +; + +color:text | rgb_vector:dense_vector +green | [0.0, 128.0, 0.0] +black | [0.0, 0.0, 0.0] +olive | [128.0, 128.0, 0.0] +teal | [0.0, 128.0, 128.0] +lime | [0.0, 255.0, 0.0] +sienna | [160.0, 82.0, 45.0] +maroon | [128.0, 0.0, 0.0] +navy | [0.0, 0.0, 128.0] +gray | [128.0, 128.0, 128.0] +chartreuse | [127.0, 255.0, 0.0] +; diff --git a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorFromBooleanEvaluator.java b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorFromBooleanEvaluator.java deleted file mode 100644 index 582fcdb070a3b..0000000000000 --- a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorFromBooleanEvaluator.java +++ /dev/null @@ -1,132 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License -// 2.0; you may not use this file except in compliance with the Elastic License -// 2.0. -package org.elasticsearch.xpack.esql.expression.function.scalar.convert; - -import java.lang.Override; -import java.lang.String; -import org.apache.lucene.util.RamUsageEstimator; -import org.elasticsearch.compute.data.Block; -import org.elasticsearch.compute.data.BooleanBlock; -import org.elasticsearch.compute.data.BooleanVector; -import org.elasticsearch.compute.data.FloatBlock; -import org.elasticsearch.compute.data.Vector; -import org.elasticsearch.compute.operator.DriverContext; -import org.elasticsearch.compute.operator.EvalOperator; -import org.elasticsearch.core.Releasables; -import org.elasticsearch.xpack.esql.core.tree.Source; - -/** - * {@link EvalOperator.ExpressionEvaluator} implementation for {@link ToDenseVector}. - * This class is generated. Edit {@code ConvertEvaluatorImplementer} instead. - */ -public final class ToDenseVectorFromBooleanEvaluator extends AbstractConvertFunction.AbstractEvaluator { - private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(ToDenseVectorFromBooleanEvaluator.class); - - private final EvalOperator.ExpressionEvaluator bool; - - public ToDenseVectorFromBooleanEvaluator(Source source, EvalOperator.ExpressionEvaluator bool, - DriverContext driverContext) { - super(driverContext, source); - this.bool = bool; - } - - @Override - public EvalOperator.ExpressionEvaluator next() { - return bool; - } - - @Override - public Block evalVector(Vector v) { - BooleanVector vector = (BooleanVector) v; - int positionCount = v.getPositionCount(); - if (vector.isConstant()) { - return driverContext.blockFactory().newConstantFloatBlockWith(evalValue(vector, 0), positionCount); - } - try (FloatBlock.Builder builder = driverContext.blockFactory().newFloatBlockBuilder(positionCount)) { - for (int p = 0; p < positionCount; p++) { - builder.appendFloat(evalValue(vector, p)); - } - return builder.build(); - } - } - - private float evalValue(BooleanVector container, int index) { - boolean value = container.getBoolean(index); - return ToDenseVector.fromBoolean(value); - } - - @Override - public Block evalBlock(Block b) { - BooleanBlock block = (BooleanBlock) b; - int positionCount = block.getPositionCount(); - try (FloatBlock.Builder builder = driverContext.blockFactory().newFloatBlockBuilder(positionCount)) { - for (int p = 0; p < positionCount; p++) { - int valueCount = block.getValueCount(p); - int start = block.getFirstValueIndex(p); - int end = start + valueCount; - boolean positionOpened = false; - boolean valuesAppended = false; - for (int i = start; i < end; i++) { - float value = evalValue(block, i); - if (positionOpened == false && valueCount > 1) { - builder.beginPositionEntry(); - positionOpened = true; - } - builder.appendFloat(value); - valuesAppended = true; - } - if (valuesAppended == false) { - builder.appendNull(); - } else if (positionOpened) { - builder.endPositionEntry(); - } - } - return builder.build(); - } - } - - private float evalValue(BooleanBlock container, int index) { - boolean value = container.getBoolean(index); - return ToDenseVector.fromBoolean(value); - } - - @Override - public String toString() { - return "ToDenseVectorFromBooleanEvaluator[" + "bool=" + bool + "]"; - } - - @Override - public void close() { - Releasables.closeExpectNoException(bool); - } - - @Override - public long baseRamBytesUsed() { - long baseRamBytesUsed = BASE_RAM_BYTES_USED; - baseRamBytesUsed += bool.baseRamBytesUsed(); - return baseRamBytesUsed; - } - - public static class Factory implements EvalOperator.ExpressionEvaluator.Factory { - private final Source source; - - private final EvalOperator.ExpressionEvaluator.Factory bool; - - public Factory(Source source, EvalOperator.ExpressionEvaluator.Factory bool) { - this.source = source; - this.bool = bool; - } - - @Override - public ToDenseVectorFromBooleanEvaluator get(DriverContext context) { - return new ToDenseVectorFromBooleanEvaluator(source, bool.get(context), context); - } - - @Override - public String toString() { - return "ToDenseVectorFromBooleanEvaluator[" + "bool=" + bool + "]"; - } - } -} diff --git a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorFromStringEvaluator.java b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorFromStringEvaluator.java deleted file mode 100644 index f53fbe12221df..0000000000000 --- a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorFromStringEvaluator.java +++ /dev/null @@ -1,150 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License -// 2.0; you may not use this file except in compliance with the Elastic License -// 2.0. -package org.elasticsearch.xpack.esql.expression.function.scalar.convert; - -import java.lang.Override; -import java.lang.String; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.RamUsageEstimator; -import org.elasticsearch.compute.data.Block; -import org.elasticsearch.compute.data.BytesRefBlock; -import org.elasticsearch.compute.data.BytesRefVector; -import org.elasticsearch.compute.data.FloatBlock; -import org.elasticsearch.compute.data.Vector; -import org.elasticsearch.compute.operator.DriverContext; -import org.elasticsearch.compute.operator.EvalOperator; -import org.elasticsearch.core.Releasables; -import org.elasticsearch.xpack.esql.core.InvalidArgumentException; -import org.elasticsearch.xpack.esql.core.tree.Source; - -/** - * {@link EvalOperator.ExpressionEvaluator} implementation for {@link ToDenseVector}. - * This class is generated. Edit {@code ConvertEvaluatorImplementer} instead. - */ -public final class ToDenseVectorFromStringEvaluator extends AbstractConvertFunction.AbstractEvaluator { - private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(ToDenseVectorFromStringEvaluator.class); - - private final EvalOperator.ExpressionEvaluator in; - - public ToDenseVectorFromStringEvaluator(Source source, EvalOperator.ExpressionEvaluator in, - DriverContext driverContext) { - super(driverContext, source); - this.in = in; - } - - @Override - public EvalOperator.ExpressionEvaluator next() { - return in; - } - - @Override - public Block evalVector(Vector v) { - BytesRefVector vector = (BytesRefVector) v; - int positionCount = v.getPositionCount(); - BytesRef scratchPad = new BytesRef(); - if (vector.isConstant()) { - try { - return driverContext.blockFactory().newConstantFloatBlockWith(evalValue(vector, 0, scratchPad), positionCount); - } catch (InvalidArgumentException e) { - registerException(e); - return driverContext.blockFactory().newConstantNullBlock(positionCount); - } - } - try (FloatBlock.Builder builder = driverContext.blockFactory().newFloatBlockBuilder(positionCount)) { - for (int p = 0; p < positionCount; p++) { - try { - builder.appendFloat(evalValue(vector, p, scratchPad)); - } catch (InvalidArgumentException e) { - registerException(e); - builder.appendNull(); - } - } - return builder.build(); - } - } - - private float evalValue(BytesRefVector container, int index, BytesRef scratchPad) { - BytesRef value = container.getBytesRef(index, scratchPad); - return ToDenseVector.fromString(value); - } - - @Override - public Block evalBlock(Block b) { - BytesRefBlock block = (BytesRefBlock) b; - int positionCount = block.getPositionCount(); - try (FloatBlock.Builder builder = driverContext.blockFactory().newFloatBlockBuilder(positionCount)) { - BytesRef scratchPad = new BytesRef(); - for (int p = 0; p < positionCount; p++) { - int valueCount = block.getValueCount(p); - int start = block.getFirstValueIndex(p); - int end = start + valueCount; - boolean positionOpened = false; - boolean valuesAppended = false; - for (int i = start; i < end; i++) { - try { - float value = evalValue(block, i, scratchPad); - if (positionOpened == false && valueCount > 1) { - builder.beginPositionEntry(); - positionOpened = true; - } - builder.appendFloat(value); - valuesAppended = true; - } catch (InvalidArgumentException e) { - registerException(e); - } - } - if (valuesAppended == false) { - builder.appendNull(); - } else if (positionOpened) { - builder.endPositionEntry(); - } - } - return builder.build(); - } - } - - private float evalValue(BytesRefBlock container, int index, BytesRef scratchPad) { - BytesRef value = container.getBytesRef(index, scratchPad); - return ToDenseVector.fromString(value); - } - - @Override - public String toString() { - return "ToDenseVectorFromStringEvaluator[" + "in=" + in + "]"; - } - - @Override - public void close() { - Releasables.closeExpectNoException(in); - } - - @Override - public long baseRamBytesUsed() { - long baseRamBytesUsed = BASE_RAM_BYTES_USED; - baseRamBytesUsed += in.baseRamBytesUsed(); - return baseRamBytesUsed; - } - - public static class Factory implements EvalOperator.ExpressionEvaluator.Factory { - private final Source source; - - private final EvalOperator.ExpressionEvaluator.Factory in; - - public Factory(Source source, EvalOperator.ExpressionEvaluator.Factory in) { - this.source = source; - this.in = in; - } - - @Override - public ToDenseVectorFromStringEvaluator get(DriverContext context) { - return new ToDenseVectorFromStringEvaluator(source, in.get(context), context); - } - - @Override - public String toString() { - return "ToDenseVectorFromStringEvaluator[" + "in=" + in + "]"; - } - } -} diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java index 1245248849cd3..f2768eed1aa7b 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java @@ -2352,6 +2352,8 @@ public void testDenseVectorImplicitCastingKnn() { assumeTrue("dense_vector capability not available", EsqlCapabilities.Cap.DENSE_VECTOR_FIELD_TYPE.isEnabled()); assumeTrue("dense_vector capability not available", EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()); + checkDenseVectorCastingHexKnn("float_vector"); + checkDenseVectorCastingHexKnn("byte_vector"); checkDenseVectorCastingKnn("float_vector"); checkDenseVectorCastingKnn("byte_vector"); checkDenseVectorEvalCastingKnn("float_vector"); @@ -2371,6 +2373,19 @@ private static void checkDenseVectorCastingKnn(String fieldName) { assertThat(queryVector.value(), equalTo(List.of(0.0f, 1.0f, 2.0f))); } + private static void checkDenseVectorCastingHexKnn(String fieldName) { + var plan = analyze(String.format(Locale.ROOT, """ + from test | where knn(%s, "000102", 10) + """, fieldName), "mapping-dense_vector.json"); + + var limit = as(plan, Limit.class); + var filter = as(limit.child(), Filter.class); + var knn = as(filter.condition(), Knn.class); + var queryVector = as(knn.query(), Literal.class); + assertEquals(DataType.DENSE_VECTOR, queryVector.dataType()); + assertThat(queryVector.value(), equalTo(List.of(0.0f, 1.0f, 2.0f))); + } + private static void checkDenseVectorEvalCastingKnn(String fieldName) { var plan = analyze(String.format(Locale.ROOT, """ from test | eval query = [0, 1, 2] | where knn(%s, query, 10) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorTests.java index 74e9e34e234a5..def7b133e3938 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorTests.java @@ -10,6 +10,7 @@ import com.carrotsearch.randomizedtesting.annotations.Name; import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.tree.Source; @@ -19,10 +20,12 @@ import java.util.ArrayList; import java.util.Arrays; +import java.util.HexFormat; import java.util.List; import java.util.function.Supplier; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.is; public class ToDenseVectorTests extends AbstractScalarFunctionTestCase { @@ -100,6 +103,33 @@ public static Iterable parameters() { ) ); + suppliers.add( + new TestCaseSupplier( + "keyword", + List.of(DataType.KEYWORD), + () -> { + byte[] bytes = randomByteArrayOfLength(randomIntBetween(1, 20)); + String data = HexFormat.of().formatHex(bytes); + List expected = new ArrayList<>(bytes.length); + for (int i = 0; i < bytes.length; i++) { + expected.add((float) bytes[i]); + } + return new TestCaseSupplier.TestCase( + List.of( + new TestCaseSupplier.TypedData( + new BytesRef(data), + DataType.KEYWORD, + "keyword" + ) + ), + evaluatorName("String", "s"), + DataType.DENSE_VECTOR, + is(expected) + ); + } + ) + ); + return parameterSuppliersFromTypedDataWithDefaultChecksNoErrors(true, suppliers); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java index 67bb40214cb7b..4e25abc573389 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java @@ -8625,6 +8625,18 @@ public void testMorePushDownConjunctionsAndNotDisjunctionsToKnnPrefilter() { assertThat(leftAnd.right(), equalTo(rightAndPrefilter)); } + + public void testTest() { + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()); + + var query = """ + from test + | where knn(dense_vector, 1127, 10) + """; + var optimized = planTypes(query); + var limit = as(optimized, Limit.class); + } + public void testMultipleKnnQueriesInPrefilters() { assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()); From ffc52aa10d368700c2b299f30540a1041ef2fd12 Mon Sep 17 00:00:00 2001 From: cdelgado Date: Wed, 3 Sep 2025 14:25:12 +0200 Subject: [PATCH 10/25] Add ToDenseVectorFromStringEvaluator --- .../scalar/convert/ToDenseVector.java | 13 +- .../ToDenseVectorFromStringEvaluator.java | 115 ++++++++++++++++++ 2 files changed, 123 insertions(+), 5 deletions(-) create mode 100644 x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorFromStringEvaluator.java diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVector.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVector.java index bf8d2d6741b69..2601dd4a2530d 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVector.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVector.java @@ -10,6 +10,7 @@ import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.compute.ann.ConvertEvaluator; +import org.elasticsearch.xpack.esql.capabilities.PostAnalysisVerificationAware; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; @@ -25,9 +26,10 @@ import static org.elasticsearch.xpack.esql.core.type.DataType.DENSE_VECTOR; import static org.elasticsearch.xpack.esql.core.type.DataType.DOUBLE; import static org.elasticsearch.xpack.esql.core.type.DataType.INTEGER; +import static org.elasticsearch.xpack.esql.core.type.DataType.KEYWORD; import static org.elasticsearch.xpack.esql.core.type.DataType.LONG; -public class ToDenseVector extends AbstractConvertFunction { +public class ToDenseVector extends AbstractConvertFunction implements PostAnalysisVerificationAware { public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry( Expression.class, "ToDenseVector", @@ -38,20 +40,21 @@ public class ToDenseVector extends AbstractConvertFunction { Map.entry(DENSE_VECTOR, (source, fieldEval) -> fieldEval), Map.entry(LONG, ToDenseVectorFromLongEvaluator.Factory::new), Map.entry(INTEGER, ToDenseVectorFromIntEvaluator.Factory::new), - Map.entry(DOUBLE, ToDenseVectorFromDoubleEvaluator.Factory::new) + Map.entry(DOUBLE, ToDenseVectorFromDoubleEvaluator.Factory::new), + Map.entry(KEYWORD, ToDenseVectorFromStringEvaluator.Factory::new) ); @FunctionInfo( returnType = "dense_vector", - description = "Converts a multi-valued input of numbers to a dense_vector.", + description = "Converts a multi-valued input of numbers, or a hexadecimal string, to a dense_vector.", examples = @Example(file = "dense_vector", tag = "to_dense_vector-ints") ) public ToDenseVector( Source source, @Param( name = "field", - type = {"double", "long", "integer"}, - description = "multi-valued input of numbers to convert." + type = {"double", "long", "integer", "keyword"}, + description = "multi-valued input of numbers or hexadecimal string to convert." ) Expression field ) { super(source, field); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorFromStringEvaluator.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorFromStringEvaluator.java new file mode 100644 index 0000000000000..fa410acb996e1 --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorFromStringEvaluator.java @@ -0,0 +1,115 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression.function.scalar.convert; + +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.RamUsageEstimator; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BytesRefBlock; +import org.elasticsearch.compute.data.FloatBlock; +import org.elasticsearch.compute.data.Vector; +import org.elasticsearch.compute.operator.DriverContext; +import org.elasticsearch.compute.operator.EvalOperator; +import org.elasticsearch.core.Releasables; +import org.elasticsearch.xpack.esql.core.tree.Source; + +import java.util.HexFormat; + +public class ToDenseVectorFromStringEvaluator extends AbstractConvertFunction.AbstractEvaluator { + private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance( + ToDenseVectorFromStringEvaluator.class + ); + + private final EvalOperator.ExpressionEvaluator field; + + public ToDenseVectorFromStringEvaluator(Source source, EvalOperator.ExpressionEvaluator field, DriverContext driverContext) { + super(driverContext, source); + this.field = field; + } + + @Override + protected EvalOperator.ExpressionEvaluator next() { + return field; + } + + @Override + protected Block evalVector(Vector v) { + return evalBlock(v.asBlock()); + } + + @Override + public Block evalBlock(Block b) { + BytesRefBlock block = (BytesRefBlock) b; + int positionCount = block.getPositionCount(); + int dimensions = 0; + BytesRef scratch = new BytesRef(); + try (FloatBlock.Builder builder = driverContext.blockFactory().newFloatBlockBuilder(positionCount * dimensions)) { + for (int p = 0; p < positionCount; p++) { + if (block.isNull(p)) { + builder.appendNull(); + } else { + scratch = block.getBytesRef(p, scratch); + byte[] bytes = HexFormat.of().parseHex(scratch.utf8ToString()); + if (bytes.length == 0) { + builder.appendNull(); + continue; + } + if (dimensions == 0) { + dimensions = bytes.length; + } else { + if (bytes.length != dimensions) { + throw new IllegalArgumentException("All dense_vector must have the same number of dimensions. Expected: " + + dimensions + ", found: " + bytes.length); + } + } + builder.beginPositionEntry(); + for (byte value : bytes) { + builder.appendFloat(value); + } + builder.endPositionEntry(); + } + } + return builder.build(); + } + } + + @Override + public String toString() { + return "ToDenseVectorFromStringEvaluator[s=" + field + ']'; + } + + @Override + public long baseRamBytesUsed() { + return BASE_RAM_BYTES_USED + field.baseRamBytesUsed(); + } + + @Override + public void close() { + Releasables.closeExpectNoException(field); + } + + public static class Factory implements EvalOperator.ExpressionEvaluator.Factory { + private final Source source; + private final EvalOperator.ExpressionEvaluator.Factory field; + + public Factory(Source source, EvalOperator.ExpressionEvaluator.Factory field) { + this.source = source; + this.field = field; + } + + @Override + public EvalOperator.ExpressionEvaluator get(DriverContext context) { + return new ToDenseVectorFromStringEvaluator(source, field.get(context), context); + } + + @Override + public String toString() { + return "ToDenseVectorFromStringEvaluator[s=" + field + ']'; + } + } +} From b67e12199335a2a9ec4611bdf519563f6a52545e Mon Sep 17 00:00:00 2001 From: cdelgado Date: Wed, 3 Sep 2025 16:22:01 +0200 Subject: [PATCH 11/25] Analyzer checks foldables --- .../xpack/esql/analysis/Analyzer.java | 23 +++++++++++++------ 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java index 264b8e4b83847..291d461c60642 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java @@ -7,6 +7,7 @@ package org.elasticsearch.xpack.esql.analysis; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.logging.HeaderWarning; import org.elasticsearch.common.logging.LoggerMessageFormat; import org.elasticsearch.common.lucene.BytesRefs; @@ -139,6 +140,7 @@ import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; +import java.util.HexFormat; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -1672,23 +1674,31 @@ private static Expression processVectorFunction(org.elasticsearch.xpack.esql.cor int vectorArgsCount = ((VectorFunction)vectorFunction).vectorArgumentsCount(); for (int i = 0; i < args.size(); i++) { Expression arg = args.get(i); - if (i < vectorArgsCount && arg.resolved() && arg.dataType().isNumeric()) { + if (i < vectorArgsCount && arg.resolved()) { if (arg.foldable()) { - Object folded = arg.fold(FoldContext.small() /* TODO remove me */); - if (folded instanceof List) { + Object folded = arg.fold(FoldContext.small()); + List floatVector = null; + if (folded instanceof List && arg.dataType().isNumeric()) { // Convert to floats so blocks are created accordingly - List floatVector; if (arg.dataType() == FLOAT) { floatVector = (List) folded; } else { floatVector = ((List) folded).stream().map(Number::floatValue).collect(Collectors.toList()); } + } else if (folded instanceof BytesRef hexString && arg.dataType() == KEYWORD) { + byte[] bytes = HexFormat.of().parseHex(hexString.utf8ToString()); + floatVector = new ArrayList<>(); + for (byte value : bytes) { + floatVector.add((float) value); + } + } + if (floatVector != null) { Literal denseVector = new Literal(arg.source(), floatVector, DataType.DENSE_VECTOR); newArgs.add(denseVector); continue; } - } else { - // add casting function + } else if ((arg instanceof ToDenseVector == false) && (arg.dataType().isNumeric() || arg.dataType() == KEYWORD)) { + // add casting function if it's not already there newArgs.add(new ToDenseVector(arg.source(), arg)); continue; } @@ -1698,7 +1708,6 @@ private static Expression processVectorFunction(org.elasticsearch.xpack.esql.cor return vectorFunction.replaceChildren(newArgs); } - } /** From 1be036448ebdcb503185046aa63a22a7900c770c Mon Sep 17 00:00:00 2001 From: cdelgado Date: Wed, 3 Sep 2025 18:34:18 +0200 Subject: [PATCH 12/25] Change verification process, so ToDenseVector is not used but direct literal translation is done --- .../src/main/resources/knn-function.csv-spec | 3 +- .../vector-cosine-similarity.csv-spec | 8 ++-- .../resources/vector-dot-product.csv-spec | 8 ++-- .../main/resources/vector-hamming.csv-spec | 8 ++-- .../main/resources/vector-l1-norm.csv-spec | 8 ++-- .../main/resources/vector-l2-norm.csv-spec | 8 ++-- .../main/resources/vector-magnitude.csv-spec | 6 +-- .../xpack/esql/plugin/KnnFunctionIT.java | 17 +++++++ .../xpack/esql/analysis/Analyzer.java | 24 ++++++---- .../scalar/convert/ToDenseVector.java | 44 ++++++++++++++++++- .../ToDenseVectorFromStringEvaluator.java | 38 +++++++++------- .../vector/VectorSimilarityFunction.java | 11 +++++ .../xpack/esql/analysis/AnalyzerTests.java | 40 ++++++++++++++--- 13 files changed, 165 insertions(+), 58 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec index 1cd66e2d6946c..ca323b8dedff3 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec @@ -308,7 +308,7 @@ required_capability: knn_function_v3 required_capability: to_dense_vector_function from colors metadata _score -| eval query = [0, 120, 0] +| eval query = to_dense_vector([0, 120, 0]) | where knn(rgb_vector, query, 10) | sort _score desc, color asc | keep color, rgb_vector @@ -333,7 +333,6 @@ required_capability: knn_function_v3 required_capability: to_dense_vector_function from colors metadata _score -| eval query = [0, 120, 0] | where knn(rgb_vector, "007800", 10) | sort _score desc, color asc | keep color, rgb_vector diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-cosine-similarity.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-cosine-similarity.csv-spec index b856ccd5f172e..451368deec934 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-cosine-similarity.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-cosine-similarity.csv-spec @@ -94,12 +94,12 @@ similarityWithRow required_capability: cosine_vector_similarity_function required_capability: to_dense_vector_function -row vector = [1, 2, 3] +row vector = to_dense_vector([1, 2, 3]) | eval similarity = round(v_cosine(vector, [0, 1, 2]), 3) ; -vector: integer | similarity:double -[1, 2, 3] | 0.978 +vector: dense_vector | similarity:double +[1.0, 2.0, 3.0] | 0.978 ; similarityWithVectorField @@ -108,7 +108,7 @@ required_capability: to_dense_vector_function from colors | where color != "black" -| eval query = [0, 255, 255] +| eval query = to_dense_vector([0, 255, 255]) | eval similarity = v_cosine(rgb_vector, query) | sort similarity desc, color asc | limit 10 diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-dot-product.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-dot-product.csv-spec index d9d3d655625be..3297ae84db5ff 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-dot-product.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-dot-product.csv-spec @@ -92,12 +92,12 @@ similarityWithRow required_capability: dot_product_vector_similarity_function required_capability: to_dense_vector_function -row vector = [1, 2, 3] +row vector = to_dense_vector([1, 2, 3]) | eval similarity = round(v_dot_product(vector, [0, 1, 2]), 3) ; -vector: integer | similarity:double -[1, 2, 3] | 4.5 +vector: dense_vector | similarity:double +[1.0, 2.0, 3.0] | 4.5 ; similarityWithVectorField @@ -105,7 +105,7 @@ required_capability: dot_product_vector_similarity_function required_capability: to_dense_vector_function from colors -| eval query = [0, 255, 255] +| eval query = to_dense_vector([0, 255, 255]) | eval similarity = v_dot_product(rgb_vector, query) | sort similarity desc, color asc | limit 10 diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-hamming.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-hamming.csv-spec index 9f372f104a689..37630c94e62e0 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-hamming.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-hamming.csv-spec @@ -91,12 +91,12 @@ similarityWithRow required_capability: hamming_vector_similarity_function required_capability: to_dense_vector_function -row vector = [1, 2, 3] +row vector = to_dense_vector([1, 2, 3]) | eval similarity = round(v_hamming(vector, [0, 1, 2]), 3) ; -vector: integer | similarity:double -[1, 2, 3] | 4.0 +vector: dense_vector | similarity:double +[1.0, 2.0, 3.0] | 4.0 ; similarityWithVectorField @@ -104,7 +104,7 @@ required_capability: hamming_vector_similarity_function required_capability: to_dense_vector_function from colors -| eval query = [0, 255, 255] +| eval query = to_dense_vector([0, 255, 255]) | eval similarity = v_hamming(rgb_vector, query) | sort similarity desc, color asc | limit 10 diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-l1-norm.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-l1-norm.csv-spec index 8c9ba8406267a..148d9d0da85a9 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-l1-norm.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-l1-norm.csv-spec @@ -91,12 +91,12 @@ similarityWithRow required_capability: l1_norm_vector_similarity_function required_capability: to_dense_vector_function -row vector = [1, 2, 3] +row vector = to_dense_vector([1, 2, 3]) | eval similarity = round(v_l1_norm(vector, [0, 1, 2]), 3) ; -vector: integer | similarity:double -[1, 2, 3] | 3.0 +vector: dense_vector | similarity:double +[1.0, 2.0, 3.0] | 3.0 ; similarityWithVectorField @@ -104,7 +104,7 @@ required_capability: l1_norm_vector_similarity_function required_capability: to_dense_vector_function from colors -| eval query = [0, 255, 255] +| eval query = to_dense_vector([0, 255, 255]) | eval similarity = v_l1_norm(rgb_vector, query) | sort similarity desc, color asc | limit 10 diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-l2-norm.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-l2-norm.csv-spec index 0d1e0cf7d2835..d150c65e3b2fa 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-l2-norm.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-l2-norm.csv-spec @@ -91,12 +91,12 @@ similarityWithRow required_capability: l2_norm_vector_similarity_function required_capability: to_dense_vector_function -row vector = [1, 2, 3] +row vector = to_dense_vector([1, 2, 3]) | eval similarity = round(v_l2_norm(vector, [0, 1, 2]), 3) ; -vector: integer | similarity:double -[1, 2, 3] | 1.732 +vector: dense_vector | similarity:double +[1.0, 2.0, 3.0] | 1.732 ; similarityWithVectorField @@ -104,7 +104,7 @@ required_capability: l2_norm_vector_similarity_function required_capability: to_dense_vector_function from colors -| eval query = [0, 255, 255] +| eval query = to_dense_vector([0, 255, 255]) | eval similarity = v_l2_norm(rgb_vector, query) | sort similarity desc, color asc | limit 10 diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-magnitude.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-magnitude.csv-spec index 58c2c9df9ea24..bb6d39735d8e4 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-magnitude.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-magnitude.csv-spec @@ -90,10 +90,10 @@ magnitudeWithRow required_capability: magnitude_scalar_vector_function required_capability: to_dense_vector_function -row vector = [1, 2, 3] +row vector = to_dense_vector([1, 2, 3]) | eval magnitude = round(v_magnitude(vector), 3) ; -vector: integer | magnitude:double -[1, 2, 3] | 3.742 +vector: dense_vector | magnitude:double +[1.0, 2.0, 3.0] | 3.742 ; diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java index d44a9b458b082..7fbb2c1851cc5 100644 --- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java @@ -191,6 +191,23 @@ public void testKnnWithLookupJoin() { ); } + + public void testKnnIncorrectCasting() { + var query = String.format(Locale.ROOT, """ + FROM test + | WHERE KNN(vector, "notcorrect", 5) + """); + + var error = expectThrows(VerificationException.class, () -> run(query)); + assertThat( + error.getMessage(), + containsString( + "line 3:13: [KNN] function cannot operate on [lookup_vector], supplied by an index [test_lookup] in non-STANDARD " + + "mode [lookup]" + ) + ); + } + @Before public void setup() throws IOException { assumeTrue("Needs KNN support", EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java index 291d461c60642..37bdedd3654f2 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java @@ -77,7 +77,6 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.convert.FromAggregateMetricDouble; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToAggregateMetricDouble; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDateNanos; -import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDenseVector; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDouble; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToInteger; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToLong; @@ -1686,10 +1685,21 @@ private static Expression processVectorFunction(org.elasticsearch.xpack.esql.cor floatVector = ((List) folded).stream().map(Number::floatValue).collect(Collectors.toList()); } } else if (folded instanceof BytesRef hexString && arg.dataType() == KEYWORD) { - byte[] bytes = HexFormat.of().parseHex(hexString.utf8ToString()); - floatVector = new ArrayList<>(); - for (byte value : bytes) { - floatVector.add((float) value); + try { + byte[] bytes = HexFormat.of().parseHex(hexString.utf8ToString()); + floatVector = new ArrayList<>(); + for (byte value : bytes) { + floatVector.add((float) value); + } + } catch (IllegalArgumentException e) { + throw new VerificationException( + "Error in [" + + vectorFunction.sourceText() + + "] for argument [" + + arg.sourceText() + + "]; dense_vectors must be a hex-encoded string: " + + e.getMessage() + ); } } if (floatVector != null) { @@ -1697,10 +1707,6 @@ private static Expression processVectorFunction(org.elasticsearch.xpack.esql.cor newArgs.add(denseVector); continue; } - } else if ((arg instanceof ToDenseVector == false) && (arg.dataType().isNumeric() || arg.dataType() == KEYWORD)) { - // add casting function if it's not already there - newArgs.add(new ToDenseVector(arg.source(), arg)); - continue; } } newArgs.add(arg); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVector.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVector.java index 2601dd4a2530d..204aefdb9efd1 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVector.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVector.java @@ -7,21 +7,29 @@ package org.elasticsearch.xpack.esql.expression.function.scalar.convert; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.compute.ann.ConvertEvaluator; +import org.elasticsearch.xpack.esql.capabilities.PostAnalysisPlanVerificationAware; import org.elasticsearch.xpack.esql.capabilities.PostAnalysisVerificationAware; +import org.elasticsearch.xpack.esql.capabilities.PostOptimizationVerificationAware; +import org.elasticsearch.xpack.esql.common.Failure; +import org.elasticsearch.xpack.esql.common.Failures; import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.expression.FoldContext; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.expression.function.Example; import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; import org.elasticsearch.xpack.esql.expression.function.Param; +import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; import java.io.IOException; import java.util.List; import java.util.Map; +import java.util.function.BiConsumer; import static org.elasticsearch.xpack.esql.core.type.DataType.DENSE_VECTOR; import static org.elasticsearch.xpack.esql.core.type.DataType.DOUBLE; @@ -29,7 +37,7 @@ import static org.elasticsearch.xpack.esql.core.type.DataType.KEYWORD; import static org.elasticsearch.xpack.esql.core.type.DataType.LONG; -public class ToDenseVector extends AbstractConvertFunction implements PostAnalysisVerificationAware { +public class ToDenseVector extends AbstractConvertFunction implements PostAnalysisPlanVerificationAware { public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry( Expression.class, "ToDenseVector", @@ -60,6 +68,40 @@ public ToDenseVector( super(source, field); } + @Override + public BiConsumer postAnalysisPlanVerification() { + return (lp, failures) -> { + Expression arg = children().get(0); + if (arg.foldable()) { + Object fold = arg.fold(FoldContext.small()); + if ((fold instanceof List list) && arg.dataType().isNumeric()) { + if (list.size() <= 1) { + failures.add(Failure.fail( + this, + "[" + sourceText() + "] requires at least two values to convert to a dense_vector" + )); + } + return; + } + if ((arg.dataType() == KEYWORD) && fold instanceof BytesRef bytesRef) { + if (bytesRef.length == 0) { + failures.add(Failure.fail( + this, + "[" + + sourceText() + + "] must be a non-empty hexadecimal string")); + } + return; + } + failures.add(Failure.fail( + this, + "[" + + sourceText() + + "] must be a multi-valued input of numbers or an hexadecimal string")); + } + }; + } + private ToDenseVector(StreamInput in) throws IOException { super(in); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorFromStringEvaluator.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorFromStringEvaluator.java index fa410acb996e1..63ec9ee28b5f5 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorFromStringEvaluator.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorFromStringEvaluator.java @@ -16,6 +16,7 @@ import org.elasticsearch.compute.operator.DriverContext; import org.elasticsearch.compute.operator.EvalOperator; import org.elasticsearch.core.Releasables; +import org.elasticsearch.xpack.esql.core.InvalidArgumentException; import org.elasticsearch.xpack.esql.core.tree.Source; import java.util.HexFormat; @@ -54,24 +55,29 @@ public Block evalBlock(Block b) { builder.appendNull(); } else { scratch = block.getBytesRef(p, scratch); - byte[] bytes = HexFormat.of().parseHex(scratch.utf8ToString()); - if (bytes.length == 0) { - builder.appendNull(); - continue; - } - if (dimensions == 0) { - dimensions = bytes.length; - } else { - if (bytes.length != dimensions) { - throw new IllegalArgumentException("All dense_vector must have the same number of dimensions. Expected: " - + dimensions + ", found: " + bytes.length); + try { + byte[] bytes = HexFormat.of().parseHex(scratch.utf8ToString()); + if (bytes.length == 0) { + builder.appendNull(); + continue; } + if (dimensions == 0) { + dimensions = bytes.length; + } else { + if (bytes.length != dimensions) { + throw new IllegalArgumentException("All dense_vector must have the same number of dimensions. Expected: " + + dimensions + ", found: " + bytes.length); + } + } + builder.beginPositionEntry(); + for (byte value : bytes) { + builder.appendFloat(value); + } + builder.endPositionEntry(); + } catch (IllegalArgumentException e) { + registerException(e); + builder.appendNull(); } - builder.beginPositionEntry(); - for (byte value : bytes) { - builder.appendFloat(value); - } - builder.endPositionEntry(); } } return builder.build(); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorSimilarityFunction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorSimilarityFunction.java index d6b3ae759c362..dac96fb4c648a 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorSimilarityFunction.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorSimilarityFunction.java @@ -82,6 +82,17 @@ public int vectorArgumentsCount() { return 2; } +// @Override +// public BiConsumer postAnalysisPlanVerification() { +// return (lp, failures) -> { +// for (Expression child : children()) { +// if (child instanceof PostAnalysisVerificationAware pa) { +// pa.postAnalysisVerification(failures); +// } +// } +// }; +// } + @Override public final EvalOperator.ExpressionEvaluator.Factory toEvaluator(EvaluatorMapper.ToEvaluator toEvaluator) { return new SimilarityEvaluatorFactory( diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java index f2768eed1aa7b..cdecbf21275af 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java @@ -57,7 +57,6 @@ import org.elasticsearch.xpack.esql.expression.function.grouping.TBucket; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDateNanos; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDatetime; -import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDenseVector; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToInteger; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToLong; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToString; @@ -2388,16 +2387,15 @@ private static void checkDenseVectorCastingHexKnn(String fieldName) { private static void checkDenseVectorEvalCastingKnn(String fieldName) { var plan = analyze(String.format(Locale.ROOT, """ - from test | eval query = [0, 1, 2] | where knn(%s, query, 10) + from test | eval query = to_dense_vector([0, 1, 2]) | where knn(%s, query, 10) """, fieldName), "mapping-dense_vector.json"); var limit = as(plan, Limit.class); var filter = as(limit.child(), Filter.class); var knn = as(filter.condition(), Knn.class); - var queryVector = as(knn.query(), ToDenseVector.class); + var queryVector = as(knn.query(), ReferenceAttribute.class); assertEquals(DataType.DENSE_VECTOR, queryVector.dataType()); - var refAttr = as(queryVector.children().get(0), ReferenceAttribute.class); - assertThat(refAttr.name(), is("query")); + assertThat(queryVector.name(), is("query")); } public void testDenseVectorImplicitCastingSimilarityFunctions() { @@ -2490,7 +2488,7 @@ public void testDenseVectorEvalCastingSimilarityFunctions() { private void checkDenseVectorEvalCastingSimilarityFunction(String similarityFunction) { var plan = analyze(String.format(Locale.ROOT, """ - from test | eval query = [0.342, 0.164, 0.234] | eval similarity = %s + from test | eval query = to_dense_vector([0.342, 0.164, 0.234]) | eval similarity = %s """, similarityFunction), "mapping-dense_vector.json"); var limit = as(plan, Limit.class); @@ -2500,8 +2498,9 @@ private void checkDenseVectorEvalCastingSimilarityFunction(String similarityFunc var similarity = as(alias.child(), VectorSimilarityFunction.class); var left = as(similarity.left(), FieldAttribute.class); assertThat(List.of("float_vector", "byte_vector"), hasItem(left.name())); - var right = as(similarity.right(), ToDenseVector.class); + var right = as(similarity.right(), ReferenceAttribute.class); assertThat(right.dataType(), is(DENSE_VECTOR)); + assertThat(right.name(), is("query")); } public void testNoDenseVectorFailsSimilarityFunction() { @@ -2531,6 +2530,33 @@ private void checkNoDenseVectorFailsSimilarityFunction(String similarityFunction ); } + public void testVectorFunctionHexImplicitCastingError() { + if (EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()) { + checkVectorFunctionHexImplicitCastingError("where knn(float_vector, \"notcorrect\", 10)"); + } + if (EsqlCapabilities.Cap.DOT_PRODUCT_VECTOR_SIMILARITY_FUNCTION.isEnabled()) { + checkVectorFunctionHexImplicitCastingError("eval s = v_dot_product(\"notcorrect\", 0.342)"); + } + if (EsqlCapabilities.Cap.L1_NORM_VECTOR_SIMILARITY_FUNCTION.isEnabled()) { + checkVectorFunctionHexImplicitCastingError("eval s = v_l1_norm(\"notcorrect\", 0.342)"); + } + if (EsqlCapabilities.Cap.L2_NORM_VECTOR_SIMILARITY_FUNCTION.isEnabled()) { + checkVectorFunctionHexImplicitCastingError("eval s = v_l2_norm(\"notcorrect\", 0.342)"); + } + if (EsqlCapabilities.Cap.HAMMING_VECTOR_SIMILARITY_FUNCTION.isEnabled()) { + checkVectorFunctionHexImplicitCastingError("eval s = v_hamming(\"notcorrect\", 0.342)"); + } + } + + private void checkVectorFunctionHexImplicitCastingError(String clause) { + var query = "from test | " + clause; + VerificationException error = expectThrows(VerificationException.class, () -> analyze(query, "mapping-dense_vector.json")); + assertThat( + error.getMessage(), + containsString("for argument [\"notcorrect\"]; dense_vectors must be a hex-encoded string") + ); + } + public void testMagnitudePlanWithDenseVectorImplicitCasting() { assumeTrue("v_magnitude not available", EsqlCapabilities.Cap.MAGNITUDE_SCALAR_VECTOR_FUNCTION.isEnabled()); From 2656547d428feaab6f3bafcfd54aca83f88f8d27 Mon Sep 17 00:00:00 2001 From: cdelgado Date: Wed, 3 Sep 2025 18:35:26 +0200 Subject: [PATCH 13/25] Spotless --- .../xpack/esql/plugin/KnnFunctionIT.java | 1 - .../xpack/esql/analysis/Analyzer.java | 2 +- .../scalar/convert/ToDenseVector.java | 21 ++---- .../ToDenseVectorFromStringEvaluator.java | 15 ++-- .../vector/VectorSimilarityFunction.java | 20 +++--- .../xpack/esql/analysis/AnalyzerTests.java | 5 +- .../scalar/convert/ToDenseVectorTests.java | 70 ++++++------------- .../optimizer/LogicalPlanOptimizerTests.java | 1 - 8 files changed, 48 insertions(+), 87 deletions(-) diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java index 7fbb2c1851cc5..85e55a60bc613 100644 --- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java @@ -191,7 +191,6 @@ public void testKnnWithLookupJoin() { ); } - public void testKnnIncorrectCasting() { var query = String.format(Locale.ROOT, """ FROM test diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java index 37bdedd3654f2..43861152f49bc 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java @@ -1670,7 +1670,7 @@ private static Expression processVectorFunction(org.elasticsearch.xpack.esql.cor List args = vectorFunction.arguments(); List newArgs = new ArrayList<>(); // Only the first vector arguments are vectors and considered for casting - int vectorArgsCount = ((VectorFunction)vectorFunction).vectorArgumentsCount(); + int vectorArgsCount = ((VectorFunction) vectorFunction).vectorArgumentsCount(); for (int i = 0; i < args.size(); i++) { Expression arg = args.get(i); if (i < vectorArgsCount && arg.resolved()) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVector.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVector.java index a73564af09970..619bf7b2f769e 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVector.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVector.java @@ -12,8 +12,6 @@ import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.compute.ann.ConvertEvaluator; import org.elasticsearch.xpack.esql.capabilities.PostAnalysisPlanVerificationAware; -import org.elasticsearch.xpack.esql.capabilities.PostAnalysisVerificationAware; -import org.elasticsearch.xpack.esql.capabilities.PostOptimizationVerificationAware; import org.elasticsearch.xpack.esql.common.Failure; import org.elasticsearch.xpack.esql.common.Failures; import org.elasticsearch.xpack.esql.core.expression.Expression; @@ -76,28 +74,19 @@ public BiConsumer postAnalysisPlanVerification() { Object fold = arg.fold(FoldContext.small()); if ((fold instanceof List list) && arg.dataType().isNumeric()) { if (list.size() <= 1) { - failures.add(Failure.fail( - this, - "[" + sourceText() + "] requires at least two values to convert to a dense_vector" - )); + failures.add( + Failure.fail(this, "[" + sourceText() + "] requires at least two values to convert to a dense_vector") + ); } return; } if ((arg.dataType() == KEYWORD) && fold instanceof BytesRef bytesRef) { if (bytesRef.length == 0) { - failures.add(Failure.fail( - this, - "[" - + sourceText() - + "] must be a non-empty hexadecimal string")); + failures.add(Failure.fail(this, "[" + sourceText() + "] must be a non-empty hexadecimal string")); } return; } - failures.add(Failure.fail( - this, - "[" - + sourceText() - + "] must be a multi-valued input of numbers or an hexadecimal string")); + failures.add(Failure.fail(this, "[" + sourceText() + "] must be a multi-valued input of numbers or an hexadecimal string")); } }; } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorFromStringEvaluator.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorFromStringEvaluator.java index 63ec9ee28b5f5..42977774511c1 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorFromStringEvaluator.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorFromStringEvaluator.java @@ -16,15 +16,12 @@ import org.elasticsearch.compute.operator.DriverContext; import org.elasticsearch.compute.operator.EvalOperator; import org.elasticsearch.core.Releasables; -import org.elasticsearch.xpack.esql.core.InvalidArgumentException; import org.elasticsearch.xpack.esql.core.tree.Source; import java.util.HexFormat; public class ToDenseVectorFromStringEvaluator extends AbstractConvertFunction.AbstractEvaluator { - private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance( - ToDenseVectorFromStringEvaluator.class - ); + private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(ToDenseVectorFromStringEvaluator.class); private final EvalOperator.ExpressionEvaluator field; @@ -65,8 +62,12 @@ public Block evalBlock(Block b) { dimensions = bytes.length; } else { if (bytes.length != dimensions) { - throw new IllegalArgumentException("All dense_vector must have the same number of dimensions. Expected: " - + dimensions + ", found: " + bytes.length); + throw new IllegalArgumentException( + "All dense_vector must have the same number of dimensions. Expected: " + + dimensions + + ", found: " + + bytes.length + ); } } builder.beginPositionEntry(); @@ -74,7 +75,7 @@ public Block evalBlock(Block b) { builder.appendFloat(value); } builder.endPositionEntry(); - } catch (IllegalArgumentException e) { + } catch (IllegalArgumentException e) { registerException(e); builder.appendNull(); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorSimilarityFunction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorSimilarityFunction.java index dac96fb4c648a..ae4fd014c4cae 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorSimilarityFunction.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorSimilarityFunction.java @@ -82,16 +82,16 @@ public int vectorArgumentsCount() { return 2; } -// @Override -// public BiConsumer postAnalysisPlanVerification() { -// return (lp, failures) -> { -// for (Expression child : children()) { -// if (child instanceof PostAnalysisVerificationAware pa) { -// pa.postAnalysisVerification(failures); -// } -// } -// }; -// } + // @Override + // public BiConsumer postAnalysisPlanVerification() { + // return (lp, failures) -> { + // for (Expression child : children()) { + // if (child instanceof PostAnalysisVerificationAware pa) { + // pa.postAnalysisVerification(failures); + // } + // } + // }; + // } @Override public final EvalOperator.ExpressionEvaluator.Factory toEvaluator(EvaluatorMapper.ToEvaluator toEvaluator) { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java index c6eaf00dc7be7..7df681dabe653 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java @@ -2541,10 +2541,7 @@ public void testVectorFunctionHexImplicitCastingError() { private void checkVectorFunctionHexImplicitCastingError(String clause) { var query = "from test | " + clause; VerificationException error = expectThrows(VerificationException.class, () -> analyze(query, "mapping-dense_vector.json")); - assertThat( - error.getMessage(), - containsString("for argument [\"notcorrect\"]; dense_vectors must be a hex-encoded string") - ); + assertThat(error.getMessage(), containsString("for argument [\"notcorrect\"]; dense_vectors must be a hex-encoded string")); } public void testMagnitudePlanWithDenseVectorImplicitCasting() { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorTests.java index 9d08da1219c71..58e93a5999e22 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorTests.java @@ -57,54 +57,30 @@ public static Iterable parameters() { ); })); - suppliers.add( - new TestCaseSupplier( - "double", - List.of(DataType.DOUBLE), - () -> { - List data = Arrays.asList(randomArray(1, 10, Double[]::new, ESTestCase::randomDouble)); - return new TestCaseSupplier.TestCase( - List.of( - new TestCaseSupplier.TypedData( - data, - DataType.DOUBLE, - "double" - ) - ), - evaluatorName("Double", "d"), - DataType.DENSE_VECTOR, - equalTo(data.stream().map(Number::floatValue).toList()) - ); - } - ) - ); + suppliers.add(new TestCaseSupplier("double", List.of(DataType.DOUBLE), () -> { + List data = Arrays.asList(randomArray(1, 10, Double[]::new, ESTestCase::randomDouble)); + return new TestCaseSupplier.TestCase( + List.of(new TestCaseSupplier.TypedData(data, DataType.DOUBLE, "double")), + evaluatorName("Double", "d"), + DataType.DENSE_VECTOR, + equalTo(data.stream().map(Number::floatValue).toList()) + ); + })); - suppliers.add( - new TestCaseSupplier( - "keyword", - List.of(DataType.KEYWORD), - () -> { - byte[] bytes = randomByteArrayOfLength(randomIntBetween(1, 20)); - String data = HexFormat.of().formatHex(bytes); - List expected = new ArrayList<>(bytes.length); - for (int i = 0; i < bytes.length; i++) { - expected.add((float) bytes[i]); - } - return new TestCaseSupplier.TestCase( - List.of( - new TestCaseSupplier.TypedData( - new BytesRef(data), - DataType.KEYWORD, - "keyword" - ) - ), - evaluatorName("String", "s"), - DataType.DENSE_VECTOR, - is(expected) - ); - } - ) - ); + suppliers.add(new TestCaseSupplier("keyword", List.of(DataType.KEYWORD), () -> { + byte[] bytes = randomByteArrayOfLength(randomIntBetween(1, 20)); + String data = HexFormat.of().formatHex(bytes); + List expected = new ArrayList<>(bytes.length); + for (int i = 0; i < bytes.length; i++) { + expected.add((float) bytes[i]); + } + return new TestCaseSupplier.TestCase( + List.of(new TestCaseSupplier.TypedData(new BytesRef(data), DataType.KEYWORD, "keyword")), + evaluatorName("String", "s"), + DataType.DENSE_VECTOR, + is(expected) + ); + })); return parameterSuppliersFromTypedDataWithDefaultChecksNoErrors(true, suppliers); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java index 4e25abc573389..6a527dbaba26f 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java @@ -8625,7 +8625,6 @@ public void testMorePushDownConjunctionsAndNotDisjunctionsToKnnPrefilter() { assertThat(leftAnd.right(), equalTo(rightAndPrefilter)); } - public void testTest() { assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()); From 09566ed9e0a476839b6d2e02a8e2b299b26d62f6 Mon Sep 17 00:00:00 2001 From: cdelgado Date: Wed, 3 Sep 2025 18:53:59 +0200 Subject: [PATCH 14/25] Add comments, remove unnecessary code --- .../xpack/esql/plugin/KnnFunctionIT.java | 16 -------- .../xpack/esql/analysis/Analyzer.java | 5 ++- .../scalar/convert/ToDenseVector.java | 37 ++----------------- .../ToDenseVectorFromStringEvaluator.java | 8 +++- .../vector/VectorSimilarityFunction.java | 11 ------ .../optimizer/LogicalPlanOptimizerTests.java | 11 ------ 6 files changed, 13 insertions(+), 75 deletions(-) diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java index 85e55a60bc613..d44a9b458b082 100644 --- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java @@ -191,22 +191,6 @@ public void testKnnWithLookupJoin() { ); } - public void testKnnIncorrectCasting() { - var query = String.format(Locale.ROOT, """ - FROM test - | WHERE KNN(vector, "notcorrect", 5) - """); - - var error = expectThrows(VerificationException.class, () -> run(query)); - assertThat( - error.getMessage(), - containsString( - "line 3:13: [KNN] function cannot operate on [lookup_vector], supplied by an index [test_lookup] in non-STANDARD " - + "mode [lookup]" - ) - ); - } - @Before public void setup() throws IOException { assumeTrue("Needs KNN support", EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java index 43861152f49bc..d4509b61957a3 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java @@ -1669,7 +1669,9 @@ private static Expression castStringLiteral(Expression from, DataType target) { private static Expression processVectorFunction(org.elasticsearch.xpack.esql.core.expression.function.Function vectorFunction) { List args = vectorFunction.arguments(); List newArgs = new ArrayList<>(); - // Only the first vector arguments are vectors and considered for casting + // Perform explicit casting for vector arguments. This is done instead of using TO_DENSE_VECTOR function for better error + // handling. Otherwise, a failure in TO_DENSE_VECTOR will be returned as null, and the user will be confused as to why + // the original function has a null parameter when one has been provided. int vectorArgsCount = ((VectorFunction) vectorFunction).vectorArgumentsCount(); for (int i = 0; i < args.size(); i++) { Expression arg = args.get(i); @@ -1678,7 +1680,6 @@ private static Expression processVectorFunction(org.elasticsearch.xpack.esql.cor Object folded = arg.fold(FoldContext.small()); List floatVector = null; if (folded instanceof List && arg.dataType().isNumeric()) { - // Convert to floats so blocks are created accordingly if (arg.dataType() == FLOAT) { floatVector = (List) folded; } else { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVector.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVector.java index 619bf7b2f769e..f70c0a59b2ece 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVector.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVector.java @@ -7,27 +7,20 @@ package org.elasticsearch.xpack.esql.expression.function.scalar.convert; -import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.compute.ann.ConvertEvaluator; -import org.elasticsearch.xpack.esql.capabilities.PostAnalysisPlanVerificationAware; -import org.elasticsearch.xpack.esql.common.Failure; -import org.elasticsearch.xpack.esql.common.Failures; import org.elasticsearch.xpack.esql.core.expression.Expression; -import org.elasticsearch.xpack.esql.core.expression.FoldContext; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.expression.function.Example; import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; import org.elasticsearch.xpack.esql.expression.function.Param; -import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; import java.io.IOException; import java.util.List; import java.util.Map; -import java.util.function.BiConsumer; import static org.elasticsearch.xpack.esql.core.type.DataType.DENSE_VECTOR; import static org.elasticsearch.xpack.esql.core.type.DataType.DOUBLE; @@ -35,7 +28,10 @@ import static org.elasticsearch.xpack.esql.core.type.DataType.KEYWORD; import static org.elasticsearch.xpack.esql.core.type.DataType.LONG; -public class ToDenseVector extends AbstractConvertFunction implements PostAnalysisPlanVerificationAware { +/** + * Converts a multi-valued input of numbers, or a hexadecimal string, to a dense_vector. + */ +public class ToDenseVector extends AbstractConvertFunction { public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry( Expression.class, "ToDenseVector", @@ -66,31 +62,6 @@ public ToDenseVector( super(source, field); } - @Override - public BiConsumer postAnalysisPlanVerification() { - return (lp, failures) -> { - Expression arg = children().get(0); - if (arg.foldable()) { - Object fold = arg.fold(FoldContext.small()); - if ((fold instanceof List list) && arg.dataType().isNumeric()) { - if (list.size() <= 1) { - failures.add( - Failure.fail(this, "[" + sourceText() + "] requires at least two values to convert to a dense_vector") - ); - } - return; - } - if ((arg.dataType() == KEYWORD) && fold instanceof BytesRef bytesRef) { - if (bytesRef.length == 0) { - failures.add(Failure.fail(this, "[" + sourceText() + "] must be a non-empty hexadecimal string")); - } - return; - } - failures.add(Failure.fail(this, "[" + sourceText() + "] must be a multi-valued input of numbers or an hexadecimal string")); - } - }; - } - private ToDenseVector(StreamInput in) throws IOException { super(in); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorFromStringEvaluator.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorFromStringEvaluator.java index 42977774511c1..67d19e03dc4d7 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorFromStringEvaluator.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorFromStringEvaluator.java @@ -20,12 +20,16 @@ import java.util.HexFormat; -public class ToDenseVectorFromStringEvaluator extends AbstractConvertFunction.AbstractEvaluator { +/** + * String evaluator for to_dense_vector function. Converts a hexadecimal string to a dense_vector of bytes. + * Cannot be automatically generated as it generates multivalues for a single hex string, representing the dense_vector byte array. + */ +class ToDenseVectorFromStringEvaluator extends AbstractConvertFunction.AbstractEvaluator { private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(ToDenseVectorFromStringEvaluator.class); private final EvalOperator.ExpressionEvaluator field; - public ToDenseVectorFromStringEvaluator(Source source, EvalOperator.ExpressionEvaluator field, DriverContext driverContext) { + ToDenseVectorFromStringEvaluator(Source source, EvalOperator.ExpressionEvaluator field, DriverContext driverContext) { super(driverContext, source); this.field = field; } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorSimilarityFunction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorSimilarityFunction.java index ae4fd014c4cae..d6b3ae759c362 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorSimilarityFunction.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorSimilarityFunction.java @@ -82,17 +82,6 @@ public int vectorArgumentsCount() { return 2; } - // @Override - // public BiConsumer postAnalysisPlanVerification() { - // return (lp, failures) -> { - // for (Expression child : children()) { - // if (child instanceof PostAnalysisVerificationAware pa) { - // pa.postAnalysisVerification(failures); - // } - // } - // }; - // } - @Override public final EvalOperator.ExpressionEvaluator.Factory toEvaluator(EvaluatorMapper.ToEvaluator toEvaluator) { return new SimilarityEvaluatorFactory( diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java index 6a527dbaba26f..67bb40214cb7b 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java @@ -8625,17 +8625,6 @@ public void testMorePushDownConjunctionsAndNotDisjunctionsToKnnPrefilter() { assertThat(leftAnd.right(), equalTo(rightAndPrefilter)); } - public void testTest() { - assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()); - - var query = """ - from test - | where knn(dense_vector, 1127, 10) - """; - var optimized = planTypes(query); - var limit = as(optimized, Limit.class); - } - public void testMultipleKnnQueriesInPrefilters() { assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()); From b8944eca1fda6e3874f5d269ab375534a614e5d2 Mon Sep 17 00:00:00 2001 From: cdelgado Date: Thu, 4 Sep 2025 09:26:34 +0200 Subject: [PATCH 15/25] Fix checkstyle --- .../scalar/convert/ToDenseVectorFromStringEvaluator.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorFromStringEvaluator.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorFromStringEvaluator.java index 67d19e03dc4d7..9470e744099b2 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorFromStringEvaluator.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorFromStringEvaluator.java @@ -104,11 +104,11 @@ public void close() { Releasables.closeExpectNoException(field); } - public static class Factory implements EvalOperator.ExpressionEvaluator.Factory { + static class Factory implements EvalOperator.ExpressionEvaluator.Factory { private final Source source; private final EvalOperator.ExpressionEvaluator.Factory field; - public Factory(Source source, EvalOperator.ExpressionEvaluator.Factory field) { + Factory(Source source, EvalOperator.ExpressionEvaluator.Factory field) { this.source = source; this.field = field; } From 68a696b4fd7d5080b9a2be6eddb7e67bfc3d1681 Mon Sep 17 00:00:00 2001 From: cdelgado Date: Thu, 4 Sep 2025 10:07:01 +0200 Subject: [PATCH 16/25] Add docs --- .../esql/images/functions/to_dense_vector.svg | 1 + .../kibana/definition/functions/to_dense_vector.json | 12 ++++++++++++ .../esql/kibana/docs/functions/to_dense_vector.md | 10 ++++++++++ 3 files changed, 23 insertions(+) create mode 100644 docs/reference/query-languages/esql/images/functions/to_dense_vector.svg create mode 100644 docs/reference/query-languages/esql/kibana/definition/functions/to_dense_vector.json create mode 100644 docs/reference/query-languages/esql/kibana/docs/functions/to_dense_vector.md diff --git a/docs/reference/query-languages/esql/images/functions/to_dense_vector.svg b/docs/reference/query-languages/esql/images/functions/to_dense_vector.svg new file mode 100644 index 0000000000000..36a8f66b3af2a --- /dev/null +++ b/docs/reference/query-languages/esql/images/functions/to_dense_vector.svg @@ -0,0 +1 @@ +TO_DENSE_VECTOR(field) \ No newline at end of file diff --git a/docs/reference/query-languages/esql/kibana/definition/functions/to_dense_vector.json b/docs/reference/query-languages/esql/kibana/definition/functions/to_dense_vector.json new file mode 100644 index 0000000000000..932937bf10c6c --- /dev/null +++ b/docs/reference/query-languages/esql/kibana/definition/functions/to_dense_vector.json @@ -0,0 +1,12 @@ +{ + "comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.", + "type" : "scalar", + "name" : "to_dense_vector", + "description" : "Converts a multi-valued input of numbers, or a hexadecimal string, to a dense_vector.", + "signatures" : [ ], + "examples" : [ + "row ints = [1, 2, 3]\n| eval vector = to_dense_vector(ints)\n| keep vector" + ], + "preview" : false, + "snapshot_only" : true +} diff --git a/docs/reference/query-languages/esql/kibana/docs/functions/to_dense_vector.md b/docs/reference/query-languages/esql/kibana/docs/functions/to_dense_vector.md new file mode 100644 index 0000000000000..309d975be8bfc --- /dev/null +++ b/docs/reference/query-languages/esql/kibana/docs/functions/to_dense_vector.md @@ -0,0 +1,10 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +### TO DENSE VECTOR +Converts a multi-valued input of numbers, or a hexadecimal string, to a dense_vector. + +```esql +row ints = [1, 2, 3] +| eval vector = to_dense_vector(ints) +| keep vector +``` From 3b5b7be2365e690882e67ba0fa4a4497dbdaf4af Mon Sep 17 00:00:00 2001 From: cdelgado Date: Thu, 4 Sep 2025 13:30:07 +0200 Subject: [PATCH 17/25] Use EsqlDataTypeConverter for converting strings, use target data type for checking which args to convert --- .../xpack/esql/analysis/Analyzer.java | 57 +++++-------- .../function/EsqlFunctionRegistry.java | 2 + .../esql/expression/function/vector/Knn.java | 5 -- .../expression/function/vector/Magnitude.java | 5 -- .../function/vector/VectorFunction.java | 11 +-- .../vector/VectorSimilarityFunction.java | 5 -- .../esql/type/EsqlDataTypeConverter.java | 26 +++++- .../xpack/esql/analysis/AnalyzerTests.java | 85 ++++++------------- 8 files changed, 75 insertions(+), 121 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java index d4509b61957a3..b9abfafdc8945 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java @@ -7,7 +7,6 @@ package org.elasticsearch.xpack.esql.analysis; -import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.logging.HeaderWarning; import org.elasticsearch.common.logging.LoggerMessageFormat; import org.elasticsearch.common.lucene.BytesRefs; @@ -77,6 +76,7 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.convert.FromAggregateMetricDouble; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToAggregateMetricDouble; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDateNanos; +import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDenseVector; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDouble; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToInteger; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToLong; @@ -139,7 +139,6 @@ import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; -import java.util.HexFormat; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -157,6 +156,7 @@ import static org.elasticsearch.xpack.esql.core.type.DataType.DATETIME; import static org.elasticsearch.xpack.esql.core.type.DataType.DATE_NANOS; import static org.elasticsearch.xpack.esql.core.type.DataType.DATE_PERIOD; +import static org.elasticsearch.xpack.esql.core.type.DataType.DENSE_VECTOR; import static org.elasticsearch.xpack.esql.core.type.DataType.DOUBLE; import static org.elasticsearch.xpack.esql.core.type.DataType.FLOAT; import static org.elasticsearch.xpack.esql.core.type.DataType.GEO_POINT; @@ -1453,7 +1453,7 @@ private static Expression cast(org.elasticsearch.xpack.esql.core.expression.func return processIn(in); } if (f instanceof VectorFunction) { - return processVectorFunction(f); + return processVectorFunction(f, registry); } if (f instanceof EsqlScalarFunction || f instanceof GroupingFunction) { // exclude AggregateFunction until it is needed return processScalarOrGroupingFunction(f, registry); @@ -1666,46 +1666,29 @@ private static Expression castStringLiteral(Expression from, DataType target) { } @SuppressWarnings("unchecked") - private static Expression processVectorFunction(org.elasticsearch.xpack.esql.core.expression.function.Function vectorFunction) { + private static Expression processVectorFunction( + org.elasticsearch.xpack.esql.core.expression.function.Function vectorFunction, + EsqlFunctionRegistry registry + ) { List args = vectorFunction.arguments(); + List targetDataTypes = registry.getDataTypeForStringLiteralConversion(vectorFunction.getClass()); List newArgs = new ArrayList<>(); - // Perform explicit casting for vector arguments. This is done instead of using TO_DENSE_VECTOR function for better error - // handling. Otherwise, a failure in TO_DENSE_VECTOR will be returned as null, and the user will be confused as to why - // the original function has a null parameter when one has been provided. - int vectorArgsCount = ((VectorFunction) vectorFunction).vectorArgumentsCount(); + // Perform implicit casting for numeric and keyword values for (int i = 0; i < args.size(); i++) { Expression arg = args.get(i); - if (i < vectorArgsCount && arg.resolved()) { - if (arg.foldable()) { - Object folded = arg.fold(FoldContext.small()); - List floatVector = null; - if (folded instanceof List && arg.dataType().isNumeric()) { - if (arg.dataType() == FLOAT) { - floatVector = (List) folded; - } else { - floatVector = ((List) folded).stream().map(Number::floatValue).collect(Collectors.toList()); - } - } else if (folded instanceof BytesRef hexString && arg.dataType() == KEYWORD) { - try { - byte[] bytes = HexFormat.of().parseHex(hexString.utf8ToString()); - floatVector = new ArrayList<>(); - for (byte value : bytes) { - floatVector.add((float) value); + if (targetDataTypes.get(i) == DENSE_VECTOR) { + if (arg.resolved()) { + var dataType = arg.dataType(); + if (dataType == KEYWORD) { + if (arg.foldable()) { + Expression exp = castStringLiteral(arg, DENSE_VECTOR); + if (exp != arg) { + newArgs.add(exp); + continue; } - } catch (IllegalArgumentException e) { - throw new VerificationException( - "Error in [" - + vectorFunction.sourceText() - + "] for argument [" - + arg.sourceText() - + "]; dense_vectors must be a hex-encoded string: " - + e.getMessage() - ); } - } - if (floatVector != null) { - Literal denseVector = new Literal(arg.source(), floatVector, DataType.DENSE_VECTOR); - newArgs.add(denseVector); + } else if (arg.dataType().isNumeric()) { + newArgs.add(new ToDenseVector(vectorFunction.source(), arg)); continue; } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java index 3da7002e6e2b3..a31d9b4851997 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java @@ -214,6 +214,7 @@ import static org.elasticsearch.xpack.esql.core.type.DataType.CARTESIAN_SHAPE; import static org.elasticsearch.xpack.esql.core.type.DataType.DATETIME; import static org.elasticsearch.xpack.esql.core.type.DataType.DATE_PERIOD; +import static org.elasticsearch.xpack.esql.core.type.DataType.DENSE_VECTOR; import static org.elasticsearch.xpack.esql.core.type.DataType.DOUBLE; import static org.elasticsearch.xpack.esql.core.type.DataType.GEOHASH; import static org.elasticsearch.xpack.esql.core.type.DataType.GEOHEX; @@ -252,6 +253,7 @@ public class EsqlFunctionRegistry { GEOTILE, BOOLEAN, UNSIGNED_LONG, + DENSE_VECTOR, UNSUPPORTED ); DATA_TYPE_CASTING_PRIORITY = new HashMap<>(); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java index 5f97214583424..0b64fb43909df 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java @@ -203,11 +203,6 @@ public DataType dataType() { return DataType.BOOLEAN; } - @Override - public int vectorArgumentsCount() { - return 2; - } - @Override protected TypeResolution resolveParams() { return resolveField().and(resolveQuery()).and(resolveK()).and(Options.resolve(options(), source(), FOURTH, ALLOWED_OPTIONS)); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Magnitude.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Magnitude.java index 5f302f7349703..f629de35af7b8 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Magnitude.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Magnitude.java @@ -96,11 +96,6 @@ protected TypeResolution resolveType() { return isType(field(), dt -> dt == DENSE_VECTOR, sourceText(), TypeResolutions.ParamOrdinal.FIRST, "dense_vector"); } - @Override - public int vectorArgumentsCount() { - return 1; - } - /** * Functional interface for evaluating the scalar value of the underlying float array. */ diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorFunction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorFunction.java index 224f82794d851..ca983caf5615f 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorFunction.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorFunction.java @@ -9,14 +9,7 @@ /** * Marker interface for vector functions. Makes possible to do implicit casting - * from multi values to dense_vector field types, so parameters are actually + * from multi values and hex strings to dense_vector field types, so parameters are actually * processed as dense_vectors in vector functions */ -public interface VectorFunction { - - /** - * Number of arguments that should be treated as vectors. The first vectorArgumentsCount() arguments will be implicitly casted as - * dense_vector according to the value returned of this method - */ - int vectorArgumentsCount(); -} +public interface VectorFunction {} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorSimilarityFunction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorSimilarityFunction.java index d6b3ae759c362..1e9727398d458 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorSimilarityFunction.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorSimilarityFunction.java @@ -77,11 +77,6 @@ public Object fold(FoldContext ctx) { return EvaluatorMapper.super.fold(source(), ctx); } - @Override - public int vectorArgumentsCount() { - return 2; - } - @Override public final EvalOperator.ExpressionEvaluator.Factory toEvaluator(EvaluatorMapper.ToEvaluator toEvaluator) { return new SimilarityEvaluatorFactory( diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverter.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverter.java index e3a6b08cac1fd..457cb0a9fb264 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverter.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverter.java @@ -47,6 +47,7 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDateNanos; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDatePeriod; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDatetime; +import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDenseVector; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDouble; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToGeoPoint; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToGeoShape; @@ -74,6 +75,8 @@ import java.time.ZoneId; import java.time.temporal.ChronoField; import java.time.temporal.TemporalAmount; +import java.util.ArrayList; +import java.util.HexFormat; import java.util.List; import java.util.Locale; import java.util.Map; @@ -88,6 +91,7 @@ import static org.elasticsearch.xpack.esql.core.type.DataType.DATETIME; import static org.elasticsearch.xpack.esql.core.type.DataType.DATE_NANOS; import static org.elasticsearch.xpack.esql.core.type.DataType.DATE_PERIOD; +import static org.elasticsearch.xpack.esql.core.type.DataType.DENSE_VECTOR; import static org.elasticsearch.xpack.esql.core.type.DataType.DOUBLE; import static org.elasticsearch.xpack.esql.core.type.DataType.GEOHASH; import static org.elasticsearch.xpack.esql.core.type.DataType.GEOHEX; @@ -149,7 +153,8 @@ public class EsqlDataTypeConverter { entry(UNSIGNED_LONG, ToUnsignedLong::new), entry(VERSION, ToVersion::new), entry(DATE_PERIOD, ToDatePeriod::new), - entry(TIME_DURATION, ToTimeDuration::new) + entry(TIME_DURATION, ToTimeDuration::new), + entry(DENSE_VECTOR, ToDenseVector::new) ); public enum INTERVALS { @@ -272,6 +277,9 @@ public static Converter converterFor(DataType from, DataType to) { if (to == DataType.DATE_PERIOD) { return EsqlConverter.STRING_TO_DATE_PERIOD; } + if (to == DENSE_VECTOR) { + return EsqlConverter.STRING_TO_DENSE_VECTOR; + } } Converter converter = DataTypeConverter.converterFor(from, to); if (converter != null) { @@ -732,6 +740,19 @@ public static boolean unsignedLongToBoolean(long number) { return n instanceof BigInteger || n.longValue() != 0; } + public static List stringToDenseVector(String field) { + try { + byte[] bytes = HexFormat.of().parseHex(field); + List vector = new ArrayList<>(bytes.length); + for (byte value : bytes) { + vector.add((float) value); + } + return vector; + } catch (NumberFormatException e) { + throw new IllegalArgumentException(String.format(Locale.ROOT, "%s is not a valid hex string: %s", field, e.getMessage())); + } + } + public static long booleanToUnsignedLong(boolean number) { return number ? ONE_AS_UNSIGNED_LONG : ZERO_AS_UNSIGNED_LONG; } @@ -827,7 +848,8 @@ public enum EsqlConverter implements Converter { STRING_TO_SPATIAL(x -> EsqlDataTypeConverter.stringToSpatial(BytesRefs.toString(x))), STRING_TO_GEOHASH(x -> Geohash.longEncode(BytesRefs.toString(x))), STRING_TO_GEOTILE(x -> GeoTileUtils.longEncode(BytesRefs.toString(x))), - STRING_TO_GEOHEX(x -> H3.stringToH3(BytesRefs.toString(x))); + STRING_TO_GEOHEX(x -> H3.stringToH3(BytesRefs.toString(x))), + STRING_TO_DENSE_VECTOR(x -> EsqlDataTypeConverter.stringToDenseVector(BytesRefs.toString(x))); private static final String NAME = "esql-converter"; private final Function converter; diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java index 7df681dabe653..f31804628d6bb 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java @@ -57,6 +57,7 @@ import org.elasticsearch.xpack.esql.expression.function.grouping.TBucket; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDateNanos; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDatetime; +import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDenseVector; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToInteger; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToLong; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToString; @@ -2367,9 +2368,9 @@ private static void checkDenseVectorCastingKnn(String fieldName) { var limit = as(plan, Limit.class); var filter = as(limit.child(), Filter.class); var knn = as(filter.condition(), Knn.class); - var queryVector = as(knn.query(), Literal.class); - assertEquals(DataType.DENSE_VECTOR, queryVector.dataType()); - assertThat(queryVector.value(), equalTo(List.of(0.0f, 1.0f, 2.0f))); + var conversion = as(knn.query(), ToDenseVector.class); + var literal = as(conversion.field(), Literal.class); + assertThat(literal.value(), equalTo(List.of(0, 1, 2))); } private static void checkDenseVectorCastingHexKnn(String fieldName) { @@ -2402,37 +2403,37 @@ public void testDenseVectorImplicitCastingSimilarityFunctions() { if (EsqlCapabilities.Cap.COSINE_VECTOR_SIMILARITY_FUNCTION.isEnabled()) { checkDenseVectorImplicitCastingSimilarityFunction( "v_cosine(float_vector, [0.342, 0.164, 0.234])", - List.of(0.342f, 0.164f, 0.234f) + List.of(0.342, 0.164, 0.234) ); - checkDenseVectorImplicitCastingSimilarityFunction("v_cosine(byte_vector, [1, 2, 3])", List.of(1f, 2f, 3f)); + checkDenseVectorImplicitCastingSimilarityFunction("v_cosine(byte_vector, [1, 2, 3])", List.of(1, 2, 3)); } if (EsqlCapabilities.Cap.DOT_PRODUCT_VECTOR_SIMILARITY_FUNCTION.isEnabled()) { checkDenseVectorImplicitCastingSimilarityFunction( "v_dot_product(float_vector, [0.342, 0.164, 0.234])", - List.of(0.342f, 0.164f, 0.234f) + List.of(0.342, 0.164, 0.234) ); - checkDenseVectorImplicitCastingSimilarityFunction("v_dot_product(byte_vector, [1, 2, 3])", List.of(1f, 2f, 3f)); + checkDenseVectorImplicitCastingSimilarityFunction("v_dot_product(byte_vector, [1, 2, 3])", List.of(1, 2, 3)); } if (EsqlCapabilities.Cap.L1_NORM_VECTOR_SIMILARITY_FUNCTION.isEnabled()) { checkDenseVectorImplicitCastingSimilarityFunction( "v_l1_norm(float_vector, [0.342, 0.164, 0.234])", - List.of(0.342f, 0.164f, 0.234f) + List.of(0.342, 0.164, 0.234) ); - checkDenseVectorImplicitCastingSimilarityFunction("v_l1_norm(byte_vector, [1, 2, 3])", List.of(1f, 2f, 3f)); + checkDenseVectorImplicitCastingSimilarityFunction("v_l1_norm(byte_vector, [1, 2, 3])", List.of(1, 2, 3)); } if (EsqlCapabilities.Cap.L2_NORM_VECTOR_SIMILARITY_FUNCTION.isEnabled()) { checkDenseVectorImplicitCastingSimilarityFunction( "v_l2_norm(float_vector, [0.342, 0.164, 0.234])", - List.of(0.342f, 0.164f, 0.234f) + List.of(0.342, 0.164, 0.234) ); - checkDenseVectorImplicitCastingSimilarityFunction("v_l2_norm(float_vector, [1, 2, 3])", List.of(1f, 2f, 3f)); + checkDenseVectorImplicitCastingSimilarityFunction("v_l2_norm(float_vector, [1, 2, 3])", List.of(1, 2, 3)); } if (EsqlCapabilities.Cap.HAMMING_VECTOR_SIMILARITY_FUNCTION.isEnabled()) { checkDenseVectorImplicitCastingSimilarityFunction( "v_hamming(byte_vector, [0.342, 0.164, 0.234])", - List.of(0.342f, 0.164f, 0.234f) + List.of(0.342, 0.164, 0.234) ); - checkDenseVectorImplicitCastingSimilarityFunction("v_hamming(byte_vector, [1, 2, 3])", List.of(1f, 2f, 3f)); + checkDenseVectorImplicitCastingSimilarityFunction("v_hamming(byte_vector, [1, 2, 3])", List.of(1, 2, 3)); } } @@ -2448,9 +2449,9 @@ private void checkDenseVectorImplicitCastingSimilarityFunction(String similarity var similarity = as(alias.child(), VectorSimilarityFunction.class); var left = as(similarity.left(), FieldAttribute.class); assertThat(List.of("float_vector", "byte_vector"), hasItem(left.name())); - var right = as(similarity.right(), Literal.class); - assertThat(right.dataType(), is(DENSE_VECTOR)); - assertThat(right.value(), equalTo(expectedElems)); + var right = as(similarity.right(), ToDenseVector.class); + var literal = as(right.field(), Literal.class); + assertThat(literal.value(), equalTo(expectedElems)); } public void testDenseVectorEvalCastingSimilarityFunctions() { @@ -2493,33 +2494,6 @@ private void checkDenseVectorEvalCastingSimilarityFunction(String similarityFunc assertThat(right.name(), is("query")); } - public void testNoDenseVectorFailsSimilarityFunction() { - if (EsqlCapabilities.Cap.COSINE_VECTOR_SIMILARITY_FUNCTION.isEnabled()) { - checkNoDenseVectorFailsSimilarityFunction("v_cosine([0, 1, 2], 0.342)"); - } - if (EsqlCapabilities.Cap.DOT_PRODUCT_VECTOR_SIMILARITY_FUNCTION.isEnabled()) { - checkNoDenseVectorFailsSimilarityFunction("v_dot_product([0, 1, 2], 0.342)"); - } - if (EsqlCapabilities.Cap.L1_NORM_VECTOR_SIMILARITY_FUNCTION.isEnabled()) { - checkNoDenseVectorFailsSimilarityFunction("v_l1_norm([0, 1, 2], 0.342)"); - } - if (EsqlCapabilities.Cap.L2_NORM_VECTOR_SIMILARITY_FUNCTION.isEnabled()) { - checkNoDenseVectorFailsSimilarityFunction("v_l2_norm([0, 1, 2], 0.342)"); - } - if (EsqlCapabilities.Cap.HAMMING_VECTOR_SIMILARITY_FUNCTION.isEnabled()) { - checkNoDenseVectorFailsSimilarityFunction("v_hamming([0, 1, 2], 0.342)"); - } - } - - private void checkNoDenseVectorFailsSimilarityFunction(String similarityFunction) { - var query = String.format(Locale.ROOT, "row a = 1 | eval similarity = %s", similarityFunction); - VerificationException error = expectThrows(VerificationException.class, () -> analyze(query)); - assertThat( - error.getMessage(), - containsString("second argument of [" + similarityFunction + "] must be" + " [dense_vector], found value [0.342] type [double]") - ); - } - public void testVectorFunctionHexImplicitCastingError() { if (EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()) { checkVectorFunctionHexImplicitCastingError("where knn(float_vector, \"notcorrect\", 10)"); @@ -2541,7 +2515,13 @@ public void testVectorFunctionHexImplicitCastingError() { private void checkVectorFunctionHexImplicitCastingError(String clause) { var query = "from test | " + clause; VerificationException error = expectThrows(VerificationException.class, () -> analyze(query, "mapping-dense_vector.json")); - assertThat(error.getMessage(), containsString("for argument [\"notcorrect\"]; dense_vectors must be a hex-encoded string")); + assertThat( + error.getMessage(), + containsString( + "Cannot convert string [notcorrect] to [DENSE_VECTOR], " + + "error [notcorrect is not a valid hex string: not a hexadecimal digit: \"n\" = 110]" + ) + ); } public void testMagnitudePlanWithDenseVectorImplicitCasting() { @@ -2556,20 +2536,9 @@ public void testMagnitudePlanWithDenseVectorImplicitCasting() { var alias = as(eval.fields().get(0), Alias.class); assertEquals("scalar", alias.name()); var scalar = as(alias.child(), Magnitude.class); - var child = as(scalar.field(), Literal.class); - assertThat(child.dataType(), is(DENSE_VECTOR)); - assertThat(child.value(), equalTo(List.of(1.0f, 2.0f, 3.0f))); - } - - public void testNoDenseVectorFailsForMagnitude() { - assumeTrue("v_magnitude not available", EsqlCapabilities.Cap.MAGNITUDE_SCALAR_VECTOR_FUNCTION.isEnabled()); - - var query = String.format(Locale.ROOT, "row a = 1 | eval scalar = v_magnitude(0.342)"); - VerificationException error = expectThrows(VerificationException.class, () -> analyze(query)); - assertThat( - error.getMessage(), - containsString("first argument of [v_magnitude(0.342)] must be [dense_vector], found value [0.342] type [double]") - ); + var child = as(scalar.field(), ToDenseVector.class); + var literal = as(child.field(), Literal.class); + assertThat(literal.value(), equalTo(List.of(1, 2, 3))); } public void testRateRequiresCounterTypes() { From fe73d76e8ef1732846b62c44f277e896919e730c Mon Sep 17 00:00:00 2001 From: cdelgado Date: Thu, 4 Sep 2025 13:39:01 +0200 Subject: [PATCH 18/25] Small fixes to Analyzer --- .../xpack/esql/analysis/Analyzer.java | 26 +++++++++---------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java index b9abfafdc8945..a633b6e6a3f73 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java @@ -1670,27 +1670,25 @@ private static Expression processVectorFunction( org.elasticsearch.xpack.esql.core.expression.function.Function vectorFunction, EsqlFunctionRegistry registry ) { + // Perform implicit casting for dense_vector from numeric and keyword values List args = vectorFunction.arguments(); List targetDataTypes = registry.getDataTypeForStringLiteralConversion(vectorFunction.getClass()); List newArgs = new ArrayList<>(); - // Perform implicit casting for numeric and keyword values for (int i = 0; i < args.size(); i++) { Expression arg = args.get(i); - if (targetDataTypes.get(i) == DENSE_VECTOR) { - if (arg.resolved()) { - var dataType = arg.dataType(); - if (dataType == KEYWORD) { - if (arg.foldable()) { - Expression exp = castStringLiteral(arg, DENSE_VECTOR); - if (exp != arg) { - newArgs.add(exp); - continue; - } + if (targetDataTypes.get(i) == DENSE_VECTOR && arg.resolved()) { + var dataType = arg.dataType(); + if (dataType == KEYWORD) { + if (arg.foldable()) { + Expression exp = castStringLiteral(arg, DENSE_VECTOR); + if (exp != arg) { + newArgs.add(exp); + continue; } - } else if (arg.dataType().isNumeric()) { - newArgs.add(new ToDenseVector(vectorFunction.source(), arg)); - continue; } + } else if (dataType.isNumeric()) { + newArgs.add(new ToDenseVector(vectorFunction.source(), arg)); + continue; } } newArgs.add(arg); From 0bceaa09b2b5d610e57743541c4adf258cf4cebf Mon Sep 17 00:00:00 2001 From: cdelgado Date: Thu, 4 Sep 2025 14:30:19 +0200 Subject: [PATCH 19/25] Fix tests --- .../elasticsearch/xpack/esql/analysis/AnalyzerTests.java | 2 +- .../function/scalar/convert/ToDenseVectorTests.java | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java index db096cb6f8b73..6caf1bfffcb32 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java @@ -2494,7 +2494,7 @@ private void checkDenseVectorEvalCastingSimilarityFunction(String similarityFunc } public void testVectorFunctionHexImplicitCastingError() { - if (EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()) { + if (EsqlCapabilities.Cap.KNN_FUNCTION_V5.isEnabled()) { checkVectorFunctionHexImplicitCastingError("where knn(float_vector, \"notcorrect\")"); } if (EsqlCapabilities.Cap.DOT_PRODUCT_VECTOR_SIMILARITY_FUNCTION.isEnabled()) { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorTests.java index 58e93a5999e22..19b1b8bf264ae 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorTests.java @@ -38,7 +38,7 @@ public static Iterable parameters() { List suppliers = new ArrayList<>(); suppliers.add(new TestCaseSupplier("int", List.of(DataType.INTEGER), () -> { - List data = Arrays.asList(randomArray(1, 10, Integer[]::new, ESTestCase::randomInt)); + List data = Arrays.asList(randomArray(2, 10, Integer[]::new, ESTestCase::randomInt)); return new TestCaseSupplier.TestCase( List.of(new TestCaseSupplier.TypedData(data, DataType.INTEGER, "int")), evaluatorName("Int", "i"), @@ -48,7 +48,7 @@ public static Iterable parameters() { })); suppliers.add(new TestCaseSupplier("long", List.of(DataType.LONG), () -> { - List data = Arrays.asList(randomArray(1, 10, Long[]::new, ESTestCase::randomLong)); + List data = Arrays.asList(randomArray(2, 10, Long[]::new, ESTestCase::randomLong)); return new TestCaseSupplier.TestCase( List.of(new TestCaseSupplier.TypedData(data, DataType.LONG, "long")), evaluatorName("Long", "l"), @@ -58,7 +58,7 @@ public static Iterable parameters() { })); suppliers.add(new TestCaseSupplier("double", List.of(DataType.DOUBLE), () -> { - List data = Arrays.asList(randomArray(1, 10, Double[]::new, ESTestCase::randomDouble)); + List data = Arrays.asList(randomArray(2, 10, Double[]::new, ESTestCase::randomDouble)); return new TestCaseSupplier.TestCase( List.of(new TestCaseSupplier.TypedData(data, DataType.DOUBLE, "double")), evaluatorName("Double", "d"), @@ -68,7 +68,7 @@ public static Iterable parameters() { })); suppliers.add(new TestCaseSupplier("keyword", List.of(DataType.KEYWORD), () -> { - byte[] bytes = randomByteArrayOfLength(randomIntBetween(1, 20)); + byte[] bytes = randomByteArrayOfLength(randomIntBetween(2, 20)); String data = HexFormat.of().formatHex(bytes); List expected = new ArrayList<>(bytes.length); for (int i = 0; i < bytes.length; i++) { From a5423dec1a22ccc9ff821a2a9b1bdf17179be697 Mon Sep 17 00:00:00 2001 From: cdelgado Date: Thu, 4 Sep 2025 15:36:56 +0200 Subject: [PATCH 20/25] Update telemetry test --- .../yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml index f670b7e639764..a2840648b444e 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml @@ -129,7 +129,7 @@ setup: - match: {esql.functions.coalesce: $functions_coalesce} - gt: {esql.functions.categorize: $functions_categorize} # Testing for the entire function set isn't feasible, so we just check that we return the correct count as an approximation. - - length: {esql.functions: 171} # check the "sister" test below for a likely update to the same esql.functions length check + - length: {esql.functions: 172} # check the "sister" test below for a likely update to the same esql.functions length check --- "Basic ESQL usage output (telemetry) non-snapshot version": From 0b6687a8f97ea0e303f3e3cc60f42f36e33f6feb Mon Sep 17 00:00:00 2001 From: cdelgado Date: Thu, 4 Sep 2025 15:40:37 +0200 Subject: [PATCH 21/25] Add docs --- .../functions/description/to_dense_vector.md | 6 +++++ .../functions/examples/to_dense_vector.md | 15 ++++++++++++ .../functions/layout/to_dense_vector.md | 23 +++++++++++++++++++ .../functions/parameters/to_dense_vector.md | 7 ++++++ .../esql/images/functions/to_dense_vector.svg | 2 +- 5 files changed, 52 insertions(+), 1 deletion(-) create mode 100644 docs/reference/query-languages/esql/_snippets/functions/description/to_dense_vector.md create mode 100644 docs/reference/query-languages/esql/_snippets/functions/examples/to_dense_vector.md create mode 100644 docs/reference/query-languages/esql/_snippets/functions/layout/to_dense_vector.md create mode 100644 docs/reference/query-languages/esql/_snippets/functions/parameters/to_dense_vector.md diff --git a/docs/reference/query-languages/esql/_snippets/functions/description/to_dense_vector.md b/docs/reference/query-languages/esql/_snippets/functions/description/to_dense_vector.md new file mode 100644 index 0000000000000..15bc7760b7803 --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/description/to_dense_vector.md @@ -0,0 +1,6 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +**Description** + +Converts a multi-valued input of numbers, or a hexadecimal string, to a dense_vector. + diff --git a/docs/reference/query-languages/esql/_snippets/functions/examples/to_dense_vector.md b/docs/reference/query-languages/esql/_snippets/functions/examples/to_dense_vector.md new file mode 100644 index 0000000000000..f202aeeff6dc9 --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/examples/to_dense_vector.md @@ -0,0 +1,15 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +**Example** + +```esql +row ints = [1, 2, 3] +| eval vector = to_dense_vector(ints) +| keep vector +``` + +| vector:dense_vector | +| --- | +| [1.0, 2.0, 3.0] | + + diff --git a/docs/reference/query-languages/esql/_snippets/functions/layout/to_dense_vector.md b/docs/reference/query-languages/esql/_snippets/functions/layout/to_dense_vector.md new file mode 100644 index 0000000000000..a5eaef0deed19 --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/layout/to_dense_vector.md @@ -0,0 +1,23 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +## `TO_DENSE_VECTOR` [esql-to_dense_vector] + +**Syntax** + +:::{image} ../../../images/functions/to_dense_vector.svg +:alt: Embedded +:class: text-center +::: + + +:::{include} ../parameters/to_dense_vector.md +::: + +:::{include} ../description/to_dense_vector.md +::: + +:::{include} ../types/to_dense_vector.md +::: + +:::{include} ../examples/to_dense_vector.md +::: diff --git a/docs/reference/query-languages/esql/_snippets/functions/parameters/to_dense_vector.md b/docs/reference/query-languages/esql/_snippets/functions/parameters/to_dense_vector.md new file mode 100644 index 0000000000000..f68b97a694bf9 --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/parameters/to_dense_vector.md @@ -0,0 +1,7 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +**Parameters** + +`field` +: multi-valued input of numbers or hexadecimal string to convert. + diff --git a/docs/reference/query-languages/esql/images/functions/to_dense_vector.svg b/docs/reference/query-languages/esql/images/functions/to_dense_vector.svg index 36a8f66b3af2a..54304ee44b11f 100644 --- a/docs/reference/query-languages/esql/images/functions/to_dense_vector.svg +++ b/docs/reference/query-languages/esql/images/functions/to_dense_vector.svg @@ -1 +1 @@ -TO_DENSE_VECTOR(field) \ No newline at end of file +TO_DENSE_VECTOR(field) \ No newline at end of file From e66a9b4415dab0836f7cf026fda4652722b90c6e Mon Sep 17 00:00:00 2001 From: cdelgado Date: Thu, 4 Sep 2025 17:56:00 +0200 Subject: [PATCH 22/25] Guard name writeable --- .../xpack/esql/expression/ExpressionWritables.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/ExpressionWritables.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/ExpressionWritables.java index 471ecefd168dd..16a38671db62c 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/ExpressionWritables.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/ExpressionWritables.java @@ -8,6 +8,7 @@ package org.elasticsearch.xpack.esql.expression; import org.elasticsearch.common.io.stream.NamedWriteableRegistry; +import org.elasticsearch.xpack.esql.action.EsqlCapabilities; import org.elasticsearch.xpack.esql.core.expression.ExpressionCoreWritables; import org.elasticsearch.xpack.esql.expression.function.UnsupportedAttribute; import org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateWritables; @@ -206,7 +207,9 @@ public static List unaryScalars() { entries.add(ToDatetime.ENTRY); entries.add(ToDateNanos.ENTRY); entries.add(ToDegrees.ENTRY); - entries.add(ToDenseVector.ENTRY); + if (EsqlCapabilities.Cap.TO_DENSE_VECTOR_FUNCTION.isEnabled()) { + entries.add(ToDenseVector.ENTRY); + } entries.add(ToDouble.ENTRY); entries.add(ToGeoShape.ENTRY); entries.add(ToCartesianShape.ENTRY); From 7ce8d72ab9cbf6c2edf7e58bc61edcf2fa99c915 Mon Sep 17 00:00:00 2001 From: cdelgado Date: Tue, 9 Sep 2025 18:33:01 +0200 Subject: [PATCH 23/25] Add test guards for capability --- .../elasticsearch/xpack/esql/analysis/AnalyzerTests.java | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java index 6caf1bfffcb32..56691481409b0 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java @@ -2350,6 +2350,8 @@ public void testImplicitCasting() { public void testDenseVectorImplicitCastingKnn() { assumeTrue("dense_vector capability not available", EsqlCapabilities.Cap.DENSE_VECTOR_FIELD_TYPE.isEnabled()); assumeTrue("dense_vector capability not available", EsqlCapabilities.Cap.KNN_FUNCTION_V5.isEnabled()); + assumeTrue("dense vector casting must be enabled", EsqlCapabilities.Cap.TO_DENSE_VECTOR_FUNCTION.isEnabled()); + checkDenseVectorCastingHexKnn("float_vector"); checkDenseVectorCastingHexKnn("byte_vector"); @@ -2399,6 +2401,8 @@ private static void checkDenseVectorEvalCastingKnn(String fieldName) { } public void testDenseVectorImplicitCastingSimilarityFunctions() { + assumeTrue("dense vector casting must be enabled", EsqlCapabilities.Cap.TO_DENSE_VECTOR_FUNCTION.isEnabled()); + if (EsqlCapabilities.Cap.COSINE_VECTOR_SIMILARITY_FUNCTION.isEnabled()) { checkDenseVectorImplicitCastingSimilarityFunction( "v_cosine(float_vector, [0.342, 0.164, 0.234])", @@ -2454,6 +2458,8 @@ private void checkDenseVectorImplicitCastingSimilarityFunction(String similarity } public void testDenseVectorEvalCastingSimilarityFunctions() { + assumeTrue("dense vector casting must be enabled", EsqlCapabilities.Cap.TO_DENSE_VECTOR_FUNCTION.isEnabled()); + if (EsqlCapabilities.Cap.COSINE_VECTOR_SIMILARITY_FUNCTION.isEnabled()) { checkDenseVectorEvalCastingSimilarityFunction("v_cosine(float_vector, query)"); checkDenseVectorEvalCastingSimilarityFunction("v_cosine(byte_vector, query)"); @@ -2494,6 +2500,8 @@ private void checkDenseVectorEvalCastingSimilarityFunction(String similarityFunc } public void testVectorFunctionHexImplicitCastingError() { + assumeTrue("dense vector casting must be enabled", EsqlCapabilities.Cap.TO_DENSE_VECTOR_FUNCTION.isEnabled()); + if (EsqlCapabilities.Cap.KNN_FUNCTION_V5.isEnabled()) { checkVectorFunctionHexImplicitCastingError("where knn(float_vector, \"notcorrect\")"); } From 935a00001a8139fa9cf8703635a48350e8ed5690 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Wed, 10 Sep 2025 05:59:55 +0000 Subject: [PATCH 24/25] [CI] Auto commit changes from spotless --- .../xpack/esql/expression/function/EsqlFunctionRegistry.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java index 9e40bb8aaa469..79f610bc9ad98 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java @@ -520,7 +520,7 @@ private static FunctionDefinition[][] snapshotFunctions() { def(FirstOverTime.class, uni(FirstOverTime::new), "first_over_time"), def(Score.class, uni(Score::new), Score.NAME), def(Term.class, bi(Term::new), "term"), - def(ToDenseVector.class, ToDenseVector::new, "to_dense_vector"), + def(ToDenseVector.class, ToDenseVector::new, "to_dense_vector"), def(Knn.class, tri(Knn::new), "knn"), def(CosineSimilarity.class, CosineSimilarity::new, "v_cosine"), def(DotProduct.class, DotProduct::new, "v_dot_product"), From 590540ecd941bebfcb88ef00b2418be18c7e6505 Mon Sep 17 00:00:00 2001 From: cdelgado Date: Wed, 10 Sep 2025 10:13:58 +0200 Subject: [PATCH 25/25] Fix release tests --- .../esql/type/EsqlDataTypeConverter.java | 57 +++++++++++-------- .../scalar/convert/ToDenseVectorTests.java | 7 +++ 2 files changed, 40 insertions(+), 24 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverter.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverter.java index 457cb0a9fb264..446ba48288b84 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverter.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverter.java @@ -28,6 +28,7 @@ import org.elasticsearch.search.aggregations.bucket.geogrid.GeoTileUtils; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.json.JsonXContent; +import org.elasticsearch.xpack.esql.action.EsqlCapabilities; import org.elasticsearch.xpack.esql.core.InvalidArgumentException; import org.elasticsearch.xpack.esql.core.QlIllegalArgumentException; import org.elasticsearch.xpack.esql.core.expression.Expression; @@ -76,6 +77,8 @@ import java.time.temporal.ChronoField; import java.time.temporal.TemporalAmount; import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; import java.util.HexFormat; import java.util.List; import java.util.Locale; @@ -83,7 +86,6 @@ import java.util.function.BiFunction; import java.util.function.Function; -import static java.util.Map.entry; import static org.elasticsearch.xpack.esql.core.type.DataType.AGGREGATE_METRIC_DOUBLE; import static org.elasticsearch.xpack.esql.core.type.DataType.BOOLEAN; import static org.elasticsearch.xpack.esql.core.type.DataType.CARTESIAN_POINT; @@ -131,31 +133,38 @@ public class EsqlDataTypeConverter { public static final DateFormatter HOUR_MINUTE_SECOND = DateFormatter.forPattern("strict_hour_minute_second_fraction"); - private static final Map> TYPE_TO_CONVERTER_FUNCTION = Map.ofEntries( - entry(AGGREGATE_METRIC_DOUBLE, ToAggregateMetricDouble::new), - entry(BOOLEAN, ToBoolean::new), - entry(CARTESIAN_POINT, ToCartesianPoint::new), - entry(CARTESIAN_SHAPE, ToCartesianShape::new), - entry(DATETIME, ToDatetime::new), - entry(DATE_NANOS, ToDateNanos::new), + private static final Map> TYPE_TO_CONVERTER_FUNCTION; + + static { + Map> typeToConverter = new HashMap<>(); + typeToConverter.put(AGGREGATE_METRIC_DOUBLE, ToAggregateMetricDouble::new); + typeToConverter.put(BOOLEAN, ToBoolean::new); + typeToConverter.put(CARTESIAN_POINT, ToCartesianPoint::new); + typeToConverter.put(CARTESIAN_SHAPE, ToCartesianShape::new); + typeToConverter.put(DATETIME, ToDatetime::new); + typeToConverter.put(DATE_NANOS, ToDateNanos::new); // ToDegrees, typeless - entry(DOUBLE, ToDouble::new), - entry(GEO_POINT, ToGeoPoint::new), - entry(GEO_SHAPE, ToGeoShape::new), - entry(GEOHASH, ToGeohash::new), - entry(GEOTILE, ToGeotile::new), - entry(GEOHEX, ToGeohex::new), - entry(INTEGER, ToInteger::new), - entry(IP, ToIpLeadingZerosRejected::new), - entry(LONG, ToLong::new), + typeToConverter.put(DOUBLE, ToDouble::new); + typeToConverter.put(GEO_POINT, ToGeoPoint::new); + typeToConverter.put(GEO_SHAPE, ToGeoShape::new); + typeToConverter.put(GEOHASH, ToGeohash::new); + typeToConverter.put(GEOTILE, ToGeotile::new); + typeToConverter.put(GEOHEX, ToGeohex::new); + typeToConverter.put(INTEGER, ToInteger::new); + typeToConverter.put(IP, ToIpLeadingZerosRejected::new); + typeToConverter.put(LONG, ToLong::new); // ToRadians, typeless - entry(KEYWORD, ToString::new), - entry(UNSIGNED_LONG, ToUnsignedLong::new), - entry(VERSION, ToVersion::new), - entry(DATE_PERIOD, ToDatePeriod::new), - entry(TIME_DURATION, ToTimeDuration::new), - entry(DENSE_VECTOR, ToDenseVector::new) - ); + typeToConverter.put(KEYWORD, ToString::new); + typeToConverter.put(UNSIGNED_LONG, ToUnsignedLong::new); + typeToConverter.put(VERSION, ToVersion::new); + typeToConverter.put(DATE_PERIOD, ToDatePeriod::new); + typeToConverter.put(TIME_DURATION, ToTimeDuration::new); + + if (EsqlCapabilities.Cap.TO_DENSE_VECTOR_FUNCTION.isEnabled()) { + typeToConverter.put(DENSE_VECTOR, ToDenseVector::new); + } + TYPE_TO_CONVERTER_FUNCTION = Collections.unmodifiableMap(typeToConverter); + } public enum INTERVALS { // TIME_DURATION, diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorTests.java index 19b1b8bf264ae..e4e153d25bf8f 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToDenseVectorTests.java @@ -12,11 +12,13 @@ import org.apache.lucene.util.BytesRef; import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.esql.action.EsqlCapabilities; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.expression.function.AbstractScalarFunctionTestCase; import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier; +import org.junit.BeforeClass; import java.util.ArrayList; import java.util.Arrays; @@ -29,6 +31,11 @@ public class ToDenseVectorTests extends AbstractScalarFunctionTestCase { + @BeforeClass + public static void checkCapability() { + assumeTrue("To_DenseVector function capability", EsqlCapabilities.Cap.TO_DENSE_VECTOR_FUNCTION.isEnabled()); + } + public ToDenseVectorTests(@Name("TestCase") Supplier testCaseSupplier) { this.testCase = testCaseSupplier.get(); }