Skip to content
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,24 @@
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.lucene.BytesRefs;
import org.elasticsearch.xpack.esql.core.QlIllegalArgumentException;
import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
import org.elasticsearch.xpack.esql.core.tree.Source;
import org.elasticsearch.xpack.esql.core.type.DataType;
import org.elasticsearch.xpack.esql.core.util.PlanStreamInput;
import org.elasticsearch.xpack.versionfield.Version;

import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.Objects;

import static org.elasticsearch.xpack.esql.core.type.DataType.CARTESIAN_POINT;
import static org.elasticsearch.xpack.esql.core.type.DataType.GEO_POINT;
import static org.elasticsearch.xpack.esql.core.type.DataType.KEYWORD;
import static org.elasticsearch.xpack.esql.core.type.DataType.TEXT;
import static org.elasticsearch.xpack.esql.core.type.DataType.VERSION;
import static org.elasticsearch.xpack.esql.core.util.SpatialCoordinateTypes.CARTESIAN;
import static org.elasticsearch.xpack.esql.core.util.SpatialCoordinateTypes.GEO;

Expand All @@ -45,10 +51,23 @@ public class Literal extends LeafExpression {

public Literal(Source source, Object value, DataType dataType) {
super(source);
assert noPlainStrings(value, dataType);
this.dataType = dataType;
this.value = value;
}

private boolean noPlainStrings(Object value, DataType dataType) {
if (dataType == KEYWORD || dataType == TEXT || dataType == VERSION) {
if (value instanceof String) {
return false;
}
if (value instanceof Collection<?> c) {
return c.stream().allMatch(x -> noPlainStrings(x, dataType));
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: consider pattern matching here

Suggested change
if (value instanceof String) {
return false;
}
if (value instanceof Collection<?> c) {
return c.stream().allMatch(x -> noPlainStrings(x, dataType));
}
return switch (value) {
case String s -> false;
case Collection<?> c -> c.stream().allMatch(x -> noPlainStrings(x, dataType));
default -> true;
}

}
return true;
}

private static Literal readFrom(StreamInput in) throws IOException {
Source source = Source.readFrom((StreamInput & PlanStreamInput) in);
Object value = in.readGenericValue();
Expand Down Expand Up @@ -122,7 +141,20 @@ public boolean equals(Object obj) {

@Override
public String toString() {
String str = String.valueOf(value);
String str;
if (dataType == KEYWORD || dataType == TEXT) {
str = BytesRefs.toString(value);
} else if (dataType == VERSION && value instanceof BytesRef br) {
str = new Version(br).toString();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this an improvement? Or were Versions a readable String before?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We had some cases (mostly unit tests) where we passed plain strings to VERSION literals.
Even for tests it's not a good thing, as we are testing something that is different than the actual production execution.

// } else if (dataType == IP && value instanceof BytesRef ip) {
// str = DocValueFormat.IP.format(ip);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just in case, is this intentionally commented?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah... unfortunately it is, for two reasons:

  • IP conversion from/to BytesRefs could trigger assertion errors, and it's pretty annoying in tests
  • We are explicitly using strings for IPs in some cases, see CobineDisjunctions; we'll have to change that before we can force IPs as BytesRefs

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you please add this as a comment/todo referencing an issue to resolve it eventually?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I created an issue and I'm linking it in a TODO

} else {
str = String.valueOf(value);
}

if (str == null) {
str = "null";
}
if (str.length() > 500) {
return str.substring(0, 500) + "...";
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.lucene.BytesRefs;
import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
import org.elasticsearch.xpack.esql.core.tree.Source;
import org.elasticsearch.xpack.esql.core.type.DataType;
Expand Down Expand Up @@ -39,7 +40,7 @@ public class MapExpression extends Expression {

private final Map<Expression, Expression> map;

private final Map<Object, Expression> keyFoldedMap;
private final Map<String, Expression> keyFoldedMap;

public MapExpression(Source source, List<Expression> entries) {
super(source, entries);
Expand All @@ -54,7 +55,7 @@ public MapExpression(Source source, List<Expression> entries) {
entryExpressions.add(new EntryExpression(key.source(), key, value));
map.put(key, value);
if (key instanceof Literal l) {
this.keyFoldedMap.put(l.value(), value);
this.keyFoldedMap.put(BytesRefs.toString(l.value()), value);
}
}
}
Expand Down Expand Up @@ -95,7 +96,7 @@ public Map<Expression, Expression> map() {
return map;
}

public Map<Object, Expression> keyFoldedMap() {
public Map<String, Expression> keyFoldedMap() {
return keyFoldedMap;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

import joptsimple.internal.Strings;

import org.elasticsearch.common.lucene.BytesRefs;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.esql.core.InvalidArgumentException;
import org.elasticsearch.xpack.esql.core.tree.AbstractNodeTestCase;
Expand All @@ -20,7 +21,6 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import java.util.function.Function;
import java.util.function.Supplier;

Expand Down Expand Up @@ -62,7 +62,7 @@ static class ValueAndCompatibleTypes {
new ValueAndCompatibleTypes(ESTestCase::randomLong, LONG, FLOAT, DOUBLE, BOOLEAN),
new ValueAndCompatibleTypes(ESTestCase::randomFloat, FLOAT, LONG, DOUBLE, BOOLEAN),
new ValueAndCompatibleTypes(ESTestCase::randomDouble, DOUBLE, LONG, FLOAT, BOOLEAN),
new ValueAndCompatibleTypes(() -> randomAlphaOfLength(5), KEYWORD)
new ValueAndCompatibleTypes(() -> BytesRefs.toBytesRef(randomAlphaOfLength(5)), KEYWORD)
);

public static Literal randomLiteral() {
Expand Down Expand Up @@ -129,14 +129,14 @@ public void testReplaceChildren() {

public void testToString() {
assertThat(new Literal(Source.EMPTY, 1, LONG).toString(), equalTo("1"));
assertThat(new Literal(Source.EMPTY, "short", KEYWORD).toString(), equalTo("short"));
assertThat(new Literal(Source.EMPTY, BytesRefs.toBytesRef("short"), KEYWORD).toString(), equalTo("short"));
// toString should limit it's length
String tooLong = Strings.repeat('a', 510);
assertThat(new Literal(Source.EMPTY, tooLong, KEYWORD).toString(), equalTo(Strings.repeat('a', 500) + "..."));
assertThat(new Literal(Source.EMPTY, BytesRefs.toBytesRef(tooLong), KEYWORD).toString(), equalTo(Strings.repeat('a', 500) + "..."));

for (ValueAndCompatibleTypes g : GENERATORS) {
Literal lit = new Literal(Source.EMPTY, g.valueSupplier.get(), randomFrom(g.validDataTypes));
assertThat(lit.toString(), equalTo(Objects.toString(lit.value())));
assertThat(lit.toString(), equalTo(BytesRefs.toString(lit.value())));
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

package org.elasticsearch.xpack.esql.core.expression.function.scalar;

import org.elasticsearch.common.lucene.BytesRefs;
import org.elasticsearch.xpack.esql.core.expression.Literal;
import org.elasticsearch.xpack.esql.core.type.DataType;

Expand All @@ -15,10 +16,13 @@
public final class FunctionTestUtils {

public static Literal l(Object value) {
return new Literal(EMPTY, value, DataType.fromJava(value));
return l(value, DataType.fromJava(value));
}

public static Literal l(Object value, DataType type) {
if (value instanceof String && (type == DataType.TEXT || type == DataType.KEYWORD)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

NIT: type == DataType.TEXT || type == DataType.KEYWORD theck looks a bit cheaper. Do you mind making it first (before instanceof)?

value = BytesRefs.toBytesRef(value);
}
return new Literal(EMPTY, value, type);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

package org.elasticsearch.xpack.esql.core.util;

import org.elasticsearch.common.lucene.BytesRefs;
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
import org.elasticsearch.xpack.esql.core.expression.Literal;
import org.elasticsearch.xpack.esql.core.tree.Source;
Expand All @@ -22,6 +23,8 @@
import static org.elasticsearch.test.ESTestCase.randomFrom;
import static org.elasticsearch.xpack.esql.core.tree.Source.EMPTY;
import static org.elasticsearch.xpack.esql.core.type.DataType.INTEGER;
import static org.elasticsearch.xpack.esql.core.type.DataType.KEYWORD;
import static org.elasticsearch.xpack.esql.core.type.DataType.TEXT;

public final class TestUtils {
private TestUtils() {}
Expand All @@ -39,6 +42,10 @@ public static Literal of(Source source, Object value) {
if (value instanceof Literal) {
return (Literal) value;
}
DataType type = DataType.fromJava(value);
if (value instanceof String && (type == TEXT || type == KEYWORD)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here

value = BytesRefs.toBytesRef(value);
}
return new Literal(source, value, DataType.fromJava(value));
}

Expand All @@ -58,12 +65,16 @@ public static FieldAttribute getFieldAttribute(String name, DataType dataType) {
return new FieldAttribute(EMPTY, name, new EsField(name + "f", dataType, emptyMap(), true));
}

/** Similar to {@link String#strip()}, but removes the WS throughout the entire string. */
/**
* Similar to {@link String#strip()}, but removes the WS throughout the entire string.
*/
public static String stripThrough(String input) {
return WS_PATTERN.matcher(input).replaceAll(StringUtils.EMPTY);
}

/** Returns the input string, but with parts of it having the letter casing changed. */
/**
* Returns the input string, but with parts of it having the letter casing changed.
*/
public static String randomCasing(String input) {
StringBuilder sb = new StringBuilder(input.length());
for (int i = 0, inputLen = input.length(), step = (int) Math.sqrt(inputLen); i < inputLen; i += step) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.elasticsearch.common.breaker.CircuitBreaker;
import org.elasticsearch.common.breaker.NoopCircuitBreaker;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.lucene.BytesRefs;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.util.BigArrays;
Expand Down Expand Up @@ -227,7 +228,11 @@ public static Literal of(Source source, Object value) {
if (value instanceof Literal) {
return (Literal) value;
}
return new Literal(source, value, DataType.fromJava(value));
var dataType = DataType.fromJava(value);
if (value instanceof String) {
value = BytesRefs.toBytesRef(value);
}
return new Literal(source, value, dataType);
}

public static ReferenceAttribute referenceAttribute(String name, DataType type) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -398,7 +398,7 @@ eth0 |gamma |fe80::cae2:65ff:fece:feb9
lo0 |gamma |fe80::cae2:65ff:fece:feb9 |fe81::cae2:65ff:fece:feb9
;

cdirMatchEqualsInsOrsIPs
cicdirMatchEqualsInsOrsIPs
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
cicdirMatchEqualsInsOrsIPs
cidrMatchEqualsInsOrsIPs

required_capability: combine_disjunctive_cidrmatches

FROM hosts
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -860,3 +860,21 @@ c: long | scalerank: long
10 | 3
15 | 2
;


testMatchWithReplace
required_capability: match_function
required_capability: no_plain_strings_in_literals
from books
| keep book_no, author
| where match(author, REPLACE("FaulkneX", "X", "r"))
| sort book_no
| limit 5;

book_no:keyword | author:text
2378 | [Carol Faulkner, Holly Byers Ochoa, Lucretia Mott]
2713 | William Faulkner
2847 | Colleen Faulkner
2883 | William Faulkner
3293 | Danny Faulkner
;
Original file line number Diff line number Diff line change
Expand Up @@ -1207,7 +1207,15 @@ public enum Cap {
/**
* Support parameters for SAMPLE command.
*/
PARAMETER_FOR_SAMPLE(Build.current().isSnapshot());
PARAMETER_FOR_SAMPLE(Build.current().isSnapshot()),

/**
* From now, Literal only accepts strings as BytesRefs.
* No java.lang.String anymore.
*
* https://github.com/elastic/elasticsearch/issues/129322
*/
NO_PLAIN_STRINGS_IN_LITERALS;

private final boolean enabled;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@

package org.elasticsearch.xpack.esql.analysis;

import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.logging.HeaderWarning;
import org.elasticsearch.common.logging.LoggerMessageFormat;
import org.elasticsearch.common.lucene.BytesRefs;
import org.elasticsearch.compute.data.Block;
import org.elasticsearch.core.Strings;
import org.elasticsearch.index.IndexMode;
Expand Down Expand Up @@ -318,7 +320,7 @@ protected LogicalPlan rule(Enrich plan, AnalyzerContext context) {
// the policy does not exist
return plan;
}
final String policyName = (String) plan.policyName().fold(FoldContext.small() /* TODO remove me */);
final String policyName = ((BytesRef) plan.policyName().fold(FoldContext.small() /* TODO remove me */)).utf8ToString();
final var resolved = context.enrichResolution().getResolvedPolicy(policyName, plan.mode());
if (resolved != null) {
var policy = new EnrichPolicy(resolved.matchType(), null, List.of(), resolved.matchField(), resolved.enrichFields());
Expand Down Expand Up @@ -401,7 +403,7 @@ private static class ResolveInference extends ParameterizedAnalyzerRule<Inferenc
protected LogicalPlan rule(InferencePlan<?> plan, AnalyzerContext context) {
assert plan.inferenceId().resolved() && plan.inferenceId().foldable();

String inferenceId = plan.inferenceId().fold(FoldContext.small()).toString();
String inferenceId = BytesRefs.toString(plan.inferenceId().fold(FoldContext.small()));
ResolvedInference resolvedInference = context.inferenceResolution().getResolvedInference(inferenceId);

if (resolvedInference != null && resolvedInference.taskType() == plan.taskType()) {
Expand Down Expand Up @@ -431,7 +433,7 @@ protected LogicalPlan rule(Lookup lookup, AnalyzerContext context) {
// the parser passes the string wrapped in a literal
Source source = lookup.source();
Expression tableNameExpression = lookup.tableName();
String tableName = lookup.tableName().toString();
String tableName = BytesRefs.toString(tableNameExpression.fold(FoldContext.small() /* TODO remove me */));
Map<String, Map<String, Column>> tables = context.configuration().tables();
LocalRelation localRelation = null;

Expand Down Expand Up @@ -1563,18 +1565,22 @@ private static boolean supportsStringImplicitCasting(DataType type) {
}

private static UnresolvedAttribute unresolvedAttribute(Expression value, String type, Exception e) {
String name = BytesRefs.toString(value.fold(FoldContext.small()) /* TODO remove me */);
String message = LoggerMessageFormat.format(
null,
"Cannot convert string [{}] to [{}], error [{}]",
value.fold(FoldContext.small() /* TODO remove me */),
name,
type,
(e instanceof ParsingException pe) ? pe.getErrorMessage() : e.getMessage()
);
return new UnresolvedAttribute(value.source(), String.valueOf(value.fold(FoldContext.small() /* TODO remove me */)), message);
return new UnresolvedAttribute(value.source(), name, message);
}

private static Expression castStringLiteralToTemporalAmount(Expression from) {
try {
TemporalAmount result = maybeParseTemporalAmount(from.fold(FoldContext.small() /* TODO remove me */).toString().strip());
TemporalAmount result = maybeParseTemporalAmount(
((BytesRef) from.fold(FoldContext.small() /* TODO remove me */)).utf8ToString().strip()
);
if (result == null) {
return from;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.lucene.BytesRefs;
import org.elasticsearch.compute.aggregation.AggregatorFunctionSupplier;
import org.elasticsearch.compute.aggregation.CountAggregatorFunction;
import org.elasticsearch.compute.data.AggregateMetricDoubleBlockBuilder;
Expand Down Expand Up @@ -163,7 +164,7 @@ public Expression surrogate() {
return new Mul(
s,
new Coalesce(s, new MvCount(s, field), List.of(new Literal(s, 0, DataType.INTEGER))),
new Count(s, new Literal(s, StringUtils.WILDCARD, DataType.KEYWORD))
new Count(s, new Literal(s, BytesRefs.toBytesRef(StringUtils.WILDCARD), DataType.KEYWORD))
);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.lucene.BytesRefs;
import org.elasticsearch.compute.aggregation.AggregatorFunctionSupplier;
import org.elasticsearch.compute.aggregation.SumDoubleAggregatorFunctionSupplier;
import org.elasticsearch.compute.aggregation.SumIntAggregatorFunctionSupplier;
Expand Down Expand Up @@ -144,7 +145,7 @@ public Expression surrogate() {

// SUM(const) is equivalent to MV_SUM(const)*COUNT(*).
return field.foldable()
? new Mul(s, new MvSum(s, field), new Count(s, new Literal(s, StringUtils.WILDCARD, DataType.KEYWORD)))
? new Mul(s, new MvSum(s, field), new Count(s, new Literal(s, BytesRefs.toBytesRef(StringUtils.WILDCARD), DataType.KEYWORD)))
: null;
}
}
Loading