Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/changelog/126532.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 126532
summary: TO_IP can handle leading zeros
area: ES|QL
type: bug
issues:
- 125460
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ public class CsvTestsDataLoader {
private static final TestDataset ADDRESSES = new TestDataset("addresses");
private static final TestDataset BOOKS = new TestDataset("books").withSetting("books-settings.json");
private static final TestDataset SEMANTIC_TEXT = new TestDataset("semantic_text").withInferenceEndpoint(true);
private static final TestDataset LOGS = new TestDataset("logs");
private static final TestDataset MV_TEXT = new TestDataset("mv_text");
private static final TestDataset DENSE_VECTOR = new TestDataset("dense_vector");
private static final TestDataset COLORS = new TestDataset("colors");
Expand Down Expand Up @@ -172,6 +173,7 @@ public class CsvTestsDataLoader {
Map.entry(ADDRESSES.indexName, ADDRESSES),
Map.entry(BOOKS.indexName, BOOKS),
Map.entry(SEMANTIC_TEXT.indexName, SEMANTIC_TEXT),
Map.entry(LOGS.indexName, LOGS),
Map.entry(MV_TEXT.indexName, MV_TEXT),
Map.entry(DENSE_VECTOR.indexName, DENSE_VECTOR),
Map.entry(COLORS.indexName, COLORS)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
@timestamp:date ,system:keyword,message:keyword
2023-10-23T13:55:01.543Z, ping,Pinging 192.168.86.046
2023-10-23T13:55:01.544Z, cron,Running cats
2023-10-23T13:55:01.545Z, java,Doing java stuff for 192.168.86.038
2023-10-23T13:55:01.546Z, java,More java stuff
80 changes: 80 additions & 0 deletions x-pack/plugin/esql/qa/testFixtures/src/main/resources/ip.csv-spec
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,86 @@ str1:keyword |str2:keyword |ip1:ip |ip2:ip
// end::to_ip-result[]
;

convertFromStringLeadingZerosDecimal
required_capability: to_ip_leading_zeros
// tag::to_ip_leading_zeros_decimal[]
ROW s = "1.1.010.1" | EVAL ip = TO_IP(s, {"leading_zeros":"decimal"})
// end::to_ip_leading_zeros_decimal[]
;

// tag::to_ip_leading_zeros_decimal-result[]
s:keyword | ip:ip
1.1.010.1 | 1.1.10.1
// end::to_ip_leading_zeros_decimal-result[]
;

convertFromStringLeadingZerosOctal
required_capability: to_ip_leading_zeros
// tag::to_ip_leading_zeros_octal[]
ROW s = "1.1.010.1" | EVAL ip = TO_IP(s, {"leading_zeros":"octal"})
// end::to_ip_leading_zeros_octal[]
;

// tag::to_ip_leading_zeros_octal-result[]
s:keyword | ip:ip
1.1.010.1 | 1.1.8.1
// end::to_ip_leading_zeros_octal-result[]
;

convertFromStringFancy
required_capability: to_ip_leading_zeros
FROM logs
| KEEP @timestamp, system, message
| EVAL client = CASE(
system == "ping",
TO_IP(REPLACE(message, "Pinging ", ""), {"leading_zeros": "octal"}),
system == "java" AND STARTS_WITH(message, "Doing java stuff for "),
TO_IP(REPLACE(message, "Doing java stuff for ", ""), {"leading_zeros": "decimal"}))
| SORT @timestamp
| LIMIT 4
;

@timestamp:date |system:keyword|message:keyword |client:ip
2023-10-23T13:55:01.543Z| ping|Pinging 192.168.86.046 |192.168.86.38
2023-10-23T13:55:01.544Z| cron|Running cats |null
2023-10-23T13:55:01.545Z| java|Doing java stuff for 192.168.86.038|192.168.86.38
2023-10-23T13:55:01.546Z| java|More java stuff |null
;

toIpInAgg
ROW s = "1.1.1.1" | STATS COUNT(*) BY ip = TO_IP(s)
;

COUNT(*):long | ip:ip
1 | 1.1.1.1
;

toIpInSort
ROW s = "1.1.1.1" | SORT TO_IP(s)
;

s:keyword
1.1.1.1
;

toIpInAggOctal
required_capability: to_ip_leading_zeros
ROW s = "1.1.010.1" | STATS COUNT(*) BY ip = TO_IP(s, {"leading_zeros":"octal"})
;

COUNT(*):long | ip:ip
1 | 1.1.8.1
;

toIpInSortOctal
required_capability: to_ip_leading_zeros
ROW s = "1.1.010.1" | SORT TO_IP(s, {"leading_zeros":"octal"})
;

s:keyword
1.1.010.1
;

cdirMatchOrsIPs
required_capability: combine_disjunctive_cidrmatches

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"properties" : {
"@timestamp" : {
"type" : "date"
},
"system" : {
"type" : "keyword"
},
"message" : {
"type" : "keyword"
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -887,6 +887,11 @@ public enum Cap {
*/
TO_LOWER_MV,

/**
* Support for the {@code leading_zeros} named parameter.
*/
TO_IP_LEADING_ZEROS,

/**
* Resolve groupings before resolving references to groupings in the aggregations.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
import org.elasticsearch.xpack.esql.expression.function.scalar.conditional.Greatest;
import org.elasticsearch.xpack.esql.expression.function.scalar.conditional.Least;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.AbstractConvertFunction;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ConvertFunction;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.FoldablesConvertFunction;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDateNanos;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDouble;
Expand All @@ -70,6 +71,7 @@
import org.elasticsearch.xpack.esql.index.EsIndex;
import org.elasticsearch.xpack.esql.index.IndexResolution;
import org.elasticsearch.xpack.esql.inference.ResolvedInference;
import org.elasticsearch.xpack.esql.optimizer.rules.logical.SubstituteSurrogateExpressions;
import org.elasticsearch.xpack.esql.parser.ParsingException;
import org.elasticsearch.xpack.esql.plan.IndexPattern;
import org.elasticsearch.xpack.esql.plan.logical.Aggregate;
Expand Down Expand Up @@ -1450,10 +1452,12 @@ private LogicalPlan doRule(LogicalPlan plan) {
int alreadyAddedUnionFieldAttributes = unionFieldAttributes.size();
// See if the eval function has an unresolved MultiTypeEsField field
// Replace the entire convert function with a new FieldAttribute (containing type conversion knowledge)
plan = plan.transformExpressionsOnly(
AbstractConvertFunction.class,
convert -> resolveConvertFunction(convert, unionFieldAttributes)
);
plan = plan.transformExpressionsOnly(e -> {
if (e instanceof ConvertFunction convert) {
return resolveConvertFunction(convert, unionFieldAttributes);
}
return e;
});
// If no union fields were generated, return the plan as is
if (unionFieldAttributes.size() == alreadyAddedUnionFieldAttributes) {
return plan;
Expand Down Expand Up @@ -1484,7 +1488,8 @@ private LogicalPlan doRule(LogicalPlan plan) {
return plan;
}

private Expression resolveConvertFunction(AbstractConvertFunction convert, List<FieldAttribute> unionFieldAttributes) {
private Expression resolveConvertFunction(ConvertFunction convert, List<FieldAttribute> unionFieldAttributes) {
Expression convertExpression = (Expression) convert;
if (convert.field() instanceof FieldAttribute fa && fa.field() instanceof InvalidMappedField imf) {
HashMap<TypeResolutionKey, Expression> typeResolutions = new HashMap<>();
Set<DataType> supportedTypes = convert.supportedTypes();
Expand Down Expand Up @@ -1517,7 +1522,7 @@ private Expression resolveConvertFunction(AbstractConvertFunction convert, List<
// example, an expression like multiTypeEsField(synthetic=false, date_nanos)::date_nanos::datetime is rewritten to
// multiTypeEsField(synthetic=true, date_nanos)::datetime, the implicit casting is overwritten by explicit casting and
// the multiTypeEsField is not casted to datetime directly.
if (convert.dataType() == mtf.getDataType()) {
if (((Expression) convert).dataType() == mtf.getDataType()) {
return createIfDoesNotAlreadyExist(fa, mtf, unionFieldAttributes);
}

Expand All @@ -1531,21 +1536,23 @@ private Expression resolveConvertFunction(AbstractConvertFunction convert, List<
String indexName = entry.getKey();
AbstractConvertFunction originalConversionFunction = (AbstractConvertFunction) entry.getValue();
Expression originalField = originalConversionFunction.field();
Expression newConvertFunction = convert.replaceChildren(Collections.singletonList(originalField));
Expression newConvertFunction = convertExpression.replaceChildren(Collections.singletonList(originalField));
indexToConversionExpressions.put(indexName, newConvertFunction);
}
MultiTypeEsField multiTypeEsField = new MultiTypeEsField(
fa.fieldName().string(),
convert.dataType(),
convertExpression.dataType(),
false,
indexToConversionExpressions
);
return createIfDoesNotAlreadyExist(fa, multiTypeEsField, unionFieldAttributes);
}
} else if (convert.field() instanceof AbstractConvertFunction subConvert) {
return convert.replaceChildren(Collections.singletonList(resolveConvertFunction(subConvert, unionFieldAttributes)));
return convertExpression.replaceChildren(
Collections.singletonList(resolveConvertFunction(subConvert, unionFieldAttributes))
);
}
return convert;
return convertExpression;
}

private Expression createIfDoesNotAlreadyExist(
Expand Down Expand Up @@ -1594,12 +1601,7 @@ private static boolean canConvertOriginalTypes(MultiTypeEsField multiTypeEsField
);
}

private static Expression typeSpecificConvert(
AbstractConvertFunction convert,
Source source,
DataType type,
InvalidMappedField mtf
) {
private static Expression typeSpecificConvert(ConvertFunction convert, Source source, DataType type, InvalidMappedField mtf) {
EsField field = new EsField(mtf.getName(), type, mtf.getProperties(), mtf.isAggregatable());
FieldAttribute originalFieldAttr = (FieldAttribute) convert.field();
FieldAttribute resolvedAttr = new FieldAttribute(
Expand All @@ -1611,7 +1613,13 @@ private static Expression typeSpecificConvert(
originalFieldAttr.id(),
true
);
return convert.replaceChildren(Collections.singletonList(resolvedAttr));
Expression e = ((Expression) convert).replaceChildren(Collections.singletonList(resolvedAttr));
/*
* Resolve surrogates immediately because these type specific conversions are serialized
* and SurrogateExpressions are expected to be resolved on the coordinating node. At least,
* TO_IP is expected to be resolved there.
*/
return SubstituteSurrogateExpressions.rule(e);
}
}

Expand Down Expand Up @@ -1696,7 +1704,7 @@ public LogicalPlan apply(LogicalPlan plan) {

private static void typeResolutions(
FieldAttribute fieldAttribute,
AbstractConvertFunction convert,
ConvertFunction convert,
DataType type,
InvalidMappedField imf,
HashMap<ResolveUnionTypes.TypeResolutionKey, Expression> typeResolutions
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,10 @@
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDouble;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToGeoPoint;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToGeoShape;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToIP;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToInteger;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToIpLeadingZerosDecimal;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToIpLeadingZerosOctal;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToIpLeadingZerosRejected;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToLong;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToRadians;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToString;
Expand Down Expand Up @@ -207,7 +209,9 @@ public static List<NamedWriteableRegistry.Entry> unaryScalars() {
entries.add(ToGeoShape.ENTRY);
entries.add(ToCartesianShape.ENTRY);
entries.add(ToGeoPoint.ENTRY);
entries.add(ToIP.ENTRY);
entries.add(ToIpLeadingZerosDecimal.ENTRY);
entries.add(ToIpLeadingZerosOctal.ENTRY);
entries.add(ToIpLeadingZerosRejected.ENTRY);
entries.add(ToInteger.ENTRY);
entries.add(ToLong.ENTRY);
entries.add(ToRadians.ENTRY);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import org.elasticsearch.xpack.esql.core.tree.Source;
import org.elasticsearch.xpack.esql.core.type.DataType;
import org.elasticsearch.xpack.esql.core.util.Check;
import org.elasticsearch.xpack.esql.expression.SurrogateExpression;
import org.elasticsearch.xpack.esql.expression.function.aggregate.Avg;
import org.elasticsearch.xpack.esql.expression.function.aggregate.Count;
import org.elasticsearch.xpack.esql.expression.function.aggregate.CountDistinct;
Expand Down Expand Up @@ -55,8 +56,11 @@
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDouble;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToGeoPoint;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToGeoShape;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToIP;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToInteger;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToIp;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToIpLeadingZerosDecimal;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToIpLeadingZerosOctal;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToIpLeadingZerosRejected;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToLong;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToRadians;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToString;
Expand Down Expand Up @@ -241,6 +245,7 @@ public class EsqlFunctionRegistry {
public EsqlFunctionRegistry() {
register(functions());
buildDataTypesForStringLiteralConversion(functions());
nameSurrogates();
}

EsqlFunctionRegistry(FunctionDefinition... functions) {
Expand Down Expand Up @@ -413,7 +418,7 @@ private static FunctionDefinition[][] functions() {
def(ToDouble.class, ToDouble::new, "to_double", "to_dbl"),
def(ToGeoPoint.class, ToGeoPoint::new, "to_geopoint"),
def(ToGeoShape.class, ToGeoShape::new, "to_geoshape"),
def(ToIP.class, ToIP::new, "to_ip"),
def(ToIp.class, ToIp::new, "to_ip"),
def(ToInteger.class, ToInteger::new, "to_integer", "to_int"),
def(ToLong.class, ToLong::new, "to_long"),
def(ToRadians.class, ToRadians::new, "to_radians"),
Expand Down Expand Up @@ -808,6 +813,15 @@ protected void buildDataTypesForStringLiteralConversion(FunctionDefinition[]...
}
}

/**
* Add {@link #names} entries for functions that are not registered, but we rewrite to using {@link SurrogateExpression}.
*/
private void nameSurrogates() {
names.put(ToIpLeadingZerosRejected.class, "TO_IP");
names.put(ToIpLeadingZerosDecimal.class, "TO_IP");
names.put(ToIpLeadingZerosOctal.class, "TO_IP");
}

protected interface FunctionBuilder {
Function build(Source source, List<Expression> children, Configuration cfg);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
* {@link org.elasticsearch.xpack.esql.expression.function.scalar}.
* </p>
*/
public abstract class AbstractConvertFunction extends UnaryScalarFunction {
public abstract class AbstractConvertFunction extends UnaryScalarFunction implements ConvertFunction {

// the numeric types convert functions need to handle; the other numeric types are converted upstream to one of these
private static final List<DataType> NUMERIC_TYPES = List.of(DataType.INTEGER, DataType.LONG, DataType.UNSIGNED_LONG, DataType.DOUBLE);
Expand Down Expand Up @@ -76,11 +76,12 @@ protected TypeResolution resolveType() {
return isTypeOrUnionType(field(), factories()::containsKey, sourceText(), null, supportedTypesNames(supportedTypes()));
}

@Override
public Set<DataType> supportedTypes() {
return factories().keySet();
}

private static String supportedTypesNames(Set<DataType> types) {
static String supportedTypesNames(Set<DataType> types) {
List<String> supportedTypesNames = new ArrayList<>(types.size());
HashSet<DataType> supportTypes = new HashSet<>(types);
if (supportTypes.containsAll(NUMERIC_TYPES)) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.xpack.esql.expression.function.scalar.convert;

import org.elasticsearch.xpack.esql.core.expression.Expression;
import org.elasticsearch.xpack.esql.core.type.DataType;

import java.util.Set;

/**
* A function that converts from one type to another.
*/
public interface ConvertFunction {
/**
* Expression containing the values to be converted.
*/
Expression field();

/**
* The types that {@link #field()} can have.
*/
Set<DataType> supportedTypes();
}
Loading