Skip to content

Commit 07dfcde

Browse files
ESQL: TO_IP can handle leading zeros (elastic#126532) (elastic#129697)
Modifies TO_IP so it can handle leading `0`s in ipv4s. Here's how it works now: ``` ROW ip = TO_IP("192.168.0.1") // OK! ROW ip = TO_IP("192.168.010.1") // Fails ``` This adds ``` ROW ip = TO_IP("192.168.010.1", {"leading_zeros": "octal"}) ROW ip = TO_IP("192.168.010.1", {"leading_zeros": "decimal"}) ``` We do this because there isn't a consensus on how to parse leading zeros in ipv4s. The standard unix tools like `ping` and `ftp` interpret leading zeros as octal. Java's built in ip parsing interprets them as decimal. Because folks are using this for security rules we need to support all the choices. Closes elastic#125460 Co-authored-by: Nik Everett <[email protected]>
1 parent f43574c commit 07dfcde

32 files changed

+937
-205
lines changed

docs/changelog/126532.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 126532
2+
summary: TO_IP can handle leading zeros
3+
area: ES|QL
4+
type: bug
5+
issues:
6+
- 125460

x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,7 @@ public class CsvTestsDataLoader {
120120
private static final TestDataset ADDRESSES = new TestDataset("addresses");
121121
private static final TestDataset BOOKS = new TestDataset("books").withSetting("books-settings.json");
122122
private static final TestDataset SEMANTIC_TEXT = new TestDataset("semantic_text").withInferenceEndpoint(true);
123+
private static final TestDataset LOGS = new TestDataset("logs");
123124
private static final TestDataset MV_TEXT = new TestDataset("mv_text");
124125
private static final TestDataset DENSE_VECTOR = new TestDataset("dense_vector");
125126
private static final TestDataset COLORS = new TestDataset("colors");
@@ -172,6 +173,7 @@ public class CsvTestsDataLoader {
172173
Map.entry(ADDRESSES.indexName, ADDRESSES),
173174
Map.entry(BOOKS.indexName, BOOKS),
174175
Map.entry(SEMANTIC_TEXT.indexName, SEMANTIC_TEXT),
176+
Map.entry(LOGS.indexName, LOGS),
175177
Map.entry(MV_TEXT.indexName, MV_TEXT),
176178
Map.entry(DENSE_VECTOR.indexName, DENSE_VECTOR),
177179
Map.entry(COLORS.indexName, COLORS)
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
@timestamp:date ,system:keyword,message:keyword
2+
2023-10-23T13:55:01.543Z, ping,Pinging 192.168.86.046
3+
2023-10-23T13:55:01.544Z, cron,Running cats
4+
2023-10-23T13:55:01.545Z, java,Doing java stuff for 192.168.86.038
5+
2023-10-23T13:55:01.546Z, java,More java stuff

x-pack/plugin/esql/qa/testFixtures/src/main/resources/ip.csv-spec

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,86 @@ str1:keyword |str2:keyword |ip1:ip |ip2:ip
282282
// end::to_ip-result[]
283283
;
284284

285+
convertFromStringLeadingZerosDecimal
286+
required_capability: to_ip_leading_zeros
287+
// tag::to_ip_leading_zeros_decimal[]
288+
ROW s = "1.1.010.1" | EVAL ip = TO_IP(s, {"leading_zeros":"decimal"})
289+
// end::to_ip_leading_zeros_decimal[]
290+
;
291+
292+
// tag::to_ip_leading_zeros_decimal-result[]
293+
s:keyword | ip:ip
294+
1.1.010.1 | 1.1.10.1
295+
// end::to_ip_leading_zeros_decimal-result[]
296+
;
297+
298+
convertFromStringLeadingZerosOctal
299+
required_capability: to_ip_leading_zeros
300+
// tag::to_ip_leading_zeros_octal[]
301+
ROW s = "1.1.010.1" | EVAL ip = TO_IP(s, {"leading_zeros":"octal"})
302+
// end::to_ip_leading_zeros_octal[]
303+
;
304+
305+
// tag::to_ip_leading_zeros_octal-result[]
306+
s:keyword | ip:ip
307+
1.1.010.1 | 1.1.8.1
308+
// end::to_ip_leading_zeros_octal-result[]
309+
;
310+
311+
convertFromStringFancy
312+
required_capability: to_ip_leading_zeros
313+
FROM logs
314+
| KEEP @timestamp, system, message
315+
| EVAL client = CASE(
316+
system == "ping",
317+
TO_IP(REPLACE(message, "Pinging ", ""), {"leading_zeros": "octal"}),
318+
system == "java" AND STARTS_WITH(message, "Doing java stuff for "),
319+
TO_IP(REPLACE(message, "Doing java stuff for ", ""), {"leading_zeros": "decimal"}))
320+
| SORT @timestamp
321+
| LIMIT 4
322+
;
323+
324+
@timestamp:date |system:keyword|message:keyword |client:ip
325+
2023-10-23T13:55:01.543Z| ping|Pinging 192.168.86.046 |192.168.86.38
326+
2023-10-23T13:55:01.544Z| cron|Running cats |null
327+
2023-10-23T13:55:01.545Z| java|Doing java stuff for 192.168.86.038|192.168.86.38
328+
2023-10-23T13:55:01.546Z| java|More java stuff |null
329+
;
330+
331+
toIpInAgg
332+
ROW s = "1.1.1.1" | STATS COUNT(*) BY ip = TO_IP(s)
333+
;
334+
335+
COUNT(*):long | ip:ip
336+
1 | 1.1.1.1
337+
;
338+
339+
toIpInSort
340+
ROW s = "1.1.1.1" | SORT TO_IP(s)
341+
;
342+
343+
s:keyword
344+
1.1.1.1
345+
;
346+
347+
toIpInAggOctal
348+
required_capability: to_ip_leading_zeros
349+
ROW s = "1.1.010.1" | STATS COUNT(*) BY ip = TO_IP(s, {"leading_zeros":"octal"})
350+
;
351+
352+
COUNT(*):long | ip:ip
353+
1 | 1.1.8.1
354+
;
355+
356+
toIpInSortOctal
357+
required_capability: to_ip_leading_zeros
358+
ROW s = "1.1.010.1" | SORT TO_IP(s, {"leading_zeros":"octal"})
359+
;
360+
361+
s:keyword
362+
1.1.010.1
363+
;
364+
285365
cdirMatchOrsIPs
286366
required_capability: combine_disjunctive_cidrmatches
287367

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
{
2+
"properties" : {
3+
"@timestamp" : {
4+
"type" : "date"
5+
},
6+
"system" : {
7+
"type" : "keyword"
8+
},
9+
"message" : {
10+
"type" : "keyword"
11+
}
12+
}
13+
}

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -887,6 +887,11 @@ public enum Cap {
887887
*/
888888
TO_LOWER_MV,
889889

890+
/**
891+
* Support for the {@code leading_zeros} named parameter.
892+
*/
893+
TO_IP_LEADING_ZEROS,
894+
890895
/**
891896
* Resolve groupings before resolving references to groupings in the aggregations.
892897
*/

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java

Lines changed: 26 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@
5656
import org.elasticsearch.xpack.esql.expression.function.scalar.conditional.Greatest;
5757
import org.elasticsearch.xpack.esql.expression.function.scalar.conditional.Least;
5858
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.AbstractConvertFunction;
59+
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ConvertFunction;
5960
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.FoldablesConvertFunction;
6061
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDateNanos;
6162
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDouble;
@@ -70,6 +71,7 @@
7071
import org.elasticsearch.xpack.esql.index.EsIndex;
7172
import org.elasticsearch.xpack.esql.index.IndexResolution;
7273
import org.elasticsearch.xpack.esql.inference.ResolvedInference;
74+
import org.elasticsearch.xpack.esql.optimizer.rules.logical.SubstituteSurrogateExpressions;
7375
import org.elasticsearch.xpack.esql.parser.ParsingException;
7476
import org.elasticsearch.xpack.esql.plan.IndexPattern;
7577
import org.elasticsearch.xpack.esql.plan.logical.Aggregate;
@@ -1450,10 +1452,12 @@ private LogicalPlan doRule(LogicalPlan plan) {
14501452
int alreadyAddedUnionFieldAttributes = unionFieldAttributes.size();
14511453
// See if the eval function has an unresolved MultiTypeEsField field
14521454
// Replace the entire convert function with a new FieldAttribute (containing type conversion knowledge)
1453-
plan = plan.transformExpressionsOnly(
1454-
AbstractConvertFunction.class,
1455-
convert -> resolveConvertFunction(convert, unionFieldAttributes)
1456-
);
1455+
plan = plan.transformExpressionsOnly(e -> {
1456+
if (e instanceof ConvertFunction convert) {
1457+
return resolveConvertFunction(convert, unionFieldAttributes);
1458+
}
1459+
return e;
1460+
});
14571461
// If no union fields were generated, return the plan as is
14581462
if (unionFieldAttributes.size() == alreadyAddedUnionFieldAttributes) {
14591463
return plan;
@@ -1484,7 +1488,8 @@ private LogicalPlan doRule(LogicalPlan plan) {
14841488
return plan;
14851489
}
14861490

1487-
private Expression resolveConvertFunction(AbstractConvertFunction convert, List<FieldAttribute> unionFieldAttributes) {
1491+
private Expression resolveConvertFunction(ConvertFunction convert, List<FieldAttribute> unionFieldAttributes) {
1492+
Expression convertExpression = (Expression) convert;
14881493
if (convert.field() instanceof FieldAttribute fa && fa.field() instanceof InvalidMappedField imf) {
14891494
HashMap<TypeResolutionKey, Expression> typeResolutions = new HashMap<>();
14901495
Set<DataType> supportedTypes = convert.supportedTypes();
@@ -1517,7 +1522,7 @@ private Expression resolveConvertFunction(AbstractConvertFunction convert, List<
15171522
// example, an expression like multiTypeEsField(synthetic=false, date_nanos)::date_nanos::datetime is rewritten to
15181523
// multiTypeEsField(synthetic=true, date_nanos)::datetime, the implicit casting is overwritten by explicit casting and
15191524
// the multiTypeEsField is not casted to datetime directly.
1520-
if (convert.dataType() == mtf.getDataType()) {
1525+
if (((Expression) convert).dataType() == mtf.getDataType()) {
15211526
return createIfDoesNotAlreadyExist(fa, mtf, unionFieldAttributes);
15221527
}
15231528

@@ -1531,21 +1536,23 @@ private Expression resolveConvertFunction(AbstractConvertFunction convert, List<
15311536
String indexName = entry.getKey();
15321537
AbstractConvertFunction originalConversionFunction = (AbstractConvertFunction) entry.getValue();
15331538
Expression originalField = originalConversionFunction.field();
1534-
Expression newConvertFunction = convert.replaceChildren(Collections.singletonList(originalField));
1539+
Expression newConvertFunction = convertExpression.replaceChildren(Collections.singletonList(originalField));
15351540
indexToConversionExpressions.put(indexName, newConvertFunction);
15361541
}
15371542
MultiTypeEsField multiTypeEsField = new MultiTypeEsField(
15381543
fa.fieldName().string(),
1539-
convert.dataType(),
1544+
convertExpression.dataType(),
15401545
false,
15411546
indexToConversionExpressions
15421547
);
15431548
return createIfDoesNotAlreadyExist(fa, multiTypeEsField, unionFieldAttributes);
15441549
}
15451550
} else if (convert.field() instanceof AbstractConvertFunction subConvert) {
1546-
return convert.replaceChildren(Collections.singletonList(resolveConvertFunction(subConvert, unionFieldAttributes)));
1551+
return convertExpression.replaceChildren(
1552+
Collections.singletonList(resolveConvertFunction(subConvert, unionFieldAttributes))
1553+
);
15471554
}
1548-
return convert;
1555+
return convertExpression;
15491556
}
15501557

15511558
private Expression createIfDoesNotAlreadyExist(
@@ -1594,12 +1601,7 @@ private static boolean canConvertOriginalTypes(MultiTypeEsField multiTypeEsField
15941601
);
15951602
}
15961603

1597-
private static Expression typeSpecificConvert(
1598-
AbstractConvertFunction convert,
1599-
Source source,
1600-
DataType type,
1601-
InvalidMappedField mtf
1602-
) {
1604+
private static Expression typeSpecificConvert(ConvertFunction convert, Source source, DataType type, InvalidMappedField mtf) {
16031605
EsField field = new EsField(mtf.getName(), type, mtf.getProperties(), mtf.isAggregatable());
16041606
FieldAttribute originalFieldAttr = (FieldAttribute) convert.field();
16051607
FieldAttribute resolvedAttr = new FieldAttribute(
@@ -1611,7 +1613,13 @@ private static Expression typeSpecificConvert(
16111613
originalFieldAttr.id(),
16121614
true
16131615
);
1614-
return convert.replaceChildren(Collections.singletonList(resolvedAttr));
1616+
Expression e = ((Expression) convert).replaceChildren(Collections.singletonList(resolvedAttr));
1617+
/*
1618+
* Resolve surrogates immediately because these type specific conversions are serialized
1619+
* and SurrogateExpressions are expected to be resolved on the coordinating node. At least,
1620+
* TO_IP is expected to be resolved there.
1621+
*/
1622+
return SubstituteSurrogateExpressions.rule(e);
16151623
}
16161624
}
16171625

@@ -1696,7 +1704,7 @@ public LogicalPlan apply(LogicalPlan plan) {
16961704

16971705
private static void typeResolutions(
16981706
FieldAttribute fieldAttribute,
1699-
AbstractConvertFunction convert,
1707+
ConvertFunction convert,
17001708
DataType type,
17011709
InvalidMappedField imf,
17021710
HashMap<ResolveUnionTypes.TypeResolutionKey, Expression> typeResolutions

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/ExpressionWritables.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,10 @@
2626
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDouble;
2727
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToGeoPoint;
2828
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToGeoShape;
29-
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToIP;
3029
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToInteger;
30+
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToIpLeadingZerosDecimal;
31+
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToIpLeadingZerosOctal;
32+
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToIpLeadingZerosRejected;
3133
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToLong;
3234
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToRadians;
3335
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToString;
@@ -207,7 +209,9 @@ public static List<NamedWriteableRegistry.Entry> unaryScalars() {
207209
entries.add(ToGeoShape.ENTRY);
208210
entries.add(ToCartesianShape.ENTRY);
209211
entries.add(ToGeoPoint.ENTRY);
210-
entries.add(ToIP.ENTRY);
212+
entries.add(ToIpLeadingZerosDecimal.ENTRY);
213+
entries.add(ToIpLeadingZerosOctal.ENTRY);
214+
entries.add(ToIpLeadingZerosRejected.ENTRY);
211215
entries.add(ToInteger.ENTRY);
212216
entries.add(ToLong.ENTRY);
213217
entries.add(ToRadians.ENTRY);

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import org.elasticsearch.xpack.esql.core.tree.Source;
1717
import org.elasticsearch.xpack.esql.core.type.DataType;
1818
import org.elasticsearch.xpack.esql.core.util.Check;
19+
import org.elasticsearch.xpack.esql.expression.SurrogateExpression;
1920
import org.elasticsearch.xpack.esql.expression.function.aggregate.Avg;
2021
import org.elasticsearch.xpack.esql.expression.function.aggregate.Count;
2122
import org.elasticsearch.xpack.esql.expression.function.aggregate.CountDistinct;
@@ -55,8 +56,11 @@
5556
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDouble;
5657
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToGeoPoint;
5758
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToGeoShape;
58-
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToIP;
5959
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToInteger;
60+
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToIp;
61+
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToIpLeadingZerosDecimal;
62+
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToIpLeadingZerosOctal;
63+
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToIpLeadingZerosRejected;
6064
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToLong;
6165
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToRadians;
6266
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToString;
@@ -241,6 +245,7 @@ public class EsqlFunctionRegistry {
241245
public EsqlFunctionRegistry() {
242246
register(functions());
243247
buildDataTypesForStringLiteralConversion(functions());
248+
nameSurrogates();
244249
}
245250

246251
EsqlFunctionRegistry(FunctionDefinition... functions) {
@@ -413,7 +418,7 @@ private static FunctionDefinition[][] functions() {
413418
def(ToDouble.class, ToDouble::new, "to_double", "to_dbl"),
414419
def(ToGeoPoint.class, ToGeoPoint::new, "to_geopoint"),
415420
def(ToGeoShape.class, ToGeoShape::new, "to_geoshape"),
416-
def(ToIP.class, ToIP::new, "to_ip"),
421+
def(ToIp.class, ToIp::new, "to_ip"),
417422
def(ToInteger.class, ToInteger::new, "to_integer", "to_int"),
418423
def(ToLong.class, ToLong::new, "to_long"),
419424
def(ToRadians.class, ToRadians::new, "to_radians"),
@@ -808,6 +813,15 @@ protected void buildDataTypesForStringLiteralConversion(FunctionDefinition[]...
808813
}
809814
}
810815

816+
/**
817+
* Add {@link #names} entries for functions that are not registered, but we rewrite to using {@link SurrogateExpression}.
818+
*/
819+
private void nameSurrogates() {
820+
names.put(ToIpLeadingZerosRejected.class, "TO_IP");
821+
names.put(ToIpLeadingZerosDecimal.class, "TO_IP");
822+
names.put(ToIpLeadingZerosOctal.class, "TO_IP");
823+
}
824+
811825
protected interface FunctionBuilder {
812826
Function build(Source source, List<Expression> children, Configuration cfg);
813827
}

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/AbstractConvertFunction.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
* {@link org.elasticsearch.xpack.esql.expression.function.scalar}.
4444
* </p>
4545
*/
46-
public abstract class AbstractConvertFunction extends UnaryScalarFunction {
46+
public abstract class AbstractConvertFunction extends UnaryScalarFunction implements ConvertFunction {
4747

4848
// the numeric types convert functions need to handle; the other numeric types are converted upstream to one of these
4949
private static final List<DataType> NUMERIC_TYPES = List.of(DataType.INTEGER, DataType.LONG, DataType.UNSIGNED_LONG, DataType.DOUBLE);
@@ -76,11 +76,12 @@ protected TypeResolution resolveType() {
7676
return isTypeOrUnionType(field(), factories()::containsKey, sourceText(), null, supportedTypesNames(supportedTypes()));
7777
}
7878

79+
@Override
7980
public Set<DataType> supportedTypes() {
8081
return factories().keySet();
8182
}
8283

83-
private static String supportedTypesNames(Set<DataType> types) {
84+
static String supportedTypesNames(Set<DataType> types) {
8485
List<String> supportedTypesNames = new ArrayList<>(types.size());
8586
HashSet<DataType> supportTypes = new HashSet<>(types);
8687
if (supportTypes.containsAll(NUMERIC_TYPES)) {

0 commit comments

Comments
 (0)