Skip to content

Commit 07dfcde

Browse files
ESQL: TO_IP can handle leading zeros (#126532) (#129697)
Modifies TO_IP so it can handle leading `0`s in ipv4s. Here's how it works now: ``` ROW ip = TO_IP("192.168.0.1") // OK! ROW ip = TO_IP("192.168.010.1") // Fails ``` This adds ``` ROW ip = TO_IP("192.168.010.1", {"leading_zeros": "octal"}) ROW ip = TO_IP("192.168.010.1", {"leading_zeros": "decimal"}) ``` We do this because there isn't a consensus on how to parse leading zeros in ipv4s. The standard unix tools like `ping` and `ftp` interpret leading zeros as octal. Java's built in ip parsing interprets them as decimal. Because folks are using this for security rules we need to support all the choices. Closes #125460 Co-authored-by: Nik Everett <[email protected]>
1 parent f43574c commit 07dfcde

32 files changed

+937
-205
lines changed

docs/changelog/126532.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 126532
2+
summary: TO_IP can handle leading zeros
3+
area: ES|QL
4+
type: bug
5+
issues:
6+
- 125460

x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,7 @@ public class CsvTestsDataLoader {
120120
private static final TestDataset ADDRESSES = new TestDataset("addresses");
121121
private static final TestDataset BOOKS = new TestDataset("books").withSetting("books-settings.json");
122122
private static final TestDataset SEMANTIC_TEXT = new TestDataset("semantic_text").withInferenceEndpoint(true);
123+
private static final TestDataset LOGS = new TestDataset("logs");
123124
private static final TestDataset MV_TEXT = new TestDataset("mv_text");
124125
private static final TestDataset DENSE_VECTOR = new TestDataset("dense_vector");
125126
private static final TestDataset COLORS = new TestDataset("colors");
@@ -172,6 +173,7 @@ public class CsvTestsDataLoader {
172173
Map.entry(ADDRESSES.indexName, ADDRESSES),
173174
Map.entry(BOOKS.indexName, BOOKS),
174175
Map.entry(SEMANTIC_TEXT.indexName, SEMANTIC_TEXT),
176+
Map.entry(LOGS.indexName, LOGS),
175177
Map.entry(MV_TEXT.indexName, MV_TEXT),
176178
Map.entry(DENSE_VECTOR.indexName, DENSE_VECTOR),
177179
Map.entry(COLORS.indexName, COLORS)
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
@timestamp:date ,system:keyword,message:keyword
2+
2023-10-23T13:55:01.543Z, ping,Pinging 192.168.86.046
3+
2023-10-23T13:55:01.544Z, cron,Running cats
4+
2023-10-23T13:55:01.545Z, java,Doing java stuff for 192.168.86.038
5+
2023-10-23T13:55:01.546Z, java,More java stuff

x-pack/plugin/esql/qa/testFixtures/src/main/resources/ip.csv-spec

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,86 @@ str1:keyword |str2:keyword |ip1:ip |ip2:ip
282282
// end::to_ip-result[]
283283
;
284284

285+
convertFromStringLeadingZerosDecimal
286+
required_capability: to_ip_leading_zeros
287+
// tag::to_ip_leading_zeros_decimal[]
288+
ROW s = "1.1.010.1" | EVAL ip = TO_IP(s, {"leading_zeros":"decimal"})
289+
// end::to_ip_leading_zeros_decimal[]
290+
;
291+
292+
// tag::to_ip_leading_zeros_decimal-result[]
293+
s:keyword | ip:ip
294+
1.1.010.1 | 1.1.10.1
295+
// end::to_ip_leading_zeros_decimal-result[]
296+
;
297+
298+
convertFromStringLeadingZerosOctal
299+
required_capability: to_ip_leading_zeros
300+
// tag::to_ip_leading_zeros_octal[]
301+
ROW s = "1.1.010.1" | EVAL ip = TO_IP(s, {"leading_zeros":"octal"})
302+
// end::to_ip_leading_zeros_octal[]
303+
;
304+
305+
// tag::to_ip_leading_zeros_octal-result[]
306+
s:keyword | ip:ip
307+
1.1.010.1 | 1.1.8.1
308+
// end::to_ip_leading_zeros_octal-result[]
309+
;
310+
311+
convertFromStringFancy
312+
required_capability: to_ip_leading_zeros
313+
FROM logs
314+
| KEEP @timestamp, system, message
315+
| EVAL client = CASE(
316+
system == "ping",
317+
TO_IP(REPLACE(message, "Pinging ", ""), {"leading_zeros": "octal"}),
318+
system == "java" AND STARTS_WITH(message, "Doing java stuff for "),
319+
TO_IP(REPLACE(message, "Doing java stuff for ", ""), {"leading_zeros": "decimal"}))
320+
| SORT @timestamp
321+
| LIMIT 4
322+
;
323+
324+
@timestamp:date |system:keyword|message:keyword |client:ip
325+
2023-10-23T13:55:01.543Z| ping|Pinging 192.168.86.046 |192.168.86.38
326+
2023-10-23T13:55:01.544Z| cron|Running cats |null
327+
2023-10-23T13:55:01.545Z| java|Doing java stuff for 192.168.86.038|192.168.86.38
328+
2023-10-23T13:55:01.546Z| java|More java stuff |null
329+
;
330+
331+
toIpInAgg
332+
ROW s = "1.1.1.1" | STATS COUNT(*) BY ip = TO_IP(s)
333+
;
334+
335+
COUNT(*):long | ip:ip
336+
1 | 1.1.1.1
337+
;
338+
339+
toIpInSort
340+
ROW s = "1.1.1.1" | SORT TO_IP(s)
341+
;
342+
343+
s:keyword
344+
1.1.1.1
345+
;
346+
347+
toIpInAggOctal
348+
required_capability: to_ip_leading_zeros
349+
ROW s = "1.1.010.1" | STATS COUNT(*) BY ip = TO_IP(s, {"leading_zeros":"octal"})
350+
;
351+
352+
COUNT(*):long | ip:ip
353+
1 | 1.1.8.1
354+
;
355+
356+
toIpInSortOctal
357+
required_capability: to_ip_leading_zeros
358+
ROW s = "1.1.010.1" | SORT TO_IP(s, {"leading_zeros":"octal"})
359+
;
360+
361+
s:keyword
362+
1.1.010.1
363+
;
364+
285365
cdirMatchOrsIPs
286366
required_capability: combine_disjunctive_cidrmatches
287367

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
{
2+
"properties" : {
3+
"@timestamp" : {
4+
"type" : "date"
5+
},
6+
"system" : {
7+
"type" : "keyword"
8+
},
9+
"message" : {
10+
"type" : "keyword"
11+
}
12+
}
13+
}

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -887,6 +887,11 @@ public enum Cap {
887887
*/
888888
TO_LOWER_MV,
889889

890+
/**
891+
* Support for the {@code leading_zeros} named parameter.
892+
*/
893+
TO_IP_LEADING_ZEROS,
894+
890895
/**
891896
* Resolve groupings before resolving references to groupings in the aggregations.
892897
*/

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java

Lines changed: 26 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@
5656
import org.elasticsearch.xpack.esql.expression.function.scalar.conditional.Greatest;
5757
import org.elasticsearch.xpack.esql.expression.function.scalar.conditional.Least;
5858
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.AbstractConvertFunction;
59+
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ConvertFunction;
5960
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.FoldablesConvertFunction;
6061
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDateNanos;
6162
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDouble;
@@ -70,6 +71,7 @@
7071
import org.elasticsearch.xpack.esql.index.EsIndex;
7172
import org.elasticsearch.xpack.esql.index.IndexResolution;
7273
import org.elasticsearch.xpack.esql.inference.ResolvedInference;
74+
import org.elasticsearch.xpack.esql.optimizer.rules.logical.SubstituteSurrogateExpressions;
7375
import org.elasticsearch.xpack.esql.parser.ParsingException;
7476
import org.elasticsearch.xpack.esql.plan.IndexPattern;
7577
import org.elasticsearch.xpack.esql.plan.logical.Aggregate;
@@ -1450,10 +1452,12 @@ private LogicalPlan doRule(LogicalPlan plan) {
14501452
int alreadyAddedUnionFieldAttributes = unionFieldAttributes.size();
14511453
// See if the eval function has an unresolved MultiTypeEsField field
14521454
// Replace the entire convert function with a new FieldAttribute (containing type conversion knowledge)
1453-
plan = plan.transformExpressionsOnly(
1454-
AbstractConvertFunction.class,
1455-
convert -> resolveConvertFunction(convert, unionFieldAttributes)
1456-
);
1455+
plan = plan.transformExpressionsOnly(e -> {
1456+
if (e instanceof ConvertFunction convert) {
1457+
return resolveConvertFunction(convert, unionFieldAttributes);
1458+
}
1459+
return e;
1460+
});
14571461
// If no union fields were generated, return the plan as is
14581462
if (unionFieldAttributes.size() == alreadyAddedUnionFieldAttributes) {
14591463
return plan;
@@ -1484,7 +1488,8 @@ private LogicalPlan doRule(LogicalPlan plan) {
14841488
return plan;
14851489
}
14861490

1487-
private Expression resolveConvertFunction(AbstractConvertFunction convert, List<FieldAttribute> unionFieldAttributes) {
1491+
private Expression resolveConvertFunction(ConvertFunction convert, List<FieldAttribute> unionFieldAttributes) {
1492+
Expression convertExpression = (Expression) convert;
14881493
if (convert.field() instanceof FieldAttribute fa && fa.field() instanceof InvalidMappedField imf) {
14891494
HashMap<TypeResolutionKey, Expression> typeResolutions = new HashMap<>();
14901495
Set<DataType> supportedTypes = convert.supportedTypes();
@@ -1517,7 +1522,7 @@ private Expression resolveConvertFunction(AbstractConvertFunction convert, List<
15171522
// example, an expression like multiTypeEsField(synthetic=false, date_nanos)::date_nanos::datetime is rewritten to
15181523
// multiTypeEsField(synthetic=true, date_nanos)::datetime, the implicit casting is overwritten by explicit casting and
15191524
// the multiTypeEsField is not casted to datetime directly.
1520-
if (convert.dataType() == mtf.getDataType()) {
1525+
if (((Expression) convert).dataType() == mtf.getDataType()) {
15211526
return createIfDoesNotAlreadyExist(fa, mtf, unionFieldAttributes);
15221527
}
15231528

@@ -1531,21 +1536,23 @@ private Expression resolveConvertFunction(AbstractConvertFunction convert, List<
15311536
String indexName = entry.getKey();
15321537
AbstractConvertFunction originalConversionFunction = (AbstractConvertFunction) entry.getValue();
15331538
Expression originalField = originalConversionFunction.field();
1534-
Expression newConvertFunction = convert.replaceChildren(Collections.singletonList(originalField));
1539+
Expression newConvertFunction = convertExpression.replaceChildren(Collections.singletonList(originalField));
15351540
indexToConversionExpressions.put(indexName, newConvertFunction);
15361541
}
15371542
MultiTypeEsField multiTypeEsField = new MultiTypeEsField(
15381543
fa.fieldName().string(),
1539-
convert.dataType(),
1544+
convertExpression.dataType(),
15401545
false,
15411546
indexToConversionExpressions
15421547
);
15431548
return createIfDoesNotAlreadyExist(fa, multiTypeEsField, unionFieldAttributes);
15441549
}
15451550
} else if (convert.field() instanceof AbstractConvertFunction subConvert) {
1546-
return convert.replaceChildren(Collections.singletonList(resolveConvertFunction(subConvert, unionFieldAttributes)));
1551+
return convertExpression.replaceChildren(
1552+
Collections.singletonList(resolveConvertFunction(subConvert, unionFieldAttributes))
1553+
);
15471554
}
1548-
return convert;
1555+
return convertExpression;
15491556
}
15501557

15511558
private Expression createIfDoesNotAlreadyExist(
@@ -1594,12 +1601,7 @@ private static boolean canConvertOriginalTypes(MultiTypeEsField multiTypeEsField
15941601
);
15951602
}
15961603

1597-
private static Expression typeSpecificConvert(
1598-
AbstractConvertFunction convert,
1599-
Source source,
1600-
DataType type,
1601-
InvalidMappedField mtf
1602-
) {
1604+
private static Expression typeSpecificConvert(ConvertFunction convert, Source source, DataType type, InvalidMappedField mtf) {
16031605
EsField field = new EsField(mtf.getName(), type, mtf.getProperties(), mtf.isAggregatable());
16041606
FieldAttribute originalFieldAttr = (FieldAttribute) convert.field();
16051607
FieldAttribute resolvedAttr = new FieldAttribute(
@@ -1611,7 +1613,13 @@ private static Expression typeSpecificConvert(
16111613
originalFieldAttr.id(),
16121614
true
16131615
);
1614-
return convert.replaceChildren(Collections.singletonList(resolvedAttr));
1616+
Expression e = ((Expression) convert).replaceChildren(Collections.singletonList(resolvedAttr));
1617+
/*
1618+
* Resolve surrogates immediately because these type specific conversions are serialized
1619+
* and SurrogateExpressions are expected to be resolved on the coordinating node. At least,
1620+
* TO_IP is expected to be resolved there.
1621+
*/
1622+
return SubstituteSurrogateExpressions.rule(e);
16151623
}
16161624
}
16171625

@@ -1696,7 +1704,7 @@ public LogicalPlan apply(LogicalPlan plan) {
16961704

16971705
private static void typeResolutions(
16981706
FieldAttribute fieldAttribute,
1699-
AbstractConvertFunction convert,
1707+
ConvertFunction convert,
17001708
DataType type,
17011709
InvalidMappedField imf,
17021710
HashMap<ResolveUnionTypes.TypeResolutionKey, Expression> typeResolutions

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/ExpressionWritables.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,10 @@
2626
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDouble;
2727
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToGeoPoint;
2828
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToGeoShape;
29-
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToIP;
3029
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToInteger;
30+
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToIpLeadingZerosDecimal;
31+
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToIpLeadingZerosOctal;
32+
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToIpLeadingZerosRejected;
3133
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToLong;
3234
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToRadians;
3335
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToString;
@@ -207,7 +209,9 @@ public static List<NamedWriteableRegistry.Entry> unaryScalars() {
207209
entries.add(ToGeoShape.ENTRY);
208210
entries.add(ToCartesianShape.ENTRY);
209211
entries.add(ToGeoPoint.ENTRY);
210-
entries.add(ToIP.ENTRY);
212+
entries.add(ToIpLeadingZerosDecimal.ENTRY);
213+
entries.add(ToIpLeadingZerosOctal.ENTRY);
214+
entries.add(ToIpLeadingZerosRejected.ENTRY);
211215
entries.add(ToInteger.ENTRY);
212216
entries.add(ToLong.ENTRY);
213217
entries.add(ToRadians.ENTRY);

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import org.elasticsearch.xpack.esql.core.tree.Source;
1717
import org.elasticsearch.xpack.esql.core.type.DataType;
1818
import org.elasticsearch.xpack.esql.core.util.Check;
19+
import org.elasticsearch.xpack.esql.expression.SurrogateExpression;
1920
import org.elasticsearch.xpack.esql.expression.function.aggregate.Avg;
2021
import org.elasticsearch.xpack.esql.expression.function.aggregate.Count;
2122
import org.elasticsearch.xpack.esql.expression.function.aggregate.CountDistinct;
@@ -55,8 +56,11 @@
5556
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDouble;
5657
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToGeoPoint;
5758
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToGeoShape;
58-
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToIP;
5959
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToInteger;
60+
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToIp;
61+
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToIpLeadingZerosDecimal;
62+
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToIpLeadingZerosOctal;
63+
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToIpLeadingZerosRejected;
6064
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToLong;
6165
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToRadians;
6266
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToString;
@@ -241,6 +245,7 @@ public class EsqlFunctionRegistry {
241245
public EsqlFunctionRegistry() {
242246
register(functions());
243247
buildDataTypesForStringLiteralConversion(functions());
248+
nameSurrogates();
244249
}
245250

246251
EsqlFunctionRegistry(FunctionDefinition... functions) {
@@ -413,7 +418,7 @@ private static FunctionDefinition[][] functions() {
413418
def(ToDouble.class, ToDouble::new, "to_double", "to_dbl"),
414419
def(ToGeoPoint.class, ToGeoPoint::new, "to_geopoint"),
415420
def(ToGeoShape.class, ToGeoShape::new, "to_geoshape"),
416-
def(ToIP.class, ToIP::new, "to_ip"),
421+
def(ToIp.class, ToIp::new, "to_ip"),
417422
def(ToInteger.class, ToInteger::new, "to_integer", "to_int"),
418423
def(ToLong.class, ToLong::new, "to_long"),
419424
def(ToRadians.class, ToRadians::new, "to_radians"),
@@ -808,6 +813,15 @@ protected void buildDataTypesForStringLiteralConversion(FunctionDefinition[]...
808813
}
809814
}
810815

816+
/**
817+
* Add {@link #names} entries for functions that are not registered, but we rewrite to using {@link SurrogateExpression}.
818+
*/
819+
private void nameSurrogates() {
820+
names.put(ToIpLeadingZerosRejected.class, "TO_IP");
821+
names.put(ToIpLeadingZerosDecimal.class, "TO_IP");
822+
names.put(ToIpLeadingZerosOctal.class, "TO_IP");
823+
}
824+
811825
protected interface FunctionBuilder {
812826
Function build(Source source, List<Expression> children, Configuration cfg);
813827
}

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/AbstractConvertFunction.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
* {@link org.elasticsearch.xpack.esql.expression.function.scalar}.
4444
* </p>
4545
*/
46-
public abstract class AbstractConvertFunction extends UnaryScalarFunction {
46+
public abstract class AbstractConvertFunction extends UnaryScalarFunction implements ConvertFunction {
4747

4848
// the numeric types convert functions need to handle; the other numeric types are converted upstream to one of these
4949
private static final List<DataType> NUMERIC_TYPES = List.of(DataType.INTEGER, DataType.LONG, DataType.UNSIGNED_LONG, DataType.DOUBLE);
@@ -76,11 +76,12 @@ protected TypeResolution resolveType() {
7676
return isTypeOrUnionType(field(), factories()::containsKey, sourceText(), null, supportedTypesNames(supportedTypes()));
7777
}
7878

79+
@Override
7980
public Set<DataType> supportedTypes() {
8081
return factories().keySet();
8182
}
8283

83-
private static String supportedTypesNames(Set<DataType> types) {
84+
static String supportedTypesNames(Set<DataType> types) {
8485
List<String> supportedTypesNames = new ArrayList<>(types.size());
8586
HashSet<DataType> supportTypes = new HashSet<>(types);
8687
if (supportTypes.containsAll(NUMERIC_TYPES)) {

0 commit comments

Comments
 (0)