From cae4ed4fbe5d638fd4cf6289864edc5db61b70ff Mon Sep 17 00:00:00 2001 From: Bogdan Pintea Date: Fri, 20 Jun 2025 16:57:08 +0200 Subject: [PATCH 1/4] Add one more test of FTF with LOOKUP JOIN --- .../xpack/esql/CsvTestsDataLoader.java | 4 ++-- .../src/main/resources/lookup-join.csv-spec | 18 +++++++++++++++- .../main/resources/match-operator.csv-spec | 21 +++++++++++++++++++ 3 files changed, 40 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java index f1eb32afbdfee..64915fc874ba8 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java @@ -87,7 +87,7 @@ public class CsvTestsDataLoader { private static final TestDataset SAMPLE_DATA_TS_NANOS = SAMPLE_DATA.withIndex("sample_data_ts_nanos") .withData("sample_data_ts_nanos.csv") .withTypeMapping(Map.of("@timestamp", "date_nanos")); - private static final TestDataset LOOKUP_SAMPLE_DATA_TS_NANOS = SAMPLE_DATA_TS_NANOS.withIndex("lookup_sample_data_ts_nanos") + private static final TestDataset SAMPLE_DATA_TS_NANOS_LOOKUP = SAMPLE_DATA_TS_NANOS.withIndex("sample_data_ts_nanos_lookup") .withSetting("lookup-settings.json"); private static final TestDataset MISSING_IP_SAMPLE_DATA = new TestDataset("missing_ip_sample_data"); private static final TestDataset SAMPLE_DATA_PARTIAL_MAPPING = new TestDataset("partial_mapping_sample_data"); @@ -172,7 +172,7 @@ public class CsvTestsDataLoader { Map.entry(SAMPLE_DATA_STR.indexName, SAMPLE_DATA_STR), Map.entry(SAMPLE_DATA_TS_LONG.indexName, SAMPLE_DATA_TS_LONG), Map.entry(SAMPLE_DATA_TS_NANOS.indexName, SAMPLE_DATA_TS_NANOS), - Map.entry(LOOKUP_SAMPLE_DATA_TS_NANOS.indexName, LOOKUP_SAMPLE_DATA_TS_NANOS), + 
Map.entry(SAMPLE_DATA_TS_NANOS_LOOKUP.indexName, SAMPLE_DATA_TS_NANOS_LOOKUP), Map.entry(MISSING_IP_SAMPLE_DATA.indexName, MISSING_IP_SAMPLE_DATA), Map.entry(CLIENT_IPS.indexName, CLIENT_IPS), Map.entry(CLIENT_IPS_LOOKUP.indexName, CLIENT_IPS_LOOKUP), diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec index 88e5ca2f482ef..de337cba60147 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec @@ -120,6 +120,22 @@ language_code:integer | language_name:keyword 4 | German ; +selfJoinWithOffset +required_capability: join_lookup_v12 + +FROM languages_lookup +| EVAL language_code = language_code + 2 +| LOOKUP JOIN languages_lookup ON language_code +| SORT language_code +; + +language_code:integer | language_name:keyword +3 |Spanish +4 |German +5 |null +6 |null +; + nonUniqueLeftKeyOnTheDataNode required_capability: join_lookup_v12 @@ -4648,7 +4664,7 @@ required_capability: join_lookup_v12 required_capability: date_nanos_lookup_join FROM sample_data_ts_nanos -| LOOKUP JOIN lookup_sample_data_ts_nanos ON @timestamp +| LOOKUP JOIN sample_data_ts_nanos_lookup ON @timestamp | KEEP @timestamp, client_ip, event_duration, message | SORT @timestamp DESC ; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/match-operator.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/match-operator.csv-spec index d93c6b3884d22..351cdc230a3d9 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/match-operator.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/match-operator.csv-spec @@ -752,6 +752,27 @@ host:keyword | semantic_text_field:text | language_name:keyword | language_code: "host1" | live long and prosper | English | 1 ; +testFTFWithLookupJoin +required_capability: match_operator_colon 
+required_capability: join_lookup_v12 + +FROM sample_*,-sample_data_* METADATA _index +| WHERE message : "Connected to 10.1.0.1" +| WHERE MATCH_PHRASE(message, "Connected to 10.1.0.1") +| WHERE KQL("message : Connected*") +| WHERE QSTR("message : *") +| RENAME @timestamp AS @timestamp_millis +| LOOKUP JOIN sample_data_ts_nanos_lookup ON client_ip +| KEEP *,_index +| SORT @timestamp +; + +@timestamp_millis:date |client_ip:ip |@timestamp:date_nanos |event_duration:long |message:keyword |_index:keyword +2023-10-23T13:55:01.543Z|172.21.3.15 |2023-10-23T13:51:54.732123456Z|725448 |Connection error |sample_data +2023-10-23T13:55:01.543Z|172.21.3.15 |2023-10-23T13:52:55.015123456Z|8268153 |Connection error |sample_data +2023-10-23T13:55:01.543Z|172.21.3.15 |2023-10-23T13:53:55.832123456Z|5033755 |Connection error |sample_data +2023-10-23T13:55:01.543Z|172.21.3.15 |2023-10-23T13:55:01.543123456Z|1756467 |Connected to 10.1.0.1|sample_data +; testMatchInStatsNonPushable required_capability: match_operator_colon From a655b84eb2bb61fe90b3a86aaef31030c6d2e3b6 Mon Sep 17 00:00:00 2001 From: Bogdan Pintea Date: Mon, 23 Jun 2025 14:10:23 +0200 Subject: [PATCH 2/4] resolve resolution conflict --- .../java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java | 3 ++- .../qa/testFixtures/src/main/resources/lookup-join.csv-spec | 2 +- .../testFixtures/src/main/resources/match-operator.csv-spec | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java index 64915fc874ba8..53a223c1453e1 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java @@ -87,7 +87,8 @@ public class CsvTestsDataLoader { private static
final TestDataset SAMPLE_DATA_TS_NANOS = SAMPLE_DATA.withIndex("sample_data_ts_nanos") .withData("sample_data_ts_nanos.csv") .withTypeMapping(Map.of("@timestamp", "date_nanos")); - private static final TestDataset SAMPLE_DATA_TS_NANOS_LOOKUP = SAMPLE_DATA_TS_NANOS.withIndex("sample_data_ts_nanos_lookup") + // the double underscore is meant to not match `sample_data*`, but still match `sample_*` + private static final TestDataset SAMPLE_DATA_TS_NANOS_LOOKUP = SAMPLE_DATA_TS_NANOS.withIndex("sample__data_ts_nanos_lookup") .withSetting("lookup-settings.json"); private static final TestDataset MISSING_IP_SAMPLE_DATA = new TestDataset("missing_ip_sample_data"); private static final TestDataset SAMPLE_DATA_PARTIAL_MAPPING = new TestDataset("partial_mapping_sample_data"); diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec index 337efee3fc171..6254b42e176fa 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec @@ -4724,7 +4724,7 @@ required_capability: join_lookup_v12 required_capability: date_nanos_lookup_join FROM sample_data_ts_nanos -| LOOKUP JOIN sample_data_ts_nanos_lookup ON @timestamp +| LOOKUP JOIN sample__data_ts_nanos_lookup ON @timestamp | KEEP @timestamp, client_ip, event_duration, message | SORT @timestamp DESC ; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/match-operator.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/match-operator.csv-spec index 351cdc230a3d9..17fd319795512 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/match-operator.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/match-operator.csv-spec @@ -756,13 +756,13 @@ testFTFWithLookupJoin required_capability: match_operator_colon required_capability: join_lookup_v12 -FROM sample_*,-sample_data_*
METADATA _index +FROM sample_*,-sample__data_* METADATA _index | WHERE message : "Connected to 10.1.0.1" | WHERE MATCH_PHRASE(message, "Connected to 10.1.0.1") | WHERE KQL("message : Connected*") | WHERE QSTR("message : *") | RENAME @timestamp AS @timestamp_millis -| LOOKUP JOIN sample_data_ts_nanos_lookup ON client_ip +| LOOKUP JOIN sample__data_ts_nanos_lookup ON client_ip | KEEP *,_index | SORT @timestamp ; From 25b4d61bdaccfdc78dd30247e5d2708fd746ff1e Mon Sep 17 00:00:00 2001 From: Bogdan Pintea Date: Mon, 23 Jun 2025 14:39:22 +0200 Subject: [PATCH 3/4] required caps --- .../qa/testFixtures/src/main/resources/match-operator.csv-spec | 3 +++ 1 file changed, 3 insertions(+) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/match-operator.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/match-operator.csv-spec index 17fd319795512..637a557461e77 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/match-operator.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/match-operator.csv-spec @@ -754,6 +754,9 @@ host:keyword | semantic_text_field:text | language_name:keyword | language_code: testFTFWithLookupJoin required_capability: match_operator_colon +required_capability: match_phrase_function +required_capability: kql_function +required_capability: qstr_function required_capability: join_lookup_v12 FROM sample_*,-sample__data_* METADATA _index From 2f25f1ab08dbffda084bf7a78744a5e3c5ac2c46 Mon Sep 17 00:00:00 2001 From: Bogdan Pintea Date: Mon, 23 Jun 2025 17:13:21 +0200 Subject: [PATCH 4/4] dedup results --- .../main/resources/match-operator.csv-spec | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/match-operator.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/match-operator.csv-spec index 637a557461e77..fa8cf51b6f89c 100644 --- 
a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/match-operator.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/match-operator.csv-spec @@ -759,22 +759,23 @@ required_capability: kql_function required_capability: qstr_function required_capability: join_lookup_v12 -FROM sample_*,-sample__data_* METADATA _index +FROM sample_*,-sample__data_* | WHERE message : "Connected to 10.1.0.1" | WHERE MATCH_PHRASE(message, "Connected to 10.1.0.1") | WHERE KQL("message : Connected*") | WHERE QSTR("message : *") -| RENAME @timestamp AS @timestamp_millis +| EVAL @timestamp_millis = TO_DATETIME(@timestamp) +| EVAL client_ip = TO_IP(client_ip) | LOOKUP JOIN sample__data_ts_nanos_lookup ON client_ip -| KEEP *,_index -| SORT @timestamp +| STATS BY @timestamp_millis, client_ip, @timestamp, event_duration, message +| SORT event_duration ; -@timestamp_millis:date |client_ip:ip |@timestamp:date_nanos |event_duration:long |message:keyword |_index:keyword -2023-10-23T13:55:01.543Z|172.21.3.15 |2023-10-23T13:51:54.732123456Z|725448 |Connection error |sample_data -2023-10-23T13:55:01.543Z|172.21.3.15 |2023-10-23T13:52:55.015123456Z|8268153 |Connection error |sample_data -2023-10-23T13:55:01.543Z|172.21.3.15 |2023-10-23T13:53:55.832123456Z|5033755 |Connection error |sample_data -2023-10-23T13:55:01.543Z|172.21.3.15 |2023-10-23T13:55:01.543123456Z|1756467 |Connected to 10.1.0.1|sample_data +@timestamp_millis:date |client_ip:ip |@timestamp:date_nanos |event_duration:long |message:keyword +2023-10-23T13:55:01.543Z|172.21.3.15 |2023-10-23T13:51:54.732123456Z|725448 |Connection error +2023-10-23T13:55:01.543Z|172.21.3.15 |2023-10-23T13:55:01.543123456Z|1756467 |Connected to 10.1.0.1 +2023-10-23T13:55:01.543Z|172.21.3.15 |2023-10-23T13:53:55.832123456Z|5033755 |Connection error +2023-10-23T13:55:01.543Z|172.21.3.15 |2023-10-23T13:52:55.015123456Z|8268153 |Connection error ; testMatchInStatsNonPushable