diff --git a/docs/reference/query-languages/esql/_snippets/commands/examples/docs-lookup-join.csv-spec/lookupJoinHostNameTwice.md b/docs/reference/query-languages/esql/_snippets/commands/examples/docs-lookup-join.csv-spec/lookupJoinHostNameTwice.md new file mode 100644 index 0000000000000..c27b8af8ccab7 --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/commands/examples/docs-lookup-join.csv-spec/lookupJoinHostNameTwice.md @@ -0,0 +1,7 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +```esql +FROM system_metrics +| LOOKUP JOIN host_inventory ON host.name +| LOOKUP JOIN ownerships ON host.name +``` diff --git a/docs/reference/query-languages/esql/_snippets/commands/examples/docs-lookup-join.csv-spec/lookupJoinServiceId.md b/docs/reference/query-languages/esql/_snippets/commands/examples/docs-lookup-join.csv-spec/lookupJoinServiceId.md new file mode 100644 index 0000000000000..074a7c87b66a4 --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/commands/examples/docs-lookup-join.csv-spec/lookupJoinServiceId.md @@ -0,0 +1,6 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +```esql +FROM app_logs +| LOOKUP JOIN service_owners ON service_id +``` diff --git a/docs/reference/query-languages/esql/_snippets/commands/examples/docs-lookup-join.csv-spec/lookupJoinSourceIp.md b/docs/reference/query-languages/esql/_snippets/commands/examples/docs-lookup-join.csv-spec/lookupJoinSourceIp.md new file mode 100644 index 0000000000000..eec8d15d9dd5c --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/commands/examples/docs-lookup-join.csv-spec/lookupJoinSourceIp.md @@ -0,0 +1,6 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. 
+ +```esql +FROM firewall_logs +| LOOKUP JOIN threat_list ON source.IP +``` diff --git a/docs/reference/query-languages/esql/_snippets/commands/examples/docs-lookup-join.csv-spec/lookupJoinSourceIpWhere.md b/docs/reference/query-languages/esql/_snippets/commands/examples/docs-lookup-join.csv-spec/lookupJoinSourceIpWhere.md new file mode 100644 index 0000000000000..070d6f2617ab0 --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/commands/examples/docs-lookup-join.csv-spec/lookupJoinSourceIpWhere.md @@ -0,0 +1,7 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +```esql +FROM firewall_logs +| LOOKUP JOIN threat_list ON source.IP +| WHERE threat_level IS NOT NULL +``` diff --git a/docs/reference/query-languages/esql/_snippets/commands/examples/lookup-join.csv-spec/filterOnLeftSide.md b/docs/reference/query-languages/esql/_snippets/commands/examples/lookup-join.csv-spec/filterOnLeftSide.md new file mode 100644 index 0000000000000..b21f9da0784ef --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/commands/examples/lookup-join.csv-spec/filterOnLeftSide.md @@ -0,0 +1,14 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. 
+ +```esql +FROM employees +| EVAL language_code = languages +| WHERE emp_no >= 10091 AND emp_no < 10094 +| LOOKUP JOIN languages_lookup ON language_code +``` + +| emp_no:integer | language_code:integer | language_name:keyword | +| --- | --- | --- | +| 10091 | 3 | Spanish | +| 10092 | 1 | English | +| 10093 | 3 | Spanish | diff --git a/docs/reference/query-languages/esql/_snippets/commands/examples/lookup-join.csv-spec/filterOnRightSide.md b/docs/reference/query-languages/esql/_snippets/commands/examples/lookup-join.csv-spec/filterOnRightSide.md new file mode 100644 index 0000000000000..6c02416323380 --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/commands/examples/lookup-join.csv-spec/filterOnRightSide.md @@ -0,0 +1,14 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +```esql +FROM employees +| EVAL language_code = languages +| LOOKUP JOIN languages_lookup ON language_code +| WHERE emp_no >= 10091 AND emp_no < 10094 +``` + +| emp_no:integer | language_code:integer | language_name:keyword | +| --- | --- | --- | +| 10091 | 3 | Spanish | +| 10092 | 1 | English | +| 10093 | 3 | Spanish | diff --git a/docs/reference/query-languages/esql/_snippets/commands/layout/lookup-join.md b/docs/reference/query-languages/esql/_snippets/commands/layout/lookup-join.md index 94d879770ba8f..c193f0dd0684c 100644 --- a/docs/reference/query-languages/esql/_snippets/commands/layout/lookup-join.md +++ b/docs/reference/query-languages/esql/_snippets/commands/layout/lookup-join.md @@ -52,53 +52,37 @@ In case of name collisions, the newly created columns will override existing col **IP Threat correlation**: This query would allow you to see if any source IPs match known malicious addresses. 
-```esql -FROM firewall_logs -| LOOKUP JOIN threat_list ON source.IP -``` +:::{include} ../examples/docs-lookup-join.csv-spec/lookupJoinSourceIp.md +::: To filter only for those rows that have a matching `threat_list` entry, use `WHERE ... IS NOT NULL` with a field from the lookup index: -```esql -FROM firewall_logs -| LOOKUP JOIN threat_list ON source.IP -| WHERE threat_level IS NOT NULL -``` +:::{include} ../examples/docs-lookup-join.csv-spec/lookupJoinSourceIpWhere.md +::: **Host metadata correlation**: This query pulls in environment or ownership details for each host to correlate with your metrics data. -```esql -FROM system_metrics -| LOOKUP JOIN host_inventory ON host.name -| LOOKUP JOIN employees ON host.name -``` +:::{include} ../examples/docs-lookup-join.csv-spec/lookupJoinHostNameTwice.md +::: **Service ownership mapping**: This query would show logs with the owning team or escalation information for faster triage and incident response. -```esql -FROM app_logs -| LOOKUP JOIN service_owners ON service_id -``` +:::{include} ../examples/docs-lookup-join.csv-spec/lookupJoinServiceId.md +::: `LOOKUP JOIN` is generally faster when there are fewer rows to join with. {{esql}} will try and perform any `WHERE` clause before the `LOOKUP JOIN` where possible. -The two following examples will have the same results. The two examples -have the `WHERE` clause before and after the `LOOKUP JOIN`. It does not +The following two examples will have the same results. One has the +`WHERE` clause before and the other after the `LOOKUP JOIN`. It does not matter how you write your query, our optimizer will move the filter before the lookup when possible. 
-```esql -FROM Left -| WHERE Language IS NOT NULL -| LOOKUP JOIN Right ON Key -``` +:::{include} ../examples/lookup-join.csv-spec/filterOnLeftSide.md +::: -```esql -FROM Left -| LOOKUP JOIN Right ON Key -| WHERE Language IS NOT NULL -``` +:::{include} ../examples/lookup-join.csv-spec/filterOnRightSide.md +::: diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java index c66ffb37184ef..6e1cdaa90b905 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java @@ -63,15 +63,14 @@ public class CsvTestsDataLoader { private static final TestDataset APPS = new TestDataset("apps"); private static final TestDataset APPS_SHORT = APPS.withIndex("apps_short").withTypeMapping(Map.of("id", "short")); private static final TestDataset LANGUAGES = new TestDataset("languages"); - private static final TestDataset LANGUAGES_LOOKUP = LANGUAGES.withIndex("languages_lookup") - .withSetting("languages_lookup-settings.json"); + private static final TestDataset LANGUAGES_LOOKUP = LANGUAGES.withIndex("languages_lookup").withSetting("lookup-settings.json"); private static final TestDataset LANGUAGES_LOOKUP_NON_UNIQUE_KEY = LANGUAGES_LOOKUP.withIndex("languages_lookup_non_unique_key") .withData("languages_non_unique_key.csv"); private static final TestDataset LANGUAGES_NESTED_FIELDS = new TestDataset( "languages_nested_fields", "mapping-languages_nested_fields.json", "languages_nested_fields.csv" - ).withSetting("languages_lookup-settings.json"); + ).withSetting("lookup-settings.json"); private static final TestDataset ALERTS = new TestDataset("alerts"); private static final TestDataset UL_LOGS = new TestDataset("ul_logs"); private static final TestDataset SAMPLE_DATA = new 
TestDataset("sample_data"); @@ -102,11 +101,17 @@ public class CsvTestsDataLoader { "partial_mapping_sample_data.csv" ).withSetting("source_parameters-settings.json"); private static final TestDataset CLIENT_IPS = new TestDataset("clientips"); - private static final TestDataset CLIENT_IPS_LOOKUP = CLIENT_IPS.withIndex("clientips_lookup") - .withSetting("clientips_lookup-settings.json"); + private static final TestDataset CLIENT_IPS_LOOKUP = CLIENT_IPS.withIndex("clientips_lookup").withSetting("lookup-settings.json"); private static final TestDataset MESSAGE_TYPES = new TestDataset("message_types"); private static final TestDataset MESSAGE_TYPES_LOOKUP = MESSAGE_TYPES.withIndex("message_types_lookup") - .withSetting("message_types_lookup-settings.json"); + .withSetting("lookup-settings.json"); + private static final TestDataset FIREWALL_LOGS = new TestDataset("firewall_logs").noData(); + private static final TestDataset THREAT_LIST = new TestDataset("threat_list").withSetting("lookup-settings.json").noData(); + private static final TestDataset APP_LOGS = new TestDataset("app_logs").noData(); + private static final TestDataset SERVICE_OWNERS = new TestDataset("service_owners").withSetting("lookup-settings.json").noData(); + private static final TestDataset SYSTEM_METRICS = new TestDataset("system_metrics").noData(); + private static final TestDataset HOST_INVENTORY = new TestDataset("host_inventory").withSetting("lookup-settings.json").noData(); + private static final TestDataset OWNERSHIPS = new TestDataset("ownerships").withSetting("lookup-settings.json").noData(); private static final TestDataset CLIENT_CIDR = new TestDataset("client_cidr"); private static final TestDataset AGES = new TestDataset("ages"); private static final TestDataset HEIGHTS = new TestDataset("heights"); @@ -160,6 +165,13 @@ public class CsvTestsDataLoader { Map.entry(CLIENT_IPS_LOOKUP.indexName, CLIENT_IPS_LOOKUP), Map.entry(MESSAGE_TYPES.indexName, MESSAGE_TYPES), 
Map.entry(MESSAGE_TYPES_LOOKUP.indexName, MESSAGE_TYPES_LOOKUP), + Map.entry(FIREWALL_LOGS.indexName, FIREWALL_LOGS), + Map.entry(THREAT_LIST.indexName, THREAT_LIST), + Map.entry(APP_LOGS.indexName, APP_LOGS), + Map.entry(SERVICE_OWNERS.indexName, SERVICE_OWNERS), + Map.entry(SYSTEM_METRICS.indexName, SYSTEM_METRICS), + Map.entry(HOST_INVENTORY.indexName, HOST_INVENTORY), + Map.entry(OWNERSHIPS.indexName, OWNERSHIPS), Map.entry(CLIENT_CIDR.indexName, CLIENT_CIDR), Map.entry(AGES.indexName, AGES), Map.entry(HEIGHTS.indexName, HEIGHTS), @@ -418,11 +430,14 @@ private static URL getResource(String name) { private static void load(RestClient client, TestDataset dataset, Logger logger, IndexCreator indexCreator) throws IOException { URL mapping = getResource("/" + dataset.mappingFileName); - URL data = getResource("/data/" + dataset.dataFileName); - Settings indexSettings = dataset.readSettingsFile(); indexCreator.createIndex(client, dataset.indexName, readMappingFile(mapping, dataset.typeMapping), indexSettings); - loadCsvData(client, dataset.indexName, data, dataset.allowSubFields, logger); + + // Some examples only test that the query and mappings are valid, and don't need example data. 
Use .noData() for those + if (dataset.dataFileName != null) { + URL data = getResource("/data/" + dataset.dataFileName); + loadCsvData(client, dataset.indexName, data, dataset.allowSubFields, logger); + } } private static String readMappingFile(URL resource, Map typeMapping) throws IOException { @@ -697,6 +712,18 @@ public TestDataset withData(String dataFileName) { ); } + public TestDataset noData() { + return new TestDataset( + indexName, + mappingFileName, + null, + settingFileName, + allowSubFields, + typeMapping, + requiresInferenceEndpoint + ); + } + public TestDataset withSetting(String settingFileName) { return new TestDataset( indexName, diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs-lookup-join.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs-lookup-join.csv-spec new file mode 100644 index 0000000000000..291da8f6cc2db --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs-lookup-join.csv-spec @@ -0,0 +1,70 @@ +########################################################### +# These tests were created specifically to satisfy the needs +# of the docs, and the lookup-join.md file in particular. +# Since those docs do not display output results, we only +# need to ensure that the tests run without error. +# This requires index mappings to be set up correctly, +# but no data needs to be loaded into the indices. +########################################################### + +# **IP Threat correlation**: This query would allow you to see if any source +# IPs match known malicious addresses. + +lookupJoinSourceIp +required_capability: join_lookup_v12 + +// tag::lookupJoinSourceIp[] +FROM firewall_logs +| LOOKUP JOIN threat_list ON source.IP +// end::lookupJoinSourceIp[] +; + +@timestamp:datetime | destination.IP:ip | message:keyword | source.IP:ip | threat_level:keyword +; + +# To filter only for those rows that have a matching `threat_list` entry, +# use `WHERE ... 
IS NOT NULL` with a field from the lookup index: + +lookupJoinSourceIpWhere +required_capability: join_lookup_v12 + +// tag::lookupJoinSourceIpWhere[] +FROM firewall_logs +| LOOKUP JOIN threat_list ON source.IP +| WHERE threat_level IS NOT NULL +// end::lookupJoinSourceIpWhere[] +; + +@timestamp:datetime | destination.IP:ip | message:keyword | source.IP:ip | threat_level:keyword +; + +# **Host metadata correlation**: This query pulls in environment or +# ownership details for each host to correlate with your metrics data. + +lookupJoinHostNameTwice +required_capability: join_lookup_v12 + +// tag::lookupJoinHostNameTwice[] +FROM system_metrics +| LOOKUP JOIN host_inventory ON host.name +| LOOKUP JOIN ownerships ON host.name +// end::lookupJoinHostNameTwice[] +; + +count:long | details:keyword | host.name:keyword | description:keyword | host.os:keyword | host.version:keyword | owner.name:keyword +; + +# **Service ownership mapping**: This query would show logs with the owning +# team or escalation information for faster triage and incident response. 
+ +lookupJoinServiceId +required_capability: join_lookup_v12 + +// tag::lookupJoinServiceId[] +FROM app_logs +| LOOKUP JOIN service_owners ON service_id +// end::lookupJoinServiceId[] +; + +@timestamp:datetime | message:keyword | service_id:keyword | owner:keyword +; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/languages_lookup-settings.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/languages_lookup-settings.json deleted file mode 100644 index b73d1f9accf92..0000000000000 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/languages_lookup-settings.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "index": { - "mode": "lookup" - } -} diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec index ee9d25c7d4474..56b35b9754c89 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec @@ -430,23 +430,48 @@ language_code:integer | language_name:keyword | country:text filterOnLeftSide required_capability: join_lookup_v12 +// tag::filterOnLeftSide[] FROM employees | EVAL language_code = languages +| WHERE emp_no >= 10091 AND emp_no < 10094 | LOOKUP JOIN languages_lookup ON language_code +// end::filterOnLeftSide[] | SORT emp_no | KEEP emp_no, language_code, language_name -| WHERE emp_no >= 10091 AND emp_no < 10094 ; +// tag::filterOnLeftSide-result[] emp_no:integer | language_code:integer | language_name:keyword 10091 | 3 | Spanish 10092 | 1 | English 10093 | 3 | Spanish +// end::filterOnLeftSide-result[] ; filterOnRightSide required_capability: join_lookup_v12 +// tag::filterOnRightSide[] +FROM employees +| EVAL language_code = languages +| LOOKUP JOIN languages_lookup ON language_code +| WHERE emp_no >= 10091 AND emp_no < 10094 +// end::filterOnRightSide[] +| SORT emp_no +| KEEP emp_no, language_code, language_name +; + 
+// tag::filterOnRightSide-result[] +emp_no:integer | language_code:integer | language_name:keyword +10091 | 3 | Spanish +10092 | 1 | English +10093 | 3 | Spanish +// end::filterOnRightSide-result[] +; + +filterOnRightSideMessages +required_capability: join_lookup_v12 + FROM sample_data | LOOKUP JOIN message_types_lookup ON message | WHERE type == "Error" diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/clientips_lookup-settings.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-settings.json similarity index 100% rename from x-pack/plugin/esql/qa/testFixtures/src/main/resources/clientips_lookup-settings.json rename to x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-settings.json diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-app_logs.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-app_logs.json new file mode 100644 index 0000000000000..94c7697015179 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-app_logs.json @@ -0,0 +1,13 @@ +{ + "properties": { + "@timestamp": { + "type": "date" + }, + "service_id": { + "type": "keyword" + }, + "message": { + "type": "keyword" + } + } +} diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-firewall_logs.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-firewall_logs.json new file mode 100644 index 0000000000000..90f073ee8169e --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-firewall_logs.json @@ -0,0 +1,24 @@ +{ + "properties": { + "@timestamp": { + "type": "date" + }, + "source": { + "properties": { + "IP": { + "type": "ip" + } + } + }, + "destination": { + "properties": { + "IP": { + "type": "ip" + } + } + }, + "message": { + "type": "keyword" + } + } +} diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-host_inventory.json 
b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-host_inventory.json new file mode 100644 index 0000000000000..516ca0f09d2ec --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-host_inventory.json @@ -0,0 +1,20 @@ +{ + "properties": { + "host": { + "properties": { + "name": { + "type": "keyword" + }, + "os": { + "type": "keyword" + }, + "version": { + "type": "keyword" + } + } + }, + "description": { + "type": "keyword" + } + } +} diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-ownerships.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-ownerships.json new file mode 100644 index 0000000000000..40dadaad80eae --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-ownerships.json @@ -0,0 +1,18 @@ +{ + "properties": { + "host": { + "properties": { + "name": { + "type": "keyword" + } + } + }, + "owner": { + "properties": { + "name": { + "type": "keyword" + } + } + } + } +} diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-service_owners.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-service_owners.json new file mode 100644 index 0000000000000..406573fc07ff5 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-service_owners.json @@ -0,0 +1,10 @@ +{ + "properties": { + "service_id": { + "type": "keyword" + }, + "owner": { + "type": "keyword" + } + } +} diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-system_metrics.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-system_metrics.json new file mode 100644 index 0000000000000..c4694de3211b4 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-system_metrics.json @@ -0,0 +1,17 @@ +{ + "properties": { + "host": { + "properties": { + "name": { + "type": "keyword" + } + } + }, + "count": { + "type": "long" + }, + "details": { + "type": "keyword" + } + } +} diff 
--git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-threat_list.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-threat_list.json new file mode 100644 index 0000000000000..a9929591b4862 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-threat_list.json @@ -0,0 +1,14 @@ +{ + "properties": { + "source": { + "properties": { + "IP": { + "type": "ip" + } + } + }, + "threat_level": { + "type": "keyword" + } + } +} diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/message_types_lookup-settings.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/message_types_lookup-settings.json deleted file mode 100644 index b73d1f9accf92..0000000000000 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/message_types_lookup-settings.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "index": { - "mode": "lookup" - } -}