Skip to content

Commit f9d4dba

Browse files
Update Validation Errors for Index handling
1 parent 013ad5f commit f9d4dba

File tree

7 files changed

+157
-102
lines changed

7 files changed

+157
-102
lines changed

CLI.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ For instance, some users may want to increase the default value in cases where h
5151

5252
Setting the environment variable `DR_REMOTE_ESQL_VALIDATION` enables remote ESQL validation for rules that use ESQL queries. This validation is performed whenever the rule is loaded, including, for example, by the `view-rule` command. It requires the appropriate `kibana_url` or `cloud_id`, `api_key`, and `es_url` to be set in the config file or as environment variables.
5353

54+
Setting the environment variable `DR_SKIP_EMPTY_INDEX_CLEANUP` disables the cleanup of the remote test indices that are created as part of remote ESQL validation. By default, these indices are deleted after validation completes or when a validation error occurs.
55+
5456
## Importing rules into the repo
5557

5658
You can import rules into the repo using the `create-rule` or `import-rules-to-repo` commands. Both of these commands will

detection_rules/esql_errors.py

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
"""ESQL exceptions."""
22

3+
from elasticsearch import Elasticsearch # type: ignore[reportMissingTypeStubs]
4+
5+
from .misc import getdefault
6+
37
__all__ = (
48
"EsqlSchemaError",
59
"EsqlSemanticError",
@@ -8,29 +12,45 @@
812
)
913

1014

15+
def cleanup_empty_indices(
    elastic_client: Elasticsearch, index_patterns: tuple[str, ...] = ("rule-test-*", "test-*")
) -> None:
    """Remove every index that matches one of the patterns and reports a document count of "0".

    Nothing is deleted when the `skip_empty_index_cleanup` default resolves to a truthy value.
    """
    skip_cleanup = getdefault("skip_empty_index_cleanup")()
    if not skip_cleanup:
        for index_pattern in index_patterns:
            listing = elastic_client.cat.indices(index=index_pattern, format="json")
            # Collect the names first, then delete, so the listing is fully read before any removal.
            doomed: list[str] = []
            for entry in listing:  # type: ignore[reportMissingTypeStubs]
                if entry["docs.count"] == "0":
                    doomed.append(entry["index"])
            for index_name in doomed:
                _ = elastic_client.indices.delete(index=index_name)
26+
27+
1128
class EsqlSchemaError(Exception):
1229
"""Error in ESQL schema. Validated via Kibana until AST is available."""
1330

14-
def __init__(self, message: str):
31+
def __init__(self, message: str, elastic_client: Elasticsearch) -> None:
32+
cleanup_empty_indices(elastic_client)
1533
super().__init__(message)
1634

1735

1836
class EsqlSyntaxError(Exception):
1937
"""Error with ESQL syntax. Validated via Kibana until AST is available."""
2038

21-
def __init__(self, message: str):
39+
def __init__(self, message: str, elastic_client: Elasticsearch) -> None:
40+
cleanup_empty_indices(elastic_client)
2241
super().__init__(message)
2342

2443

2544
class EsqlSemanticError(Exception):
2645
"""Error with ESQL semantics. Validated via Kibana until AST is available."""
2746

28-
def __init__(self, message: str):
47+
def __init__(self, message: str, elastic_client: Elasticsearch) -> None:
48+
cleanup_empty_indices(elastic_client)
2949
super().__init__(message)
3050

3151

3252
class EsqlTypeMismatchError(Exception):
3353
"""Error when validating types in ESQL."""
3454

35-
def __init__(self, message: str):
55+
def __init__(self, message: str) -> None:
3656
super().__init__(message)

detection_rules/index_mappings.py

Lines changed: 81 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -11,30 +11,95 @@
1111

1212
from elastic_transport import ObjectApiResponse
1313
from elasticsearch import Elasticsearch # type: ignore[reportMissingTypeStubs]
14+
from elasticsearch.exceptions import BadRequestError
1415
from semver import Version
1516

1617
from . import ecs, integrations, misc, utils
1718
from .config import load_current_package_version
19+
from .esql_errors import EsqlSchemaError, EsqlSemanticError, EsqlSyntaxError, cleanup_empty_indices
1820
from .integrations import (
1921
load_integrations_manifests,
2022
load_integrations_schemas,
2123
)
2224
from .rule import RuleMeta
2325
from .schemas import get_stack_schemas
26+
from .schemas.definitions import HTTP_STATUS_BAD_REQUEST
27+
from .utils import combine_dicts
2428

2529

2630
def get_rule_integrations(metadata: RuleMeta) -> list[str]:
2731
"""Retrieve rule integrations from metadata."""
28-
rule_integrations: list[str] = []
2932
if metadata.integration:
30-
if isinstance(metadata.integration, list):
31-
rule_integrations = metadata.integration
32-
else:
33-
rule_integrations = [metadata.integration]
33+
rule_integrations: list[str] = (
34+
metadata.integration if isinstance(metadata.integration, list) else [metadata.integration]
35+
)
36+
else:
37+
rule_integrations: list[str] = []
3438
return rule_integrations
3539

3640

37-
def prepare_integration_mappings(
41+
def create_index_with_index_mapping(
    elastic_client: Elasticsearch, index_name: str, mappings: dict[str, Any]
) -> ObjectApiResponse[Any] | None:
    """Create an index with the specified mappings and settings to support large number of fields and nested objects.

    If the request is rejected because it would add shards beyond what the cluster accepts, empty
    test indices are cleaned up and the request is attempted one more time.

    Args:
        elastic_client: Client used to issue the create request.
        index_name: Name of the index to create.
        mappings: Field definitions placed under the `properties` key of the index mappings.

    Returns:
        The response of the successful create request.

    Raises:
        EsqlSchemaError: If the request is rejected for any other reason, or if the retry is rejected too.
    """

    def _create() -> ObjectApiResponse[Any]:
        # Single definition of the request so the first attempt and the retry cannot drift apart.
        return elastic_client.indices.create(
            index=index_name,
            mappings={"properties": mappings},
            settings={
                "index.mapping.total_fields.limit": 10000,
                "index.mapping.nested_fields.limit": 500,
                "index.mapping.nested_objects.limit": 10000,
            },
        )

    try:
        return _create()
    except BadRequestError as e:
        error_message = str(e)
        # Match the shard-limit rejection for any shard count; the bracketed number depends on how
        # many shards the new index would add, so it must not be hard-coded to "[2]".
        shard_limit_hit = (
            e.status_code == HTTP_STATUS_BAD_REQUEST
            and "validation_exception" in error_message
            and "Validation Failed: 1: this action would add [" in error_message
        )
        if not shard_limit_hit:
            raise EsqlSchemaError(error_message, elastic_client) from e

        # Free up shards held by empty test indices, then try exactly once more.
        cleanup_empty_indices(elastic_client)
        try:
            return _create()
        except BadRequestError as retry_error:
            raise EsqlSchemaError(str(retry_error), elastic_client) from retry_error
76+
77+
78+
def get_existing_mappings(elastic_client: Elasticsearch, indices: list[str]) -> tuple[dict[str, Any], dict[str, Any]]:
    """Collect the simulated index-template mappings for each requested index.

    Returns a pair: the mappings of all indices folded together with `combine_dicts`, and a
    lookup from each index name to its own mappings.
    """
    combined: dict[str, Any] = {}
    per_index: dict[str, Any] = {}
    for index_name in indices:
        per_index[index_name] = get_simulated_index_template_mappings(elastic_client, index_name)
        combine_dicts(combined, per_index[index_name])
    return combined, per_index
87+
88+
89+
def get_simulated_index_template_mappings(elastic_client: Elasticsearch, name: str) -> dict[str, Any]:
    """
    Return the mappings from the index configuration that would be applied
    to the specified index from an existing index template.

    An empty dict is returned when the simulation response is empty or carries no
    `template` / `mappings` / `properties` section, instead of raising `KeyError`.

    https://elasticsearch-py.readthedocs.io/en/stable/api/indices.html#elasticsearch.client.IndicesClient.simulate_index_template
    """
    template = elastic_client.indices.simulate_index_template(name=name)
    if not template:
        return {}
    # Any level of the path may be absent or empty (presumably when no template matches `name`),
    # so fall back to an empty mapping at each step rather than indexing blindly.
    simulated = template.get("template") or {}
    template_mappings = simulated.get("mappings") or {}
    return template_mappings.get("properties") or {}
100+
101+
102+
def prepare_integration_mappings( # noqa: PLR0913
38103
rule_integrations: list[str],
39104
event_dataset_integrations: list[utils.EventDataset],
40105
package_manifests: Any,
@@ -97,14 +162,14 @@ def create_remote_indices(
97162
"""Create remote indices for validation and return the index string."""
98163
suffix = str(int(time.time() * 1000))
99164
test_index = f"rule-test-index-{suffix}"
100-
response = misc.create_index_with_index_mapping(elastic_client, test_index, existing_mappings)
165+
response = create_index_with_index_mapping(elastic_client, test_index, existing_mappings)
101166
log(f"Index `{test_index}` created: {response}")
102167
full_index_str = test_index
103168

104169
# create all integration indices
105170
for index, properties in index_lookup.items():
106171
ind_index_str = f"test-{index.rstrip('*')}{suffix}"
107-
response = misc.create_index_with_index_mapping(elastic_client, ind_index_str, properties)
172+
response = create_index_with_index_mapping(elastic_client, ind_index_str, properties)
108173
log(f"Index `{ind_index_str}` created: {response}")
109174
full_index_str = f"{full_index_str}, {ind_index_str}"
110175

@@ -124,8 +189,13 @@ def execute_query_against_indices(
124189
response = elastic_client.esql.query(query=query)
125190
log(f"Got query response: {response}")
126191
query_columns = response.get("columns", [])
192+
except BadRequestError as e:
193+
error_msg = str(e)
194+
if "parsing_exception" in error_msg:
195+
raise EsqlSyntaxError(str(e), elastic_client) from e
196+
raise EsqlSemanticError(str(e), elastic_client) from e
127197
finally:
128-
if delete_indices:
198+
if delete_indices or misc.getdefault("skip_empty_index_cleanup")():
129199
for index_str in test_index_str.split(","):
130200
response = elastic_client.indices.delete(index=index_str.strip())
131201
log(f"Test index `{index_str}` deleted: {response}")
@@ -182,7 +252,7 @@ def get_ecs_schema_mappings(current_version: Version) -> dict[str, Any]:
182252
return ecs_schema
183253

184254

185-
def prepare_mappings(
255+
def prepare_mappings( # noqa: PLR0913
186256
elastic_client: Elasticsearch,
187257
indices: list[str],
188258
event_dataset_integrations: list[utils.EventDataset],
@@ -191,7 +261,7 @@ def prepare_mappings(
191261
log: Callable[[str], None],
192262
) -> tuple[dict[str, Any], dict[str, Any], dict[str, Any]]:
193263
"""Prepare index mappings for the given indices and rule integrations."""
194-
existing_mappings, index_lookup = misc.get_existing_mappings(elastic_client, indices)
264+
existing_mappings, index_lookup = get_existing_mappings(elastic_client, indices)
195265

196266
# Collect mappings for the integrations
197267
rule_integrations = get_rule_integrations(metadata)

detection_rules/misc.py

Lines changed: 1 addition & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,10 @@
1414

1515
import click
1616
import requests
17-
from elastic_transport import ObjectApiResponse
1817
from elasticsearch import AuthenticationException, Elasticsearch
19-
from elasticsearch.exceptions import BadRequestError
2018
from kibana import Kibana # type: ignore[reportMissingTypeStubs]
2119

22-
from .esql_errors import EsqlSchemaError
23-
from .utils import add_params, cached, combine_dicts, load_etc_dump
20+
from .utils import add_params, cached, load_etc_dump
2421

2522
LICENSE_HEADER = """
2623
Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
@@ -412,46 +409,3 @@ def _wrapped(*args: Any, **kwargs: Any) -> Any: # noqa: PLR0912
412409
return _wrapped
413410

414411
return _wrapper
415-
416-
417-
def get_simulated_index_template_mappings(elastic_client: Elasticsearch, name: str) -> dict[str, Any]:
418-
"""
419-
Return the mappings from the index configuration that would be applied
420-
to the specified index from an existing index template
421-
422-
https://elasticsearch-py.readthedocs.io/en/stable/api/indices.html#elasticsearch.client.IndicesClient.simulate_index_template
423-
"""
424-
template = elastic_client.indices.simulate_index_template(name=name)
425-
if not template:
426-
return {}
427-
return template["template"]["mappings"]["properties"]
428-
429-
430-
def create_index_with_index_mapping(
431-
elastic_client: Elasticsearch, index_name: str, mappings: dict[str, Any]
432-
) -> ObjectApiResponse[Any] | None:
433-
"""Create an index with the specified mappings and settings to support large number of fields and nested objects."""
434-
try:
435-
return elastic_client.indices.create(
436-
index=index_name,
437-
mappings={"properties": mappings},
438-
settings={
439-
"index.mapping.total_fields.limit": 10000,
440-
"index.mapping.nested_fields.limit": 500,
441-
"index.mapping.nested_objects.limit": 10000,
442-
},
443-
)
444-
except BadRequestError as e:
445-
if e.status_code == 400 and "validation_exception" in str(e):
446-
raise EsqlSchemaError(str(e)) from e
447-
448-
449-
def get_existing_mappings(elastic_client: Elasticsearch, indices: list[str]) -> tuple[dict[str, Any], dict[str, Any]]:
450-
"""Retrieve mappings for all matching existing index templates."""
451-
existing_mappings: dict[str, Any] = {}
452-
index_lookup: dict[str, Any] = {}
453-
for index in indices:
454-
index_tmpl_mappings = get_simulated_index_template_mappings(elastic_client, index)
455-
index_lookup[index] = index_tmpl_mappings
456-
combine_dicts(existing_mappings, index_tmpl_mappings)
457-
return existing_mappings, index_lookup

detection_rules/rule_validators.py

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -736,6 +736,12 @@ def log(self, val: str) -> None:
736736
if self.verbosity >= unit_test_verbose_level:
737737
print(f"{self.rule_id}:", val)
738738

739+
@property
def ast(self) -> Any:
    """Return the AST of the ESQL query; always None because it depends on an ESQL parser that is not implemented."""
    # Return None rather than raising, so reading this property does not trigger a not-implemented error.
    return None
744+
739745
@cached_property
740746
def unique_fields(self) -> list[str]: # type: ignore[reportIncompatibleMethodOverride]
741747
"""Return a list of unique fields in the query. Requires remote validation to have occurred."""
@@ -791,20 +797,23 @@ def validate(self, data: "QueryRuleData", rule_meta: RuleMeta, force_remote_vali
791797
if option.name is not None
792798
}
793799

794-
kibana_client = misc.get_kibana_client(**resolved_kibana_options)
795800
resolved_elastic_options = {
796801
option.name: option.default() if callable(option.default) else option.default
797802
for option in misc.elasticsearch_options
798803
if option.name is not None
799804
}
800-
elastic_client = misc.get_elasticsearch_client(**resolved_elastic_options)
801-
_ = self.remote_validate_rule(
802-
kibana_client,
803-
elastic_client,
804-
data.query,
805-
rule_meta,
806-
data.rule_id,
807-
)
805+
806+
with (
807+
misc.get_kibana_client(**resolved_kibana_options) as kibana_client, # type: ignore[reportUnknownVariableType]
808+
misc.get_elasticsearch_client(**resolved_elastic_options) as elastic_client, # type: ignore[reportUnknownVariableType]
809+
):
810+
_ = self.remote_validate_rule(
811+
kibana_client,
812+
elastic_client,
813+
data.query,
814+
rule_meta,
815+
data.rule_id,
816+
)
808817

809818
def remote_validate_rule_contents(
810819
self, kibana_client: Kibana, elastic_client: Elasticsearch, contents: TOMLRuleContents, verbosity: int = 0

detection_rules/schemas/definitions.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ def validator_wrapper(value: Any) -> Any:
5656
return validator_wrapper
5757

5858

59+
HTTP_STATUS_BAD_REQUEST = 400
5960
ASSET_TYPE = "security_rule"
6061
SAVED_OBJECT_TYPE = "security-rule"
6162

0 commit comments

Comments
 (0)