Skip to content

Commit 6329493

Browse files
Add optional multi index method
1 parent e86a807 commit 6329493

File tree

1 file changed

+63
-9
lines changed

1 file changed

+63
-9
lines changed

detection_rules/rule_validators.py

Lines changed: 63 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -727,8 +727,14 @@ def log(val: str) -> None:
727727

728728
existing_mappings: dict[str, Any] = {}
729729

730+
# TODO: do we need a separate index mapping for each index in the query? That is what index_lookup (dict[str, Any]) below provides.
731+
# Do we also need separate indexes for each integration? Probably; at the very least the set is dynamic per rule (do we really want to load these per rule?)
732+
index_lookup: dict[str, Any] = {}
733+
# NOTE: do we need to cache which integration indexes have already been loaded, to avoid pushing a large number of them into the evaluation?
734+
730735
for index in indices:
731736
index_tmpl_mappings = get_simulated_template_mappings(elastic_client, index)
737+
index_lookup[index] = index_tmpl_mappings
732738
combine_dicts(existing_mappings, index_tmpl_mappings)
733739

734740
log(f"Collected mappings: {len(existing_mappings)}")
@@ -769,34 +775,62 @@ def log(val: str) -> None:
769775
for stream in package_schema:
770776
flat_schema = package_schema[stream]
771777
stream_mappings = flat_schema_to_mapping(flat_schema)
778+
# NOTE perhaps we need to actually create many test indexes for this to work properly
779+
# TODO update this for double defined cases like integration_mappings["aws"]["properties"]["inspector"]["properties"]["remediation"]
780+
# FIXED VIA NESTED FIELDS
781+
# which is both a keyword and has fields, e.g.:
782+
# "aws.properties.inspector.properties.remediation.type": "keyword",
783+
# "aws.properties.inspector.properties.remediation.fields.recommendation.properties.text.type": "keyword",
772784
combine_dicts(integration_mappings, stream_mappings)
785+
index_lookup[f"{integration}-{stream}"] = stream_mappings
773786

774787
log(f"Integration mappings prepared: {len(integration_mappings)}")
775788

776789
combined_mappings = {}
777790
combine_dicts(combined_mappings, existing_mappings)
778791
combine_dicts(combined_mappings, integration_mappings)
779792
# NOTE non-ecs schema needs to have formatting updates prior to merge
780-
# NOTE non-ecs schema uses Kibana reserved word "properties" as a field name
781-
# e.g. "azure.auditlogs.properties.target_resources.0.display_name": "keyword",
793+
# NOTE: non-ECS and ECS schemas can conflict, e.g. 'authentication_details': {'type': 'flattened'}
794+
# FIXED VIA NESTED FIELDS
795+
# "azure.signinlogs.properties.authentication_details.authentication_method": "keyword"
796+
# FAILURE: BadRequestError(400, 'illegal_argument_exception', "can't merge a non object mapping [azure.signinlogs.properties.authentication_details] with an object mapping")
782797
non_ecs_mapping = {}
783798
non_ecs = ecs.get_non_ecs_schema()
784799
for index in indices:
785800
non_ecs_mapping.update(non_ecs.get(index, {}))
786801
non_ecs_mapping = ecs.flatten(non_ecs_mapping)
787802
non_ecs_mapping = convert_to_nested_schema(non_ecs_mapping)
788-
if non_ecs_mapping:
789-
combine_dicts(combined_mappings, non_ecs_mapping)
790-
791-
if not combined_mappings:
803+
if not combined_mappings and not non_ecs_mapping:
792804
log("ERROR: no mappings found for the rule")
793805
raise ValueError("No mappings found")
794806

795807
# Creating a test index with the test name
796808
suffix = str(int(time.time() * 1000))
797809
test_index = f"rule-test-index-{suffix}"
810+
test_non_ecs_index = f"rule-test-non-ecs-index-{suffix}"
811+
# TODO: if this works, switch to using the non-ECS index only
812+
# NOTE we will always have to have a base test index
813+
# This test index could have the index_tmpl_mappings for example
814+
full_index_str = test_index
815+
if non_ecs_mapping:
816+
full_index_str = test_non_ecs_index
817+
818+
for index in index_lookup:
819+
# log(f"Mappings for `{index}`: {index_lookup[index]}")
820+
ind_index_str = f"test-{index.rstrip('*')}{suffix}"
821+
response = elastic_client.indices.create(
822+
index=ind_index_str,
823+
mappings={"properties": index_lookup[index]},
824+
settings={
825+
"index.mapping.total_fields.limit": 10000,
826+
"index.mapping.nested_fields.limit": 500,
827+
"index.mapping.nested_objects.limit": 10000,
828+
},
829+
)
830+
log(f"Index `{test_non_ecs_index}` created: {response}")
831+
full_index_str = f"{full_index_str}, {ind_index_str}"
798832

799-
# creating an index
833+
# create indexes
800834
response = elastic_client.indices.create(
801835
index=test_index,
802836
mappings={"properties": combined_mappings},
@@ -807,19 +841,39 @@ def log(val: str) -> None:
807841
},
808842
)
809843
log(f"Index `{test_index}` created: {response}")
844+
test_index_str = test_index
845+
if non_ecs_mapping:
846+
response = elastic_client.indices.create(
847+
index=test_non_ecs_index,
848+
mappings={"properties": non_ecs_mapping},
849+
settings={
850+
"index.mapping.total_fields.limit": 10000,
851+
"index.mapping.nested_fields.limit": 500,
852+
"index.mapping.nested_objects.limit": 10000,
853+
},
854+
)
855+
log(f"Index `{test_non_ecs_index}` created: {response}")
856+
test_index_str = f"{test_index}, {test_non_ecs_index}"
810857

811858
# Replace all sources with the test index
812859
query = contents.data.query
813-
query = query.replace(indices_str, test_index)
860+
query = query.replace(indices_str, full_index_str)
814861

815862
try:
816-
log(f"Executing a query against `{test_index}`")
863+
log(f"Executing a query against `{test_index_str}`")
817864
response = elastic_client.esql.query(query=query)
818865
log(f"Got query response: {response}")
819866
query_columns = response.get("columns", [])
820867
finally:
821868
response = elastic_client.indices.delete(index=test_index)
822869
log(f"Test index `{test_index}` deleted: {response}")
870+
if non_ecs_mapping:
871+
response = elastic_client.indices.delete(index=test_non_ecs_index)
872+
log(f"Test index `{test_non_ecs_index}` deleted: {response}")
873+
for index in index_lookup:
874+
ind_index_str = f"test-{index.rstrip('*')}{suffix}"
875+
response = elastic_client.indices.delete(index=ind_index_str)
876+
log(f"Test index `{ind_index_str}` deleted: {response}")
823877

824878
query_column_names = [c["name"] for c in query_columns]
825879
log(f"Got query columns: {', '.join(query_column_names)}")

0 commit comments

Comments
 (0)