@@ -727,8 +727,14 @@ def log(val: str) -> None:
727727
728728 existing_mappings : dict [str , Any ] = {}
729729
730+ # TODO: do we need a separate index mapping for each index in the query? For now this is handled by index_lookup (index name -> mappings).
731+ # Do we also need separate indexes for each integration? Probably, at least it is dynamic for each rule (do we really want to load these per rule?)
732+ index_lookup : dict [str , Any ] = {}
733+ # NOTE do we need to cache what integration indexes have been loaded to prevent pushing tons into the evaluation?
734+
730735 for index in indices :
731736 index_tmpl_mappings = get_simulated_template_mappings (elastic_client , index )
737+ index_lookup [index ] = index_tmpl_mappings
732738 combine_dicts (existing_mappings , index_tmpl_mappings )
733739
734740 log (f"Collected mappings: { len (existing_mappings )} " )
@@ -769,34 +775,62 @@ def log(val: str) -> None:
769775 for stream in package_schema :
770776 flat_schema = package_schema [stream ]
771777 stream_mappings = flat_schema_to_mapping (flat_schema )
778+ # NOTE perhaps we need to actually create many test indexes for this to work properly
779+ # TODO update this for double defined cases like integration_mappings["aws"]["properties"]["inspector"]["properties"]["remediation"]
780+ # FIXED VIA NESTED FIELDS
781+ # which is both a keyword, and has fields
782+ # "aws.properties.inspector.properties.remediation.type": "keyword",
783+ # "aws.properties.inspector.properties.remediation.fields.recommendation.properties.text.type": "keyword",
772784 combine_dicts (integration_mappings , stream_mappings )
785+ index_lookup [f"{ integration } -{ stream } " ] = stream_mappings
773786
774787 log (f"Integration mappings prepared: { len (integration_mappings )} " )
775788
776789 combined_mappings = {}
777790 combine_dicts (combined_mappings , existing_mappings )
778791 combine_dicts (combined_mappings , integration_mappings )
779792 # NOTE non-ecs schema needs to have formatting updates prior to merge
780- # NOTE non-ecs schema uses Kibana reserved word "properties" as a field name
781- # e.g. "azure.auditlogs.properties.target_resources.0.display_name": "keyword",
793+ # NOTE non-ecs and ecs schema can conflict e.g. 'authentication_details': {'type': 'flattened'}
794+ # FIXED VIA NESTED FIELDS
795+ # "azure.signinlogs.properties.authentication_details.authentication_method": "keyword"
796+ # FAILURE: BadRequestError(400, 'illegal_argument_exception', "can't merge a non object mapping [azure.signinlogs.properties.authentication_details] with an object mapping")
782797 non_ecs_mapping = {}
783798 non_ecs = ecs .get_non_ecs_schema ()
784799 for index in indices :
785800 non_ecs_mapping .update (non_ecs .get (index , {}))
786801 non_ecs_mapping = ecs .flatten (non_ecs_mapping )
787802 non_ecs_mapping = convert_to_nested_schema (non_ecs_mapping )
788- if non_ecs_mapping :
789- combine_dicts (combined_mappings , non_ecs_mapping )
790-
791- if not combined_mappings :
803+ if not combined_mappings and not non_ecs_mapping :
792804 log ("ERROR: no mappings found for the rule" )
793805 raise ValueError ("No mappings found" )
794806
795807 # Creating a test index with the test name
796808 suffix = str (int (time .time () * 1000 ))
797809 test_index = f"rule-test-index-{ suffix } "
810+ test_non_ecs_index = f"rule-test-non-ecs-index-{ suffix } "
811+ # TODO: if this works, switch to the non-ecs index only
812+ # NOTE we will always have to have a base test index
813+ # This test index could have the index_tmpl_mappings for example
814+ full_index_str = test_index
815+ if non_ecs_mapping :
816+ full_index_str = test_non_ecs_index
817+
818+ for index in index_lookup :
819+ # log(f"Mappings for `{index}`: {index_lookup[index]}")
820+ ind_index_str = f"test-{ index .rstrip ('*' )} { suffix } "
821+ response = elastic_client .indices .create (
822+ index = ind_index_str ,
823+ mappings = {"properties" : index_lookup [index ]},
824+ settings = {
825+ "index.mapping.total_fields.limit" : 10000 ,
826+ "index.mapping.nested_fields.limit" : 500 ,
827+ "index.mapping.nested_objects.limit" : 10000 ,
828+ },
829+ )
830+ log (f"Index `{ test_non_ecs_index } ` created: { response } " )
831+ full_index_str = f"{ full_index_str } , { ind_index_str } "
798832
799- # creating an index
833+ # create indexes
800834 response = elastic_client .indices .create (
801835 index = test_index ,
802836 mappings = {"properties" : combined_mappings },
@@ -807,19 +841,39 @@ def log(val: str) -> None:
807841 },
808842 )
809843 log (f"Index `{ test_index } ` created: { response } " )
844+ test_index_str = test_index
845+ if non_ecs_mapping :
846+ response = elastic_client .indices .create (
847+ index = test_non_ecs_index ,
848+ mappings = {"properties" : non_ecs_mapping },
849+ settings = {
850+ "index.mapping.total_fields.limit" : 10000 ,
851+ "index.mapping.nested_fields.limit" : 500 ,
852+ "index.mapping.nested_objects.limit" : 10000 ,
853+ },
854+ )
855+ log (f"Index `{ test_non_ecs_index } ` created: { response } " )
856+ test_index_str = f"{ test_index } , { test_non_ecs_index } "
810857
811858 # Replace all sources with the test index
812859 query = contents .data .query
813- query = query .replace (indices_str , test_index )
860+ query = query .replace (indices_str , full_index_str )
814861
815862 try :
816- log (f"Executing a query against `{ test_index } `" )
863+ log (f"Executing a query against `{ test_index_str } `" )
817864 response = elastic_client .esql .query (query = query )
818865 log (f"Got query response: { response } " )
819866 query_columns = response .get ("columns" , [])
820867 finally :
821868 response = elastic_client .indices .delete (index = test_index )
822869 log (f"Test index `{ test_index } ` deleted: { response } " )
870+ if non_ecs_mapping :
871+ response = elastic_client .indices .delete (index = test_non_ecs_index )
872+ log (f"Test index `{ test_non_ecs_index } ` deleted: { response } " )
873+ for index in index_lookup :
874+ ind_index_str = f"test-{ index .rstrip ('*' )} { suffix } "
875+ response = elastic_client .indices .delete (index = ind_index_str )
876+ log (f"Test index `{ ind_index_str } ` deleted: { response } " )
823877
824878 query_column_names = [c ["name" ] for c in query_columns ]
825879 log (f"Got query columns: { ', ' .join (query_column_names )} " )
0 commit comments