Skip to content

Commit cfc7364

Browse files
committed
refactor schema validation
1 parent c29d07a commit cfc7364

File tree

5 files changed

+466
-520
lines changed

5 files changed

+466
-520
lines changed

detection_rules/beats.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -181,17 +181,20 @@ def get_beats_sub_schema(schema: dict[str, Any], beat: str, module: str, *datase
181181

182182
flattened: list[dict[str, Any]] = []
183183
beat_dir = schema[beat]
184-
module_dir = beat_dir.get("folders", {}).get("module", {}).get("folders", {}).get(module, {})
184+
# Normalize module name in case callers include quotes from rendered AST
185+
normalized_module = module.strip("\"' ")
186+
module_dir = beat_dir.get("folders", {}).get("module", {}).get("folders", {}).get(normalized_module, {})
185187

186188
# if no datasets were passed, fall back to every dataset folder under the module (names starting with "_" are skipped)
187189
all_datasets = datasets if datasets else [d for d in module_dir.get("folders", {}) if not d.startswith("_")]
188190

189191
for _dataset in all_datasets:
190192
# strip the leading "<module>." prefix from the dataset name, e.g. aws.s3 -> s3
191-
dataset = _dataset[len(module) + 1 :] if _dataset.startswith(module + ".") else _dataset
193+
ds = _dataset.strip("\"' ")
194+
dataset = ds[len(normalized_module) + 1 :] if ds.startswith(normalized_module + ".") else ds
192195

193196
dataset_dir = module_dir.get("folders", {}).get(dataset, {})
194-
flattened.extend(get_field_schema(dataset_dir, prefix=module + ".", include_common=True))
197+
flattened.extend(get_field_schema(dataset_dir, prefix=normalized_module + ".", include_common=True))
195198

196199
# we also need to capture (beta?) fields which are directly within the module _meta.files.fields
197200
flattened.extend(get_field_schema(module_dir, include_common=True))
@@ -268,11 +271,11 @@ def get_datasets_and_modules(tree: eql.ast.BaseNode | kql.ast.BaseNode) -> tuple
268271
and isinstance(node.right, eql.ast.String)
269272
):
270273
if node.left == eql.ast.Field("event", ["module"]):
271-
modules.add(node.right.render()) # type: ignore[reportUnknownMemberType]
274+
modules.add(node.right.value) # type: ignore[reportUnknownMemberType]
272275
elif node.left == eql.ast.Field("event", ["dataset"]) or node.left == eql.ast.Field(
273276
"data_stream", ["dataset"]
274277
):
275-
datasets.add(node.right.render()) # type: ignore[reportUnknownMemberType]
278+
datasets.add(node.right.value) # type: ignore[reportUnknownMemberType]
276279
elif isinstance(node, eql.ast.InSet):
277280
if node.expression == eql.ast.Field("event", ["module"]):
278281
modules.update(node.get_literals()) # type: ignore[reportUnknownMemberType]

detection_rules/rule.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -710,8 +710,8 @@ def get_beats_schema(
710710
@cached
711711
def get_endgame_schema(self, indices: list[str], endgame_version: str) -> endgame.EndgameSchema | None:
712712
"""Get an assembled flat endgame schema."""
713-
714-
if indices and "endgame-*" not in indices:
713+
# Only include endgame when explicitly requested by TOML via indices
714+
if not indices or "endgame-*" not in indices:
715715
return None
716716

717717
endgame_schema = endgame.read_endgame_schema(endgame_version=endgame_version)

0 commit comments

Comments
 (0)