Skip to content

Commit f0f7d21

Browse files
authored
[FR] Refactor Schema Validation & Support Multi-Dataset Sequence Validation (#5059)
1 parent 25539fd commit f0f7d21

File tree

7 files changed: +784 −480 lines changed

detection_rules/beats.py

Lines changed: 8 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -181,17 +181,20 @@ def get_beats_sub_schema(schema: dict[str, Any], beat: str, module: str, *datase
181181

182182
flattened: list[dict[str, Any]] = []
183183
beat_dir = schema[beat]
184-
module_dir = beat_dir.get("folders", {}).get("module", {}).get("folders", {}).get(module, {})
184+
# Normalize module name in case callers include quotes from rendered AST
185+
normalized_module = module.strip("\"' ")
186+
module_dir = beat_dir.get("folders", {}).get("module", {}).get("folders", {}).get(normalized_module, {})
185187

186188
# if we only have a module then we'll work with what we got
187189
all_datasets = datasets if datasets else [d for d in module_dir.get("folders", {}) if not d.startswith("_")]
188190

189191
for _dataset in all_datasets:
190192
# replace aws.s3 -> s3
191-
dataset = _dataset[len(module) + 1 :] if _dataset.startswith(module + ".") else _dataset
193+
ds = _dataset.strip("\"' ")
194+
dataset = ds[len(normalized_module) + 1 :] if ds.startswith(normalized_module + ".") else ds
192195

193196
dataset_dir = module_dir.get("folders", {}).get(dataset, {})
194-
flattened.extend(get_field_schema(dataset_dir, prefix=module + ".", include_common=True))
197+
flattened.extend(get_field_schema(dataset_dir, prefix=normalized_module + ".", include_common=True))
195198

196199
# we also need to capture (beta?) fields which are directly within the module _meta.files.fields
197200
flattened.extend(get_field_schema(module_dir, include_common=True))
@@ -268,11 +271,11 @@ def get_datasets_and_modules(tree: eql.ast.BaseNode | kql.ast.BaseNode) -> tuple
268271
and isinstance(node.right, eql.ast.String)
269272
):
270273
if node.left == eql.ast.Field("event", ["module"]):
271-
modules.add(node.right.render()) # type: ignore[reportUnknownMemberType]
274+
modules.add(node.right.value) # type: ignore[reportUnknownMemberType]
272275
elif node.left == eql.ast.Field("event", ["dataset"]) or node.left == eql.ast.Field(
273276
"data_stream", ["dataset"]
274277
):
275-
datasets.add(node.right.render()) # type: ignore[reportUnknownMemberType]
278+
datasets.add(node.right.value) # type: ignore[reportUnknownMemberType]
276279
elif isinstance(node, eql.ast.InSet):
277280
if node.expression == eql.ast.Field("event", ["module"]):
278281
modules.update(node.get_literals()) # type: ignore[reportUnknownMemberType]

detection_rules/etc/non-ecs-schema.json

Lines changed: 15 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,11 @@
11
{
2+
"auditbeat-*": {
3+
"auditd.data.addr": "keyword",
4+
"auditd.data.grantors": "keyword",
5+
"auditd.data.syscall": "keyword",
6+
"auditd.data.terminal": "keyword",
7+
"auditd.result": "keyword"
8+
},
29
"endgame-*": {
310
"endgame": {
411
"metadata": {
@@ -8,6 +15,9 @@
815
}
916
},
1017
"winlogbeat-*": {
18+
"problemchild.prediction": "long",
19+
"problemchild.prediction_probability": "long",
20+
"blocklist_label": "long",
1121
"winlog": {
1222
"event_data": {
1323
"AccessList": "keyword",
@@ -17,6 +27,7 @@
1727
"AllowedToDelegateTo": "keyword",
1828
"AttributeLDAPDisplayName": "keyword",
1929
"AttributeValue": "keyword",
30+
"AuditPolicyChangesDescription": "keyword",
2031
"CallerProcessName": "keyword",
2132
"CallTrace": "keyword",
2233
"ClientProcessId": "keyword",
@@ -57,7 +68,9 @@
5768
"Status": "keyword",
5869
"EnabledPrivilegeList": "keyword",
5970
"Operation": "keyword",
60-
"OperationType": "keyword"
71+
"OperationType": "keyword",
72+
"NewUACList": "keyword",
73+
"SubCategory": "keyword"
6174
}
6275
},
6376
"winlog.logon.type": "keyword",
@@ -199,6 +212,7 @@
199212
"azure.platformlogs.properties.id": "keyword"
200213
},
201214
"logs-o365.audit-*": {
215+
"o365.audit.ExtendedProperties.RequestType": "keyword",
202216
"o365.audit.ExtendedProperties.ResultStatusDetail": "keyword",
203217
"o365.audit.OperationProperties.Name": "keyword",
204218
"o365.audit.OperationProperties.Value": "keyword",

detection_rules/rule.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -710,8 +710,8 @@ def get_beats_schema(
710710
@cached
711711
def get_endgame_schema(self, indices: list[str], endgame_version: str) -> endgame.EndgameSchema | None:
712712
"""Get an assembled flat endgame schema."""
713-
714-
if indices and "endgame-*" not in indices:
713+
# Only include endgame when explicitly requested by TOML via indices
714+
if not indices or "endgame-*" not in indices:
715715
return None
716716

717717
endgame_schema = endgame.read_endgame_schema(endgame_version=endgame_version)

0 commit comments

Comments
 (0)