Skip to content

Commit 8b9591e

Browse files
author
Patrick Bareiss
committed
update
1 parent 528f585 commit 8b9591e

File tree

1 file changed

+89
-11
lines changed

1 file changed

+89
-11
lines changed

bin/dataset_analyzer.py

Lines changed: 89 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -241,12 +241,14 @@ def _initialize_rules(self) -> List[CategoryRule]:
241241
CategoryRule(
242242
pattern=r".*crowdstrike.*",
243243
sourcetype="crowdstrike:events:sensor",
244+
source="crowdstrike",
244245
content_check="json",
245246
description="CrowdStrike Falcon sensor events"
246247
),
247248
CategoryRule(
248249
pattern=r".*falcon.*",
249250
sourcetype="crowdstrike:events:sensor",
251+
source="crowdstrike",
250252
content_check="json",
251253
description="CrowdStrike Falcon sensor events"
252254
),
@@ -276,6 +278,12 @@ def _initialize_rules(self) -> List[CategoryRule]:
276278
source="auditd",
277279
description="Linux auditd logs"
278280
),
281+
CategoryRule(
282+
pattern=r".*sysmon_linux.*",
283+
sourcetype="sysmon:linux",
284+
source="Syslog:Linux-Sysmon/Operational",
285+
description="Linux Sysmon logs"
286+
),
279287

280288
# Network and Firewall logs
281289
CategoryRule(
@@ -289,8 +297,9 @@ def _initialize_rules(self) -> List[CategoryRule]:
289297
description="Palo Alto firewall logs"
290298
),
291299
CategoryRule(
292-
pattern=r".*cisco.*",
293-
sourcetype="cisco:asa",
300+
pattern=r".*cisco_secure_firewall.*",
301+
sourcetype="cisco:sfw:estreamer",
302+
source="not_applicable",
294303
description="Cisco network device logs"
295304
),
296305

@@ -313,42 +322,108 @@ def _initialize_rules(self) -> List[CategoryRule]:
313322
CategoryRule(
314323
pattern=r".*iis.*",
315324
sourcetype="iis",
325+
source="iis",
316326
description="IIS web server logs"
317327
),
318328

319329
# Cloud and container logs
320330
CategoryRule(
321331
pattern=r".*aws.*",
322332
sourcetype="aws:cloudtrail",
333+
source="aws_cloudtrail",
323334
description="AWS CloudTrail logs"
324335
),
336+
CategoryRule(
337+
pattern=r".*asl.*",
338+
sourcetype="aws:cloudtrail:lake",
339+
source="aws_asl",
340+
description="AWS ASL logs"
341+
),
325342
CategoryRule(
326343
pattern=r".*azure.*",
327344
sourcetype="azure:monitor:aad",
345+
source="azure",
328346
description="Azure activity logs"
329347
),
330-
CategoryRule(
331-
pattern=r".*docker.*",
332-
sourcetype="docker",
333-
description="Docker container logs"
334-
),
335348
CategoryRule(
336349
pattern=r".*kubernetes.*",
337-
sourcetype="aws:cloudwatchlogs",
350+
sourcetype="__json",
351+
source="kubernetes",
338352
description="Kubernetes container logs"
339353
),
340354

341355
# Application logs
356+
CategoryRule(
357+
pattern=r".*okta.*",
358+
sourcetype="OktaIM2:log",
359+
source="Okta",
360+
description="Okta logs"
361+
),
362+
CategoryRule(
363+
pattern=r".*pingid.*",
364+
sourcetype="__json",
365+
source="PINGID",
366+
description="PingID logs"
367+
),
368+
CategoryRule(
369+
pattern=r".*gws.*",
370+
sourcetype="gws:reports:login",
371+
source="gws:reports:login",
372+
description="Google Workspace logs"
373+
),
374+
CategoryRule(
375+
pattern=r".*gsuite.*",
376+
sourcetype="gsuite:gmail:bigquery",
377+
source="http:gsuite",
378+
description="GSuite logs"
379+
),
380+
CategoryRule(
381+
pattern=r".*o365.*",
382+
sourcetype="o365:management:activity",
383+
source="o365",
384+
description="O365 logs"
385+
),
386+
CategoryRule(
387+
pattern=r".*cisco_duo.*",
388+
sourcetype="cisco:duo:administrator",
389+
source="duo",
390+
description="Cisco Duo logs"
391+
),
392+
CategoryRule(
393+
pattern=r".*esxi.*",
394+
sourcetype="vmw-syslog",
395+
source="vmware:esxlog",
396+
description="VMware ESXi logs"
397+
),
398+
CategoryRule(
399+
pattern=r".*zscalar.*",
400+
sourcetype="zscalernss-web",
401+
source="zscaler",
402+
description="Zscaler logs"
403+
),
404+
CategoryRule(
405+
pattern=r".*suricata.*",
406+
sourcetype="suricata",
407+
source="suricata",
408+
description="Suricata logs"
409+
),
342410
CategoryRule(
343411
pattern=r".*exchange.*",
344412
sourcetype="MSExchange:Management",
413+
source="MSExchange:Management",
345414
description="Microsoft Exchange logs"
346415
),
347416
CategoryRule(
348417
pattern=r".*sharepoint.*",
349418
sourcetype="sharepoint:uls",
350419
description="SharePoint logs"
351420
),
421+
CategoryRule(
422+
pattern=r".*crushftp.*",
423+
sourcetype="crushftp:sessionlogs",
424+
source="crushftp",
425+
description="CrushFTP logs"
426+
),
352427

353428
# JSON format logs (generic)
354429
CategoryRule(
@@ -447,10 +522,13 @@ def _apply_rules(self, file_path: Path) -> Optional[
447522
return None
448523

449524
def _is_data_file(self, file_path: Path) -> bool:
450-
"""Check if a file is a data file (not .yml or .zip)"""
525+
"""Check if a file is a data file (not .yml, .zip, or system files)"""
451526
excluded_extensions = {'.yml', '.yaml', '.zip', '.tar', '.gz',
452527
'.rar', '.7z'}
453-
return file_path.suffix.lower() not in excluded_extensions
528+
excluded_filenames = {'.ds_store'}
529+
530+
return (file_path.suffix.lower() not in excluded_extensions and
531+
file_path.name.lower() not in excluded_filenames)
454532

455533
def _find_datasets(self, technique_path: Path) -> List[Path]:
456534
"""Find all dataset files in a technique directory"""
@@ -1019,7 +1097,7 @@ def generate_summary_report(self, results: Dict[str, Dict]) -> str:
10191097
report += f"\nIgnored File Details:\n{'-' * 50}\n"
10201098
for ignored_file in ignored_files:
10211099
reason = ignored_file['reason'].replace('_', ' ').title()
1022-
report += f"{ignored_file['name']:<50} {reason}\n"
1100+
report += f"{ignored_file['path']:<80} {reason}\n"
10231101

10241102
report += f"\n{'-' * 50}\n"
10251103
report += (f"Report generated on: "

0 commit comments

Comments
 (0)