@@ -241,12 +241,14 @@ def _initialize_rules(self) -> List[CategoryRule]:
241241 CategoryRule (
242242 pattern = r".*crowdstrike.*" ,
243243 sourcetype = "crowdstrike:events:sensor" ,
244+ source = "crowdstrike" ,
244245 content_check = "json" ,
245246 description = "CrowdStrike Falcon sensor events"
246247 ),
247248 CategoryRule (
248249 pattern = r".*falcon.*" ,
249250 sourcetype = "crowdstrike:events:sensor" ,
251+ source = "crowdstrike" ,
250252 content_check = "json" ,
251253 description = "CrowdStrike Falcon sensor events"
252254 ),
@@ -276,6 +278,12 @@ def _initialize_rules(self) -> List[CategoryRule]:
276278 source = "auditd" ,
277279 description = "Linux auditd logs"
278280 ),
281+ CategoryRule (
282+ pattern = r".*sysmon_linux.*" ,
283+ sourcetype = "sysmon:linux" ,
284+ source = "Syslog:Linux-Sysmon/Operational" ,
285+ description = "Linux Sysmon logs"
286+ ),
279287
280288 # Network and Firewall logs
281289 CategoryRule (
@@ -289,8 +297,9 @@ def _initialize_rules(self) -> List[CategoryRule]:
289297 description = "Palo Alto firewall logs"
290298 ),
291299 CategoryRule (
292- pattern = r".*cisco.*" ,
293- sourcetype = "cisco:asa" ,
300+ pattern = r".*cisco_secure_firewall.*" ,
301+ sourcetype = "cisco:sfw:estreamer" ,
302+ source = "not_applicable" ,
294303 description = "Cisco network device logs"
295304 ),
296305
@@ -313,42 +322,108 @@ def _initialize_rules(self) -> List[CategoryRule]:
313322 CategoryRule (
314323 pattern = r".*iis.*" ,
315324 sourcetype = "iis" ,
325+ source = "iis" ,
316326 description = "IIS web server logs"
317327 ),
318328
319329 # Cloud and container logs
320330 CategoryRule (
321331 pattern = r".*aws.*" ,
322332 sourcetype = "aws:cloudtrail" ,
333+ source = "aws_cloudtrail" ,
323334 description = "AWS CloudTrail logs"
324335 ),
336+ CategoryRule (
337+ pattern = r".*asl.*" ,
338+ sourcetype = "aws:cloudtrail:lake" ,
339+ source = "aws_asl" ,
340+ description = "AWS ASL logs"
341+ ),
325342 CategoryRule (
326343 pattern = r".*azure.*" ,
327344 sourcetype = "azure:monitor:aad" ,
345+ source = "azure" ,
328346 description = "Azure activity logs"
329347 ),
330- CategoryRule (
331- pattern = r".*docker.*" ,
332- sourcetype = "docker" ,
333- description = "Docker container logs"
334- ),
335348 CategoryRule (
336349 pattern = r".*kubernetes.*" ,
337- sourcetype = "aws:cloudwatchlogs" ,
350+ sourcetype = "__json" ,
351+ source = "kubernetes" ,
338352 description = "Kubernetes container logs"
339353 ),
340354
341355 # Application logs
356+ CategoryRule (
357+ pattern = r".*okta.*" ,
358+ sourcetype = "OktaIM2:log" ,
359+ source = "Okta" ,
360+ description = "Okta logs"
361+ ),
362+ CategoryRule (
363+ pattern = r".*pingid.*" ,
364+ sourcetype = "__json" ,
365+ source = "PINGID" ,
366+ description = "PingID logs"
367+ ),
368+ CategoryRule (
369+ pattern = r".*gws.*" ,
370+ sourcetype = "gws:reports:login" ,
371+ source = "gws:reports:login" ,
372+ description = "Google Workspace logs"
373+ ),
374+ CategoryRule (
375+ pattern = r".*gsuite.*" ,
376+ sourcetype = "gsuite:gmail:bigquery" ,
377+ source = "http:gsuite" ,
378+ description = "GSuite logs"
379+ ),
380+ CategoryRule (
381+ pattern = r".*o365.*" ,
382+ sourcetype = "o365:management:activity" ,
383+ source = "o365" ,
384+ description = "O365 logs"
385+ ),
386+ CategoryRule (
387+ pattern = r".*cisco_duo.*" ,
388+ sourcetype = "cisco:duo:administrator" ,
389+ source = "duo" ,
390+ description = "Cisco Duo logs"
391+ ),
392+ CategoryRule (
393+ pattern = r".*esxi.*" ,
394+ sourcetype = "vmw-syslog" ,
395+ source = "vmware:esxlog" ,
396+ description = "VMware ESXi logs"
397+ ),
398+ CategoryRule (
399+ pattern = r".*zscalar.*" ,
400+ sourcetype = "zscalernss-web" ,
401+ source = "zscaler" ,
402+ description = "Zscaler logs"
403+ ),
404+ CategoryRule (
405+ pattern = r".*suricata.*" ,
406+ sourcetype = "suricata" ,
407+ source = "suricata" ,
408+ description = "Suricata logs"
409+ ),
342410 CategoryRule (
343411 pattern = r".*exchange.*" ,
344412 sourcetype = "MSExchange:Management" ,
413+ source = "MSExchange:Management" ,
345414 description = "Microsoft Exchange logs"
346415 ),
347416 CategoryRule (
348417 pattern = r".*sharepoint.*" ,
349418 sourcetype = "sharepoint:uls" ,
350419 description = "SharePoint logs"
351420 ),
421+ CategoryRule (
422+ pattern = r".*crushftp.*" ,
423+ sourcetype = "crushftp:sessionlogs" ,
424+ source = "crushftp" ,
425+ description = "CrushFTP logs"
426+ ),
352427
353428 # JSON format logs (generic)
354429 CategoryRule (
@@ -447,10 +522,13 @@ def _apply_rules(self, file_path: Path) -> Optional[
447522 return None
448523
def _is_data_file(self, file_path: Path) -> bool:
    """Return True when *file_path* should be treated as a dataset file.

    A file is rejected when its final suffix (compared case-insensitively)
    is a YAML or archive extension (.yml, .yaml, .zip, .tar, .gz, .rar,
    .7z), or when its file name (compared case-insensitively) is a known
    OS metadata file such as ``.DS_Store``.

    Args:
        file_path: Path to classify. Only the path's name is inspected;
            the file system is not accessed.

    Returns:
        True if the file is excluded neither by extension nor by name,
        False otherwise.
    """
    excluded_extensions = {'.yml', '.yaml', '.zip', '.tar', '.gz',
                           '.rar', '.7z'}
    # Matched against the whole file name: a dotfile like '.DS_Store' has
    # an empty ``suffix``, so the extension check alone cannot reject it.
    excluded_filenames = {'.ds_store'}

    return (file_path.suffix.lower() not in excluded_extensions and
            file_path.name.lower() not in excluded_filenames)
454532
455533 def _find_datasets (self , technique_path : Path ) -> List [Path ]:
456534 """Find all dataset files in a technique directory"""
@@ -1019,7 +1097,7 @@ def generate_summary_report(self, results: Dict[str, Dict]) -> str:
10191097 report += f"\n Ignored File Details:\n { '-' * 50 } \n "
10201098 for ignored_file in ignored_files :
10211099 reason = ignored_file ['reason' ].replace ('_' , ' ' ).title ()
1022- report += f"{ ignored_file ['name ' ]:<50 } { reason } \n "
1100+ report += f"{ ignored_file ['path ' ]:<80 } { reason } \n "
10231101
10241102 report += f"\n { '-' * 50 } \n "
10251103 report += (f"Report generated on: "
0 commit comments