Merge branch 'main' into feature/validation_against_cms_main

pyth0n1c · web-flow · commit 15af1263590c · 2025-03-26T11:09:54.000-07:00
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -8,7 +8,7 @@ repos:
       - id: detect-private-key
       - id: forbid-submodules
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.9.2
+    rev: v0.11.0
     hooks:
       - id: ruff
         args: [ --fix ]
diff --git a/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py b/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py
@@ -100,7 +100,7 @@ class DetectionTestingManagerOutputDto:
     start_time: Union[datetime.datetime, None] = None
     replay_index: str = "contentctl_testing_index"
     replay_host: str = "CONTENTCTL_HOST"
-    timeout_seconds: int = 60
+    timeout_seconds: int = 120
     terminate: bool = False
 
 
diff --git a/contentctl/objects/abstract_security_content_objects/detection_abstract.py b/contentctl/objects/abstract_security_content_objects/detection_abstract.py
@@ -474,7 +474,7 @@ def serialize_model(self):
                         "name": lookup.name,
                         "description": lookup.description,
                         "filename": lookup.filename.name,
-                        "default_match": "true" if lookup.default_match else "false",
+                        "default_match": lookup.default_match,
                         "case_sensitive_match": "true"
                         if lookup.case_sensitive_match
                         else "false",
@@ -1055,3 +1055,30 @@ def get_summary(
         # Return the summary
 
         return summary_dict
+
+    @model_validator(mode="after")
+    def validate_data_source_output_fields(self):
+        # Only production detections are checked; Hunting and Correlation types are exempt
+        if self.status != DetectionStatus.production or self.type in {
+            AnalyticsType.Hunting,
+            AnalyticsType.Correlation,
+        }:
+            return self
+
+        # Each output field must occur in self.search (an `in` containment test, not a parse)
+        for data_source in self.data_source_objects:
+            if not data_source.output_fields:
+                continue
+
+            missing_fields = [
+                field for field in data_source.output_fields if field not in self.search
+            ]
+
+            if missing_fields:
+                raise ValueError(
+                    f"Data source '{data_source.name}' has output fields "
+                    f"{missing_fields} that are not present in the search "
+                    f"for detection '{self.name}'"
+                )
+
+        return self
diff --git a/contentctl/objects/data_source.py b/contentctl/objects/data_source.py
@@ -17,10 +17,12 @@ class DataSource(SecurityContentObject):
     source: str = Field(...)
     sourcetype: str = Field(...)
     separator: Optional[str] = None
+    separator_value: None | str = None
     configuration: Optional[str] = None
     supported_TA: list[TA] = []
     fields: None | list = None
     field_mappings: None | list = None
+    mitre_components: list[str] = []
     convert_to_log_source: None | list = None
     example_log: None | str = None
     output_fields: list[str] = []
diff --git a/contentctl/objects/lookup.py b/contentctl/objects/lookup.py
@@ -6,9 +6,10 @@
 import re
 from enum import StrEnum, auto
 from functools import cached_property
-from typing import TYPE_CHECKING, Annotated, Any, Literal, Optional, Self
+from typing import TYPE_CHECKING, Annotated, Any, Literal, Self
 
 from pydantic import (
+    BeforeValidator,
     Field,
     FilePath,
     NonNegativeInt,
@@ -69,7 +70,19 @@ class Lookup_Type(StrEnum):
 
 # TODO (#220): Split Lookup into 2 classes
 class Lookup(SecurityContentObject, abc.ABC):
-    default_match: Optional[bool] = None
+    # Lookup content widely writes default_match as a bare False/True, which
+    # PyYAML reads as a BOOL rather than a string and this causes parsing to
+    # fail. The BeforeValidator converts a bool to "true"/"false" and passes
+    # any other value through unchanged for the normal str validation.
+    default_match: Annotated[
+        str, BeforeValidator(lambda dm: str(dm).lower() if isinstance(dm, bool) else dm)
+    ] = Field(
+        default="",
+        description="This field is given a default value of '' "
+        "because it is the default value specified in the transforms.conf "
+        "docs. Giving it a type of str rather than str | None simplifies "
+        "the typing for the field.",
+    )
     # Per the documentation for transforms.conf, EXACT should not be specified in this list,
     # so we include only WILDCARD and CIDR
     match_type: list[Annotated[str, Field(pattern=r"(^WILDCARD|CIDR)\(.+\)$")]] = Field(
@@ -88,7 +101,7 @@ def serialize_model(self):
 
         # All fields custom to this model
         model = {
-            "default_match": "true" if self.default_match is True else "false",
+            "default_match": self.default_match,
             "match_type": self.match_type_to_conf_format,
             "min_matches": self.min_matches,
             "max_matches": self.max_matches,
diff --git a/contentctl/output/attack_nav_output.py b/contentctl/output/attack_nav_output.py
@@ -1,5 +1,5 @@
-from typing import List, Union
 import pathlib
+from typing import List, Union
 
 from contentctl.objects.detection import Detection
 from contentctl.output.attack_nav_writer import AttackNavWriter
@@ -10,14 +10,21 @@ def writeObjects(
         self, detections: List[Detection], output_path: pathlib.Path
     ) -> None:
         techniques: dict[str, dict[str, Union[List[str], int]]] = {}
+
         for detection in detections:
             for tactic in detection.tags.mitre_attack_id:
                 if tactic not in techniques:
                     techniques[tactic] = {"score": 0, "file_paths": []}
 
-                detection_url = f"https://github.com/splunk/security_content/blob/develop/detections/{detection.source}/{detection.file_path.name}"
-                techniques[tactic]["score"] += 1
-                techniques[tactic]["file_paths"].append(detection_url)
+                detection_type = detection.source
+                detection_id = detection.id
+
+                # Pack type, id and name into a single "|"-delimited string
+                detection_info = f"{detection_type}|{detection_id}|{detection.name}"
+
+                techniques[tactic]["score"] = techniques[tactic].get("score", 0) + 1
+                if isinstance(techniques[tactic]["file_paths"], list):
+                    techniques[tactic]["file_paths"].append(detection_info)
 
         """
         for detection in objects:
diff --git a/contentctl/output/attack_nav_writer.py b/contentctl/output/attack_nav_writer.py
@@ -1,11 +1,11 @@
 import json
-from typing import Union, List
 import pathlib
+from typing import List, Union
 
-VERSION = "4.3"
+VERSION = "4.5"
 NAME = "Detection Coverage"
-DESCRIPTION = "security_content detection coverage"
-DOMAIN = "mitre-enterprise"
+DESCRIPTION = "Security Content Detection Coverage"
+DOMAIN = "enterprise-attack"
 
 
 class AttackNavWriter:
@@ -14,52 +14,68 @@ def writeAttackNavFile(
         mitre_techniques: dict[str, dict[str, Union[List[str], int]]],
         output_path: pathlib.Path,
     ) -> None:
-        max_count = 0
-        for technique_id in mitre_techniques.keys():
-            if mitre_techniques[technique_id]["score"] > max_count:
-                max_count = mitre_techniques[technique_id]["score"]
+        max_count = max(
+            (technique["score"] for technique in mitre_techniques.values()), default=0
+        )
 
         layer_json = {
-            "version": VERSION,
+            "versions": {"attack": "16", "navigator": "5.1.0", "layer": VERSION},
             "name": NAME,
             "description": DESCRIPTION,
             "domain": DOMAIN,
             "techniques": [],
+            "gradient": {
+                "colors": ["#ffffff", "#66b1ff", "#096ed7"],
+                "minValue": 0,
+                "maxValue": max_count,
+            },
+            "filters": {
+                "platforms": [
+                    "Windows",
+                    "Linux",
+                    "macOS",
+                    "Network",
+                    "AWS",
+                    "GCP",
+                    "Azure",
+                    "Azure AD",
+                    "Office 365",
+                    "SaaS",
+                ]
+            },
+            "layout": {
+                "layout": "side",
+                "showName": True,
+                "showID": True,
+                "showAggregateScores": False,
+            },
+            "legendItems": [
+                {"label": "No detections", "color": "#ffffff"},
+                {"label": "Has detections", "color": "#66b1ff"},
+            ],
+            "showTacticRowBackground": True,
+            "tacticRowBackground": "#dddddd",
+            "selectTechniquesAcrossTactics": True,
         }
 
-        layer_json["gradient"] = {
-            "colors": ["#ffffff", "#66b1ff", "#096ed7"],
-            "minValue": 0,
-            "maxValue": max_count,
-        }
-
-        layer_json["filters"] = {
-            "platforms": [
-                "Windows",
-                "Linux",
-                "macOS",
-                "AWS",
-                "GCP",
-                "Azure",
-                "Office 365",
-                "SaaS",
-            ]
-        }
+        for technique_id, data in mitre_techniques.items():
+            links = []
+            for entry in data["file_paths"]:
+                # Name is last: split at most twice so a "|" in the name survives
+                detection_type, detection_id, detection_name = entry.split("|", 2)
 
-        layer_json["legendItems"] = [
-            {"label": "NO available detections", "color": "#ffffff"},
-            {"label": "Some detections available", "color": "#66b1ff"},
-        ]
+                # Construct research website URL (without the name)
+                research_url = (
+                    f"https://research.splunk.com/{detection_type}/{detection_id}/"
+                )
 
-        layer_json["showTacticRowBackground"] = True
-        layer_json["tacticRowBackground"] = "#dddddd"
-        layer_json["sorting"] = 3
+                links.append({"label": detection_name, "url": research_url})
 
-        for technique_id in mitre_techniques.keys():
             layer_technique = {
                 "techniqueID": technique_id,
-                "score": mitre_techniques[technique_id]["score"],
-                "comment": "\n\n".join(mitre_techniques[technique_id]["file_paths"]),
+                "score": data["score"],
+                "enabled": True,
+                "links": links,
             }
             layer_json["techniques"].append(layer_technique)
 
diff --git a/contentctl/output/templates/transforms.j2 b/contentctl/output/templates/transforms.j2
@@ -7,8 +7,8 @@ filename = {{ lookup.app_filename.name  }}
 collection = {{ lookup.collection }}
 external_type = kvstore
 {% endif %}
-{% if lookup.default_match is defined and lookup.default_match != None  %}
-default_match = {{ lookup.default_match | lower }}
+{% if lookup.default_match != '' %}
+default_match = {{ lookup.default_match }}
 {% endif %}
 {% if lookup.case_sensitive_match is defined and lookup.case_sensitive_match != None %}
 case_sensitive_match = {{ lookup.case_sensitive_match | lower }}
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,7 +1,7 @@
 [tool.poetry]
 name = "contentctl"
 
-version = "5.1.0"
+version = "5.2.0"
 
 description = "Splunk Content Control Tool"
 authors = ["STRT <research@splunk.com>"]
@@ -19,7 +19,7 @@ PyYAML = "^6.0.2"
 requests = "~2.32.3"
 pycvesearch = "^1.2"
 xmltodict = ">=0.13,<0.15"
-attackcti = "^0.4.0"
+attackcti = ">=0.5.4,<0.6"
 Jinja2 = "^3.1.4"
 questionary = "^2.0.1"
 docker = "^7.1.0"
@@ -30,10 +30,10 @@ tqdm = "^4.66.5"
 pygit2 = "^1.15.1"
 tyro = "^0.9.2"
 gitpython = "^3.1.43"
-setuptools = ">=69.5.1,<76.0.0"
+setuptools = ">=69.5.1,<79.0.0"
 
 [tool.poetry.group.dev.dependencies]
-ruff = "^0.9.2"
+ruff = "^0.11.0"
 
 [build-system]
 requires = ["poetry-core>=1.0.0"]