
Commit 5bd600c

Merge pull request #193 from splunk/improved_data_sources
better data source handling
2 parents 7b10d64 + 85bd7c8 commit 5bd600c

15 files changed: +400, -121 lines


contentctl/actions/build.py

Lines changed: 14 additions & 1 deletion
@@ -10,6 +10,8 @@
 from contentctl.output.conf_writer import ConfWriter
 from contentctl.output.ba_yml_output import BAYmlOutput
 from contentctl.output.api_json_output import ApiJsonOutput
+from contentctl.output.data_source_writer import DataSourceWriter
+from contentctl.objects.lookup import Lookup
 import pathlib
 import json
 import datetime
@@ -28,9 +30,20 @@ class Build:
 
 
     def execute(self, input_dto: BuildInputDto) -> DirectorOutputDto:
-        if input_dto.config.build_app:
+        if input_dto.config.build_app:
+
             updated_conf_files:set[pathlib.Path] = set()
             conf_output = ConfOutput(input_dto.config)
+
+            # Construct a special lookup whose CSV is created at runtime and
+            # written directly into the output folder. It is created with model_construct,
+            # not model_validate, because the CSV does not exist yet.
+            data_sources_lookup_csv_path = input_dto.config.getPackageDirectoryPath() / "lookups" / "data_sources.csv"
+            DataSourceWriter.writeDataSourceCsv(input_dto.director_output_dto.data_sources, data_sources_lookup_csv_path)
+            input_dto.director_output_dto.addContentToDictMappings(Lookup.model_construct(description= "A lookup file that will contain the data source objects for detections.",
+                                                                                          filename=data_sources_lookup_csv_path,
+                                                                                          name="data_sources"))
+
             updated_conf_files.update(conf_output.writeHeaders())
             updated_conf_files.update(conf_output.writeObjects(input_dto.director_output_dto.detections, SecurityContentType.detections))
             updated_conf_files.update(conf_output.writeObjects(input_dto.director_output_dto.stories, SecurityContentType.stories))
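The DataSourceWriter.writeDataSourceCsv call above refers to contentctl/output/data_source_writer.py, which is not shown in this excerpt. As a rough sketch of what a CSV lookup writer like this does, the following serializes data source objects into the lookup file; the column names and object attributes (name, source, sourcetype) are illustrative assumptions, not the actual schema.

    import csv
    import pathlib

    def write_data_source_csv(data_sources: list, output_path: pathlib.Path) -> None:
        # One row per data source, so the CSV can ship with the app as a lookup.
        with open(output_path, "w", newline="") as csv_file:
            writer = csv.writer(csv_file)
            writer.writerow(["name", "source", "sourcetype"])  # assumed columns
            for data_source in data_sources:
                # assumed attributes on the data source object
                writer.writerow([data_source.name, data_source.source, data_source.sourcetype])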

contentctl/actions/initialize.py

Lines changed: 1 addition & 0 deletions
@@ -28,6 +28,7 @@ def execute(self, config: test) -> None:
             ('../templates/app_template/', 'app_template'),
             ('../templates/deployments/', 'deployments'),
             ('../templates/detections/', 'detections'),
+            ('../templates/data_sources/', 'data_sources'),
             ('../templates/macros/','macros'),
             ('../templates/stories/', 'stories'),
         ]:

contentctl/actions/validate.py

Lines changed: 0 additions & 1 deletion
@@ -28,7 +28,6 @@ def execute(self, input_dto: validate) -> DirectorOutputDto:
             [],
             [],
             [],
-            [],
         )
 
         director = Director(director_output_dto)

contentctl/input/director.py

Lines changed: 39 additions & 56 deletions
@@ -58,7 +58,6 @@ class DirectorOutputDto:
     deployments: list[Deployment]
     ssa_detections: list[SSADetection]
     data_sources: list[DataSource]
-    event_sources: list[EventSource]
     name_to_content_map: dict[str, SecurityContentObject] = field(default_factory=dict)
     uuid_to_content_map: dict[UUID, SecurityContentObject] = field(default_factory=dict)
 
@@ -68,17 +67,19 @@ def addContentToDictMappings(self, content: SecurityContentObject):
             # Since SSA detections may have the same name as ESCU detection,
             # for this function we prepend 'SSA ' to the name.
             content_name = f"SSA {content_name}"
+
         if content_name in self.name_to_content_map:
             raise ValueError(
                 f"Duplicate name '{content_name}' with paths:\n"
                 f" - {content.file_path}\n"
                 f" - {self.name_to_content_map[content_name].file_path}"
             )
-        elif content.id in self.uuid_to_content_map:
+
+        if content.id in self.uuid_to_content_map:
             raise ValueError(
                 f"Duplicate id '{content.id}' with paths:\n"
                 f" - {content.file_path}\n"
-                f" - {self.name_to_content_map[content_name].file_path}"
+                f" - {self.uuid_to_content_map[content.id].file_path}"
             )
 
         if isinstance(content, Lookup):
@@ -99,9 +100,10 @@ def addContentToDictMappings(self, content: SecurityContentObject):
             self.detections.append(content)
         elif isinstance(content, SSADetection):
             self.ssa_detections.append(content)
+        elif isinstance(content, DataSource):
+            self.data_sources.append(content)
         else:
-            raise Exception(f"Unknown security content type: {type(content)}")
-
+            raise Exception(f"Unknown security content type: {type(content)}")
 
         self.name_to_content_map[content_name] = content
         self.uuid_to_content_map[content.id] = content
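Two behavioral notes on these hunks: the duplicate-id check is no longer an elif, so it now runs even when the name check passes, and its error message reports the file path of the object already registered under that id (the old code indexed name_to_content_map, which would raise a KeyError or point at the wrong file when only the ids collide). A minimal standalone sketch of the corrected pattern, using a hypothetical Content class rather than the real SecurityContentObject:

    from dataclasses import dataclass
    from uuid import UUID

    @dataclass
    class Content:
        name: str
        id: UUID
        file_path: str

    name_to_content: dict[str, Content] = {}
    uuid_to_content: dict[UUID, Content] = {}

    def register(content: Content) -> None:
        # Check name and id independently so an id collision is caught
        # even when the names differ.
        if content.name in name_to_content:
            raise ValueError(f"Duplicate name '{content.name}' with paths:\n"
                             f" - {content.file_path}\n"
                             f" - {name_to_content[content.name].file_path}")
        if content.id in uuid_to_content:
            raise ValueError(f"Duplicate id '{content.id}' with paths:\n"
                             f" - {content.file_path}\n"
                             f" - {uuid_to_content[content.id].file_path}")
        name_to_content[content.name] = content
        uuid_to_content[content.id] = content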
@@ -124,41 +126,27 @@ def execute(self, input_dto: validate) -> None:
         self.createSecurityContent(SecurityContentType.stories)
         self.createSecurityContent(SecurityContentType.baselines)
         self.createSecurityContent(SecurityContentType.investigations)
-        self.createSecurityContent(SecurityContentType.event_sources)
         self.createSecurityContent(SecurityContentType.data_sources)
         self.createSecurityContent(SecurityContentType.playbooks)
         self.createSecurityContent(SecurityContentType.detections)
         self.createSecurityContent(SecurityContentType.ssa_detections)
 
+
+        from contentctl.objects.abstract_security_content_objects.detection_abstract import MISSING_SOURCES
+        if len(MISSING_SOURCES) > 0:
+            missing_sources_string = "\n 🟡 ".join(sorted(list(MISSING_SOURCES)))
+            print("WARNING: The following data_sources have been used in detections, but are not yet defined.\n"
+                  "This is not yet an error since not all data_sources have been defined, but will be converted to an error soon:\n 🟡 "
+                  f"{missing_sources_string}")
+        else:
+            print("No missing data_sources!")
+
     def createSecurityContent(self, contentType: SecurityContentType) -> None:
         if contentType == SecurityContentType.ssa_detections:
             files = Utils.get_all_yml_files_from_directory(
                 os.path.join(self.input_dto.path, "ssa_detections")
             )
             security_content_files = [f for f in files if f.name.startswith("ssa___")]
-
-        elif contentType == SecurityContentType.data_sources:
-            security_content_files = (
-                Utils.get_all_yml_files_from_directory_one_layer_deep(
-                    os.path.join(self.input_dto.path, "data_sources")
-                )
-            )
-
-        elif contentType == SecurityContentType.event_sources:
-            security_content_files = Utils.get_all_yml_files_from_directory(
-                os.path.join(self.input_dto.path, "data_sources", "cloud", "event_sources")
-            )
-            security_content_files.extend(
-                Utils.get_all_yml_files_from_directory(
-                    os.path.join(self.input_dto.path, "data_sources", "endpoint", "event_sources")
-                )
-            )
-            security_content_files.extend(
-                Utils.get_all_yml_files_from_directory(
-                    os.path.join(self.input_dto.path, "data_sources", "network", "event_sources")
-                )
-            )
-
         elif contentType in [
             SecurityContentType.deployments,
             SecurityContentType.lookups,
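The warning block added to execute() above reports every data source name that detections referenced but that could not be resolved, collected in the module-level MISSING_SOURCES set during validation. A small sketch of how that message renders, using made-up source names:

    MISSING_SOURCES = {"Example Source B", "Example Source A"}
    missing_sources_string = "\n 🟡 ".join(sorted(list(MISSING_SOURCES)))
    print("WARNING: The following data_sources have been used in detections, but are not yet defined.\n"
          "This is not yet an error since not all data_sources have been defined, but will be converted to an error soon:\n 🟡 "
          f"{missing_sources_string}")
    # WARNING: The following data_sources have been used in detections, but are not yet defined.
    # This is not yet an error since not all data_sources have been defined, but will be converted to an error soon:
    #  🟡 Example Source A
    #  🟡 Example Source B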
@@ -168,6 +156,7 @@ def createSecurityContent(self, contentType: SecurityContentType) -> None:
             SecurityContentType.investigations,
             SecurityContentType.playbooks,
             SecurityContentType.detections,
+            SecurityContentType.data_sources,
         ]:
             files = Utils.get_all_yml_files_from_directory(
                 os.path.join(self.input_dto.path, str(contentType.name))
@@ -190,54 +179,48 @@ def createSecurityContent(self, contentType: SecurityContentType) -> None:
                 modelDict = YmlReader.load_file(file)
 
                 if contentType == SecurityContentType.lookups:
-                    lookup = Lookup.model_validate(modelDict,context={"output_dto":self.output_dto, "config":self.input_dto})
-                    self.output_dto.addContentToDictMappings(lookup)
+                    lookup = Lookup.model_validate(modelDict,context={"output_dto":self.output_dto, "config":self.input_dto})
+                    self.output_dto.addContentToDictMappings(lookup)
 
                 elif contentType == SecurityContentType.macros:
-                    macro = Macro.model_validate(modelDict,context={"output_dto":self.output_dto})
-                    self.output_dto.addContentToDictMappings(macro)
+                    macro = Macro.model_validate(modelDict,context={"output_dto":self.output_dto})
+                    self.output_dto.addContentToDictMappings(macro)
 
                 elif contentType == SecurityContentType.deployments:
-                    deployment = Deployment.model_validate(modelDict,context={"output_dto":self.output_dto})
-                    self.output_dto.addContentToDictMappings(deployment)
+                    deployment = Deployment.model_validate(modelDict,context={"output_dto":self.output_dto})
+                    self.output_dto.addContentToDictMappings(deployment)
 
                 elif contentType == SecurityContentType.playbooks:
-                    playbook = Playbook.model_validate(modelDict,context={"output_dto":self.output_dto})
-                    self.output_dto.addContentToDictMappings(playbook)
+                    playbook = Playbook.model_validate(modelDict,context={"output_dto":self.output_dto})
+                    self.output_dto.addContentToDictMappings(playbook)
 
                 elif contentType == SecurityContentType.baselines:
-                    baseline = Baseline.model_validate(modelDict,context={"output_dto":self.output_dto})
-                    self.output_dto.addContentToDictMappings(baseline)
+                    baseline = Baseline.model_validate(modelDict,context={"output_dto":self.output_dto})
+                    self.output_dto.addContentToDictMappings(baseline)
 
                 elif contentType == SecurityContentType.investigations:
-                    investigation = Investigation.model_validate(modelDict,context={"output_dto":self.output_dto})
-                    self.output_dto.addContentToDictMappings(investigation)
+                    investigation = Investigation.model_validate(modelDict,context={"output_dto":self.output_dto})
+                    self.output_dto.addContentToDictMappings(investigation)
 
                 elif contentType == SecurityContentType.stories:
-                    story = Story.model_validate(modelDict,context={"output_dto":self.output_dto})
-                    self.output_dto.addContentToDictMappings(story)
+                    story = Story.model_validate(modelDict,context={"output_dto":self.output_dto})
+                    self.output_dto.addContentToDictMappings(story)
 
                 elif contentType == SecurityContentType.detections:
-                    detection = Detection.model_validate(modelDict,context={"output_dto":self.output_dto, "app":self.input_dto.app})
-                    self.output_dto.addContentToDictMappings(detection)
+                    detection = Detection.model_validate(modelDict,context={"output_dto":self.output_dto, "app":self.input_dto.app})
+                    self.output_dto.addContentToDictMappings(detection)
 
                 elif contentType == SecurityContentType.ssa_detections:
-                    self.constructSSADetection(self.ssa_detection_builder, self.output_dto,str(file))
-                    ssa_detection = self.ssa_detection_builder.getObject()
-                    if ssa_detection.status in [DetectionStatus.production.value, DetectionStatus.validation.value]:
-                        self.output_dto.addContentToDictMappings(ssa_detection)
+                    self.constructSSADetection(self.ssa_detection_builder, self.output_dto,str(file))
+                    ssa_detection = self.ssa_detection_builder.getObject()
+                    if ssa_detection.status in [DetectionStatus.production.value, DetectionStatus.validation.value]:
+                        self.output_dto.addContentToDictMappings(ssa_detection)
 
                 elif contentType == SecurityContentType.data_sources:
                     data_source = DataSource.model_validate(
                         modelDict, context={"output_dto": self.output_dto}
                     )
-                    self.output_dto.data_sources.append(data_source)
-
-                elif contentType == SecurityContentType.event_sources:
-                    event_source = EventSource.model_validate(
-                        modelDict, context={"output_dto": self.output_dto}
-                    )
-                    self.output_dto.event_sources.append(event_source)
+                    self.output_dto.addContentToDictMappings(data_source)
 
                 else:
                     raise Exception(f"Unsupported type: [{contentType}]")

contentctl/input/yml_reader.py

Lines changed: 2 additions & 0 deletions
@@ -40,6 +40,8 @@ def load_file(file_path: pathlib.Path, add_fields=True, STRICT_YML_CHECKING=Fals
         if add_fields == False:
             return yml_obj
 
+
         yml_obj['file_path'] = str(file_path)
+
 
         return yml_obj

contentctl/objects/abstract_security_content_objects/detection_abstract.py

Lines changed: 45 additions & 23 deletions
@@ -22,25 +22,26 @@
 from contentctl.objects.unit_test import UnitTest
 from contentctl.objects.test_group import TestGroup
 from contentctl.objects.integration_test import IntegrationTest
-
+from contentctl.objects.event_source import EventSource
+from contentctl.objects.data_source import DataSource
 
 #from contentctl.objects.playbook import Playbook
-from contentctl.objects.enums import DataSource,ProvidingTechnology
+from contentctl.objects.enums import ProvidingTechnology
 from contentctl.enrichments.cve_enrichment import CveEnrichmentObj
 
+MISSING_SOURCES:set[str] = set()
 
 class Detection_Abstract(SecurityContentObject):
     model_config = ConfigDict(use_enum_values=True)
 
     #contentType: SecurityContentType = SecurityContentType.detections
     type: AnalyticsType = Field(...)
     status: DetectionStatus = Field(...)
-    data_source: Optional[List[str]] = None
+    data_source: list[str] = []
     tags: DetectionTags = Field(...)
     search: Union[str, dict[str,Any]] = Field(...)
     how_to_implement: str = Field(..., min_length=4)
     known_false_positives: str = Field(..., min_length=4)
-    data_source_objects: Optional[List[DataSource]] = None
 
     enabled_by_default: bool = False
     file_path: FilePath = Field(...)
@@ -53,6 +54,8 @@ class Detection_Abstract(SecurityContentObject):
     # A list of groups of tests, relying on the same data
     test_groups: Union[list[TestGroup], None] = Field(None,validate_default=True)
 
+    data_source_objects: list[DataSource] = []
+
 
     @field_validator("search", mode="before")
     @classmethod
@@ -138,6 +141,7 @@ def datamodel(self)->List[DataModel]:
         else:
             return []
 
+
     @computed_field
     @property
     def source(self)->str:
@@ -161,10 +165,12 @@ def annotations(self)->dict[str,Union[List[str],int,str]]:
         annotations_dict["type"] = self.type
         #annotations_dict["version"] = self.version
 
+        annotations_dict["data_source"] = self.data_source
+
         #The annotations object is a superset of the mappings object.
         # So start with the mapping object.
         annotations_dict.update(self.mappings)
-
+
         #Make sure that the results are sorted for readability/easier diffs
         return dict(sorted(annotations_dict.items(), key=lambda item: item[0]))
 
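With this change the annotations dictionary carries the detection's data_source list alongside the mapping fields, and the merged dictionary is returned sorted by key so regenerated content diffs cleanly. A trivial illustration of that merge-and-sort pattern, with placeholder keys and values:

    annotations_dict = {"type": "TTP", "data_source": ["Example Source A"]}
    mappings = {"mitre_attack": ["T1003"], "cis20": ["CIS 10"]}  # placeholder mapping fields
    annotations_dict.update(mappings)
    sorted_annotations = dict(sorted(annotations_dict.items(), key=lambda item: item[0]))
    # {'cis20': [...], 'data_source': [...], 'mitre_attack': [...], 'type': 'TTP'}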
@@ -384,23 +390,37 @@ def model_post_init(self, ctx:dict[str,Any]):
                 raise ValueError(f"Error, failed to replace detection reference in Baseline '{baseline.name}' to detection '{self.name}'")
             baseline.tags.detections = new_detections
 
-        self.data_source_objects = []
-        for data_source_obj in director.data_sources:
-            for detection_data_source in self.data_source:
-                if data_source_obj.name in detection_data_source:
-                    self.data_source_objects.append(data_source_obj)
-
-        # Remove duplicate data source objects based on their 'name' property
-        unique_data_sources = {}
-        for data_source_obj in self.data_source_objects:
-            if data_source_obj.name not in unique_data_sources:
-                unique_data_sources[data_source_obj.name] = data_source_obj
-        self.data_source_objects = list(unique_data_sources.values())
+        # Data sources may be defined one per line, OR they may be defined as
+        # SOURCE_1 AND ANOTHERSOURCE AND A_THIRD_SOURCE
+        # if more than 1 data source is required for a detection (for example, because it includes a join).
+        # Parse and update the list to resolve individual names and remove potential duplicates.
+        updated_data_source_names:set[str] = set()
+
+        for ds in self.data_source:
+            split_data_sources = {d.strip() for d in ds.split('AND')}
+            updated_data_source_names.update(split_data_sources)
+
+        sources = sorted(list(updated_data_source_names))
+
+        matched_data_sources:list[DataSource] = []
+        missing_sources:list[str] = []
+        for source in sources:
+            try:
+                matched_data_sources += DataSource.mapNamesToSecurityContentObjects([source], director)
+            except Exception as data_source_mapping_exception:
+                # We gobble this up and add it to a global set so that we
+                # can print it ONCE at the end of the build of datasources.
+                # This will be removed later as per the note below
+                MISSING_SOURCES.add(source)
+
+        if len(missing_sources) > 0:
+            # This will be changed to ValueError when we have a complete list of data sources
+            print(f"WARNING: The following exception occurred when mapping the data_source field to DataSource objects:{missing_sources}")
+
+        self.data_source_objects = matched_data_sources
 
         for story in self.tags.analytic_story:
-            story.detections.append(self)
-            story.data_sources.extend(self.data_source_objects)
-
+            story.detections.append(self)
         return self
 
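The rewritten block above first normalizes the data_source field: a single entry may name several sources joined with " AND " (for detections whose search joins multiple sources), so each entry is split, stripped, and de-duplicated through a set before the names are resolved to DataSource objects. A standalone sketch of just that name normalization, using illustrative source names:

    data_source = ["Example Source A AND Example Source B", "Example Source A"]
    updated_data_source_names: set[str] = set()
    for ds in data_source:
        # Split compound entries and strip whitespace around each name.
        updated_data_source_names.update({d.strip() for d in ds.split('AND')})
    sources = sorted(updated_data_source_names)
    # ['Example Source A', 'Example Source B']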
@@ -424,14 +444,16 @@ def mapDetectionNamesToBaselineObjects(cls, v:list[str], info:ValidationInfo)->L
             raise ValueError("Error, baselines are constructed automatically at runtime. Please do not include this field.")
 
 
-        name:Union[str,dict] = info.data.get("name",None)
+        name:Union[str,None] = info.data.get("name",None)
         if name is None:
             raise ValueError("Error, cannot get Baselines because the Detection does not have a 'name' defined.")
-
+
         director:DirectorOutputDto = info.context.get("output_dto",None)
         baselines:List[Baseline] = []
        for baseline in director.baselines:
-            if name in baseline.tags.detections:
+            # This matching is a bit strange, because baseline.tags.detections starts as a list of strings, but
+            # is eventually updated to a list of Detections as we construct all of the detection objects.
+            if name in [detection_name for detection_name in baseline.tags.detections if isinstance(detection_name,str)]:
                baselines.append(baseline)
 
        return baselines
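The comment added above explains why the membership test now filters on isinstance: while detections are still being constructed, baseline.tags.detections holds a mix of detection names (strings) and already-resolved Detection objects, so the match should only consider the remaining strings. A minimal sketch of that filtering, with placeholder values:

    detections_field = ["Example Detection A", object()]  # unresolved name plus a resolved object
    name = "Example Detection A"
    if name in [entry for entry in detections_field if isinstance(entry, str)]:
        print("baseline references this detection")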
