Skip to content

Commit 049f1c1

Browse files
committed
fix: handle missing/deprecated data sources, separate data component parsing in v18+
1 parent 5028631 commit 049f1c1

File tree

2 files changed

+72
-18
lines changed

2 files changed

+72
-18
lines changed

mitreattack/attackToExcel/attackToExcel.py

Lines changed: 45 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import argparse
44
import os
5+
import re
56
from typing import Dict, List, Optional
67

78
import pandas as pd
@@ -80,10 +81,10 @@ def get_stix_data(
8081
return mem_store
8182

8283

83-
def build_dataframes(src: MemoryStore, domain: str) -> Dict:
84+
def build_dataframes_pre_v18(src: MemoryStore, domain: str) -> Dict:
8485
"""Build pandas dataframes for each attack type, and return a dictionary lookup for each type to the relevant dataframe.
8586
86-
:returns:
87+
This version of the function is used for ATT&CK versions prior to v18, to account for changes to data components/data sources.
8788
8889
Parameters
8990
----------
@@ -114,6 +115,38 @@ def build_dataframes(src: MemoryStore, domain: str) -> Dict:
114115
return df
115116

116117

118+
def build_dataframes(src: MemoryStore, domain: str) -> Dict:
    """Parse every ATT&CK object type found in the STIX data into pandas dataframes.

    Data components are parsed through stixToDf.datacomponentsToDf.

    Parameters
    ----------
    src : MemoryStore
        MemoryStore or other stix2 DataSource object
    domain : str
        domain of ATT&CK that src corresponds to, e.g. "enterprise-attack"

    Returns
    -------
    dict
        A dict lookup of each ATT&CK type to the dataframes for that type, to be ingested by write_excel
    """
    # Populate the lookup one object type at a time; the insertion order is the
    # order in which the parsers run and in which the keys are later iterated.
    frames = {}
    # techniques and matrices are the only parsers that also take the domain
    frames["techniques"] = stixToDf.techniquesToDf(src, domain)
    frames["tactics"] = stixToDf.tacticsToDf(src)
    frames["software"] = stixToDf.softwareToDf(src)
    frames["groups"] = stixToDf.groupsToDf(src)
    frames["campaigns"] = stixToDf.campaignsToDf(src)
    frames["assets"] = stixToDf.assetsToDf(src)
    frames["mitigations"] = stixToDf.mitigationsToDf(src)
    frames["matrices"] = stixToDf.matricesToDf(src, domain)
    frames["relationships"] = stixToDf.relationshipsToDf(src)
    frames["datacomponents"] = stixToDf.datacomponentsToDf(src)
    frames["analytics"] = stixToDf.analyticsToDf(src)
    frames["detectionstrategies"] = stixToDf.detectionstrategiesToDf(src)
    return frames
148+
149+
117150
def write_excel(dataframes: Dict, domain: str, version: Optional[str] = None, output_dir: str = ".") -> List:
118151
"""Given a set of dataframes from build_dataframes, write the ATT&CK dataset to output directory.
119152
@@ -148,7 +181,7 @@ def write_excel(dataframes: Dict, domain: str, version: Optional[str] = None, ou
148181
os.makedirs(output_directory)
149182
# master dataset file
150183
master_fp = os.path.join(output_directory, f"{domain_version_string}.xlsx")
151-
with pd.ExcelWriter(master_fp, engine="xlsxwriter") as master_writer:
184+
with pd.ExcelWriter(path=master_fp, engine="xlsxwriter") as master_writer:
152185
# master list of citations
153186
citations = pd.DataFrame()
154187

@@ -324,6 +357,15 @@ def export(
324357
logger.info(f"************ Exporting {domain} to Excel ************")
325358

326359
# build dataframes
360+
if version:
361+
version_pattern = r"v(\d+)\.(\d+)$"
362+
match = re.search(version_pattern, version)
363+
if match:
364+
major_version = int(match.group(1))
365+
if major_version < 18:
366+
dataframes = build_dataframes_pre_v18(src=mem_store, domain=domain)
367+
write_excel(dataframes=dataframes, domain=domain, version=version, output_dir=output_dir)
368+
327369
dataframes = build_dataframes(src=mem_store, domain=domain)
328370
write_excel(dataframes=dataframes, domain=domain, version=version, output_dir=output_dir)
329371

mitreattack/attackToExcel/stixToDf.py

Lines changed: 27 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,8 @@ def tacticsToDf(src):
259259
def datasourcesToDf(src):
260260
"""Parse STIX Data Sources and their Data components from the given data and return corresponding pandas dataframes.
261261
262+
This is only used in versions of ATT&CK before v18.
263+
262264
:param src: MemoryStore or other stix2 DataSource object holding the domain data
263265
:returns: a lookup of labels (descriptors/names) to dataframes
264266
"""
@@ -290,7 +292,7 @@ def datasourcesToDf(src):
290292
if "x_mitre_aliases" in data_object:
291293
row["aliases"] = ", ".join(sorted(data_object["x_mitre_aliases"][1:]))
292294
if data_object["type"] == "x-mitre-data-component":
293-
if "x_mitre_data_source_ref" in data_object:
295+
if "x_mitre_data_source_ref" in data_object and data_object["x_mitre_data_source_ref"] in source_lookup:
294296
row["name"] = f"{source_lookup[data_object['x_mitre_data_source_ref']]}: {data_object['name']}"
295297
row["type"] = "datacomponent"
296298
else:
@@ -333,6 +335,29 @@ def datasourcesToDf(src):
333335
return dataframes
334336

335337

338+
def datacomponentsToDf(src):
    """Parse STIX Data components from the given data and return corresponding pandas dataframes.

    :param src: MemoryStore or other stix2 DataSource object holding the domain data
    :returns: a lookup of labels (descriptors/names) to dataframes; the lookup is empty when no
        data components remain after remove_revoked_deprecated filtering
    """
    data_components = src.query([Filter("type", "=", "x-mitre-data-component")])
    data_components = remove_revoked_deprecated(data_components)

    if not data_components:
        # A dataframe built from zero rows has no "name" column, so sort_values("name")
        # would raise KeyError. Warn and hand back an empty lookup instead of crashing.
        logger.warning("No data components found - nothing to parse")
        return {}

    data_component_rows = [
        parseBaseStix(data_component)
        for data_component in tqdm(data_components, desc="parsing data components")
    ]

    citations = get_citations(data_components)
    dataframes = {
        "datacomponents": pd.DataFrame(data_component_rows).sort_values("name"),
    }
    # only add a citations sheet when there is something to cite
    if not citations.empty:
        dataframes["citations"] = citations.sort_values("reference")

    return dataframes
359+
360+
336361
def analyticsToDf(src):
337362
"""Parse STIX Analytics from the given data and return corresponding pandas dataframes.
338363
@@ -380,22 +405,9 @@ def detectionstrategiesToDf(src):
380405
dataframes = {
381406
"detectionstrategies": pd.DataFrame(detection_strategy_rows).sort_values("name"),
382407
}
383-
384-
# add relationships
385-
codex = relationshipsToDf(src, relatedType="detectionstrategy")
386-
dataframes.update(codex)
387-
# add relationship references
388-
dataframes["detectionstrategies"]["relationship citations"] = _get_relationship_citations(
389-
dataframes["detectionstrategies"], codex
390-
)
391-
# add/merge citations
392408
if not citations.empty:
393409
if "citations" in dataframes: # append to existing citations from references
394-
dataframes["citations"] = pd.concat([dataframes["citations"], citations])
395-
else: # add citations
396-
dataframes["citations"] = citations
397-
398-
dataframes["citations"].sort_values("reference")
410+
dataframes["citations"] = citations.sort_values("reference")
399411

400412
else:
401413
logger.warning("No detection strategies found - nothing to parse")

0 commit comments

Comments
 (0)