|
2 | 2 |
|
3 | 3 | import argparse |
4 | 4 | import os |
| 5 | +import re |
5 | 6 | from typing import Dict, List, Optional |
6 | 7 |
|
7 | 8 | import pandas as pd |
@@ -80,10 +81,10 @@ def get_stix_data( |
80 | 81 | return mem_store |
81 | 82 |
|
82 | 83 |
|
83 | | -def build_dataframes(src: MemoryStore, domain: str) -> Dict: |
| 84 | +def build_dataframes_pre_v18(src: MemoryStore, domain: str) -> Dict: |
84 | 85 | """Build pandas dataframes for each attack type, and return a dictionary lookup for each type to the relevant dataframe. |
85 | 86 |
|
86 | | - :returns: |
| 87 | + This version of the function is used for ATT&CK versions prior to v18, to account for changes to data components/data sources. |
87 | 88 |
|
88 | 89 | Parameters |
89 | 90 | ---------- |
@@ -114,6 +115,38 @@ def build_dataframes(src: MemoryStore, domain: str) -> Dict: |
114 | 115 | return df |
115 | 116 |
|
116 | 117 |
|
| 118 | +def build_dataframes(src: MemoryStore, domain: str) -> Dict: |
| 119 | + """Build pandas dataframes for each attack type, and return a dictionary lookup for each type to the relevant dataframe. |
| 120 | +
|
| 121 | + Parameters |
| 122 | + ---------- |
| 123 | + src : MemoryStore |
| 124 | + MemoryStore or other stix2 DataSource object |
| 125 | + domain : str |
| 126 | + domain of ATT&CK that src corresponds to, e.g. "enterprise-attack" |
| 127 | +
|
| 128 | + Returns |
| 129 | + ------- |
| 130 | + dict |
| 131 | + A dict lookup of each ATT&CK type to dataframes for the given type to be ingested by write_excel |
| 132 | + """ |
| 133 | + df = { |
| 134 | + "techniques": stixToDf.techniquesToDf(src, domain), |
| 135 | + "tactics": stixToDf.tacticsToDf(src), |
| 136 | + "software": stixToDf.softwareToDf(src), |
| 137 | + "groups": stixToDf.groupsToDf(src), |
| 138 | + "campaigns": stixToDf.campaignsToDf(src), |
| 139 | + "assets": stixToDf.assetsToDf(src), |
| 140 | + "mitigations": stixToDf.mitigationsToDf(src), |
| 141 | + "matrices": stixToDf.matricesToDf(src, domain), |
| 142 | + "relationships": stixToDf.relationshipsToDf(src), |
| 143 | + "datacomponents": stixToDf.datacomponentsToDf(src), |
| 144 | + "analytics": stixToDf.analyticsToDf(src), |
| 145 | + "detectionstrategies": stixToDf.detectionstrategiesToDf(src), |
| 146 | + } |
| 147 | + return df |
| 148 | + |
| 149 | + |
117 | 150 | def write_excel(dataframes: Dict, domain: str, version: Optional[str] = None, output_dir: str = ".") -> List: |
118 | 151 | """Given a set of dataframes from build_dataframes, write the ATT&CK dataset to output directory. |
119 | 152 |
|
@@ -148,7 +181,7 @@ def write_excel(dataframes: Dict, domain: str, version: Optional[str] = None, ou |
148 | 181 | os.makedirs(output_directory) |
149 | 182 | # master dataset file |
150 | 183 | master_fp = os.path.join(output_directory, f"{domain_version_string}.xlsx") |
151 | | - with pd.ExcelWriter(master_fp, engine="xlsxwriter") as master_writer: |
| 184 | + with pd.ExcelWriter(path=master_fp, engine="xlsxwriter") as master_writer: |
152 | 185 | # master list of citations |
153 | 186 | citations = pd.DataFrame() |
154 | 187 |
|
@@ -324,6 +357,15 @@ def export( |
324 | 357 | logger.info(f"************ Exporting {domain} to Excel ************") |
325 | 358 |
|
326 | 359 | # build dataframes |
| 360 | + if version: |
| 361 | + version_pattern = r"v(\d+)\.(\d+)$" |
| 362 | + match = re.search(version_pattern, version) |
| 363 | + if match: |
| 364 | + major_version = int(match.group(1)) |
| 365 | + if major_version < 18: |
| 366 | + dataframes = build_dataframes_pre_v18(src=mem_store, domain=domain) |
| 367 | + write_excel(dataframes=dataframes, domain=domain, version=version, output_dir=output_dir) |
| 368 | + |
327 | 369 | dataframes = build_dataframes(src=mem_store, domain=domain) |
328 | 370 | write_excel(dataframes=dataframes, domain=domain, version=version, output_dir=output_dir) |
329 | 371 |
|
|
0 commit comments