Skip to content

Commit 6704d5f

Browse files
author
Patrick Bareiss
committed
updates to replay script
1 parent ccc10f8 commit 6704d5f

File tree

2 files changed

+243
-154
lines changed

2 files changed

+243
-154
lines changed

bin/replay.py

Lines changed: 0 additions & 154 deletions
This file was deleted.

bin/replay_all.py

Lines changed: 243 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,243 @@
1+
#!/usr/bin/env python3
2+
3+
import os
4+
import sys
5+
import argparse
6+
import glob
7+
import uuid
8+
import urllib
9+
import requests
10+
from urllib3 import disable_warnings
11+
import yaml
12+
from pathlib import Path
13+
14+
15+
def load_environment_variables():
    """Read the Splunk connection settings from the environment.

    Returns:
        dict: ``{'host': ..., 'hec_token': ...}`` taken from the
        SPLUNK_HOST and SPLUNK_HEC_TOKEN environment variables.

    Raises:
        ValueError: if either required variable is missing or empty.
    """
    settings = {}
    for name in ('SPLUNK_HOST', 'SPLUNK_HEC_TOKEN'):
        raw = os.environ.get(name)
        if not raw:
            raise ValueError(f"Environment variable {name} is required but not set")
        # 'SPLUNK_HOST' -> 'host', 'SPLUNK_HEC_TOKEN' -> 'hec_token'
        settings[name.lower().replace('splunk_', '')] = raw
    return settings
def find_data_yml_files(folder_path):
    """Recursively collect every ``data.yml`` beneath *folder_path*.

    Args:
        folder_path: directory to search (str or Path).

    Returns:
        list[str]: paths of all data.yml files found; empty list (with a
        warning printed) when none exist.
    """
    root = Path(folder_path)
    matches = [str(candidate) for candidate in root.rglob("data.yml")]
    if matches:
        print(f"Found {len(matches)} data.yml files")
    else:
        print(f"Warning: No data.yml files found in {root}")
    return matches
def parse_data_yml(yml_file_path):
    """Load one data.yml and extract its id and dataset entries.

    Args:
        yml_file_path: path of the data.yml file to parse.

    Returns:
        tuple: ``(id, datasets)``. The id falls back to a freshly
        generated UUID string when the file defines none; on any
        read/parse error the function prints a message and returns
        ``(None, [])`` instead of raising.
    """
    try:
        with open(yml_file_path, 'r') as handle:
            parsed = yaml.safe_load(handle)

        # An empty/None document raises AttributeError here, which the
        # except below converts into the (None, []) sentinel.
        identifier = parsed.get('id', str(uuid.uuid4()))
        entries = parsed.get('datasets', [])
        return identifier, entries

    except Exception as err:
        print(f"Error parsing {yml_file_path}: {err}")
        return None, []
def find_data_files(folder_path):
    """List data files (``*.log``, ``*.json``, ``*.txt``) in *folder_path*.

    The scan is non-recursive: only files directly inside the folder match.

    Args:
        folder_path: directory to scan.

    Returns:
        list[str]: matching paths; empty (with a warning printed) when
        nothing matches.
    """
    matches = [
        path
        for pattern in ("*.log", "*.json", "*.txt")
        for path in glob.glob(os.path.join(folder_path, pattern))
    ]
    if not matches:
        print(f"Warning: No data files found in {folder_path}")
    return matches
def send_data_to_splunk(file_path, splunk_host, hec_token, event_host_uuid,
                        index="test", source="test", sourcetype="test"):
    """Upload one data file to the Splunk HTTP Event Collector raw endpoint.

    Args:
        file_path: path of the file whose raw bytes are posted.
        splunk_host: Splunk server hostname/IP (HEC assumed on port 8088).
        hec_token: Splunk HEC authentication token.
        event_host_uuid: value used as the Splunk ``host`` field.
        index: target Splunk index (default "test").
        source: Splunk ``source`` field (default "test").
        sourcetype: Splunk ``sourcetype`` field (default "test").

    Request failures are caught and printed; the function never raises.
    """
    # TLS verification is disabled below (verify=False), so silence the
    # resulting urllib3 InsecureRequestWarning noise.
    disable_warnings()
    # Raw HEC submissions require a per-request channel id.
    hec_channel = str(uuid.uuid4())
    headers = {
        "Authorization": f"Splunk {hec_token}",
        "X-Splunk-Request-Channel": hec_channel,
    }
    url_params = {
        "index": index,
        "source": source,
        "sourcetype": sourcetype,
        "host": event_host_uuid,
    }
    # Fix: the original called urllib.parse.urljoin with only `import urllib`
    # in scope; `urllib.parse` is not guaranteed to be an attribute of the
    # bare package (it resolved only because requests imports it). Build the
    # identical URL directly instead.
    url = f"https://{splunk_host}:8088/services/collector/raw"
    with open(file_path, "rb") as datafile:
        try:
            res = requests.post(
                url,
                params=url_params,
                data=datafile.read(),
                allow_redirects=True,
                headers=headers,
                verify=False,  # HEC commonly runs with self-signed certs
            )
            res.raise_for_status()
            print(f":white_check_mark: Sent {file_path} to Splunk HEC")
        except Exception as e:
            print(f":x: Error sending {file_path} to Splunk HEC: {e}")
def main():
    """CLI entry point.

    Recursively finds data.yml files under the given directory, parses each
    one, resolves every dataset's file path, and replays the file to Splunk
    HEC using the metadata (source/sourcetype/id) from the yml file.
    """
    parser = argparse.ArgumentParser(
        description="Recursively find and replay datasets from data.yml files "
        "to Splunk via HTTP Event Collector (HEC)",
        epilog="""
Environment Variables Required:
SPLUNK_HOST - Splunk server hostname/IP
SPLUNK_HEC_TOKEN - Splunk HEC token

Example usage:
python replay_all.py /path/to/datasets/folder
python replay_all.py datasets/attack_techniques --host-uuid 12345678-abcd-efgh
export SPLUNK_HOST="192.168.1.100"
export SPLUNK_HEC_TOKEN="your-hec-token"

This script will:
1. Recursively find all data.yml files in the specified directory
2. Parse each data.yml file to extract dataset information
3. Replay each dataset using the source and sourcetype from the yml file
4. Use the id field from data.yml as the host field for Splunk events
""",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        'path',
        help='Path to a directory containing data.yml files '
        '(searches recursively)'
    )
    parser.add_argument(
        '--source',
        default='test',
        help='Source field for Splunk events (default: test)'
    )
    parser.add_argument(
        '--sourcetype',
        default='test',
        help='Sourcetype field for Splunk events (default: test)'
    )
    parser.add_argument(
        '--index',
        default='test',
        help='Splunk index to send events to (default: test)'
    )
    parser.add_argument(
        '--host-uuid',
        help='UUID to use as the host field for Splunk events '
        '(generates random UUID if not provided)'
    )
    args = parser.parse_args()

    try:
        # Fail fast if SPLUNK_HOST / SPLUNK_HEC_TOKEN are not exported.
        env_vars = load_environment_variables()
        splunk_host = env_vars['host']
        hec_token = env_vars['hec_token']

        if not os.path.isdir(args.path):
            print(f"Error: {args.path} is not a valid directory")
            sys.exit(1)

        # Find all data.yml files recursively
        data_yml_files = find_data_yml_files(args.path)

        if not data_yml_files:
            print(f"No data.yml files found in {args.path}")
            sys.exit(1)

        # Process each data.yml file
        for yml_file in data_yml_files:
            print(f"\nProcessing {yml_file}...")
            file_id, datasets = parse_data_yml(yml_file)

            if not file_id or not datasets:
                print(f"Skipping {yml_file} - no valid data found")
                continue

            # Use the id from data.yml as host field (unless user provided one)
            event_host_uuid = args.host_uuid or file_id
            print(f"Using host UUID: {event_host_uuid}")

            # Process each dataset in the data.yml file
            for dataset in datasets:
                dataset_name = dataset.get('name', 'unknown')
                dataset_path = dataset.get('path', '')
                # Per-dataset source/sourcetype override the CLI defaults.
                dataset_source = dataset.get('source', args.source)
                dataset_sourcetype = dataset.get('sourcetype', args.sourcetype)

                if not dataset_path:
                    print(f"Warning: No path specified for dataset "
                          f"'{dataset_name}', skipping")
                    continue

                # Handle relative paths - relative to attack_data root
                if dataset_path.startswith('/datasets/'):
                    # Convert to absolute path based on project structure
                    if Path(args.path).name == 'datasets':
                        base_dir = Path(args.path).parent
                    else:
                        base_dir = Path(args.path)
                    # Walk upward until the 'attack_data' repo root (or the
                    # filesystem root, where parent == self) is reached.
                    # NOTE(review): reconstructed from a whitespace-mangled
                    # paste; this while was taken to apply after both branches
                    # above — confirm against the original file.
                    while (base_dir.name != 'attack_data' and
                           base_dir.parent != base_dir):
                        base_dir = base_dir.parent

                    if base_dir.name == 'attack_data':
                        full_path = base_dir / dataset_path.lstrip('/')
                    else:
                        # Fallback: assume current working directory structure
                        full_path = Path.cwd() / dataset_path.lstrip('/')
                else:
                    # Assume relative to yml file location
                    yml_dir = Path(yml_file).parent
                    full_path = yml_dir / dataset_path

                if not full_path.exists():
                    print(f"Warning: Dataset file not found: {full_path}")
                    continue

                print(f" Sending dataset '{dataset_name}' from {full_path}")
                print(f" source: {dataset_source}")
                print(f" sourcetype: {dataset_sourcetype}")

                send_data_to_splunk(
                    file_path=str(full_path),
                    splunk_host=splunk_host,
                    hec_token=hec_token,
                    event_host_uuid=event_host_uuid,
                    index=args.index,
                    source=dataset_source,
                    sourcetype=dataset_sourcetype,
                )

    except Exception as e:
        # Top-level boundary: report and exit non-zero on any failure.
        print(f"Error: {e}")
        sys.exit(1)
# Run the replay workflow only when executed as a script, not on import.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)