-
Notifications
You must be signed in to change notification settings - Fork 133
129 lines (115 loc) · 4.85 KB
/
replay-datasets.yml
File metadata and controls
129 lines (115 loc) · 4.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# Replays attack datasets to Splunk.
#
# Triggers:
#   * push to master touching datasets/** -> replays the dataset YAML files
#     changed by that push.
#   * workflow_dispatch -> replays the file or directory given in
#     `dataset_path`; when the input is left empty, falls back to the same
#     changed-file detection used for pushes.
#
# Security note: values that originate outside this file (workflow inputs,
# step outputs built from repository file names) are handed to the shell
# through `env:` and read as quoted variables. They are never interpolated
# into script text with `${{ }}`, which would allow shell injection.
name: Replay Changed Datasets to Splunk

on:
  push:
    branches: [master]
    paths:
      - 'datasets/**'
  workflow_dispatch:
    inputs:
      dataset_path:
        description: 'Specific dataset path to replay (optional, defaults to all changed files)'
        required: false
        type: string

# The job only reads the repository (checkout + LFS); keep the token minimal.
permissions:
  contents: read

jobs:
  replay-datasets:
    runs-on:
      group: attack-data-runners
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0  # Fetch full history for file change detection

      - name: Pull Git LFS files
        run: git lfs pull

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.9'

      - name: Install dependencies
        run: |
          cd bin
          pip install -r requirements.txt

      # Runs for pushes and for manual dispatches without a dataset_path.
      # Exactly one of this step and "Set manual dataset path" runs per job.
      - name: Find changed YAML files
        id: changed-files
        if: github.event_name != 'workflow_dispatch' || github.event.inputs.dataset_path == ''
        env:
          EVENT_NAME: ${{ github.event_name }}
          PR_BASE_SHA: ${{ github.event.pull_request.base.sha }}
          PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }}
          PUSH_BASE_SHA: ${{ github.event.before }}
          PUSH_HEAD_SHA: ${{ github.sha }}
        run: |
          # Get list of changed YAML files in datasets directory
          if [ "$EVENT_NAME" = "pull_request" ]; then
            # For PR, compare against base branch.
            # NOTE: unreachable with the triggers declared above; kept so the
            # step keeps working if a pull_request trigger is added.
            BASE_SHA="$PR_BASE_SHA"
            HEAD_SHA="$PR_HEAD_SHA"
            echo "Comparing PR: $BASE_SHA...$HEAD_SHA"
          else
            # For push, compare against previous commit
            BASE_SHA="$PUSH_BASE_SHA"
            HEAD_SHA="$PUSH_HEAD_SHA"
            # github.event.before is empty on workflow_dispatch and all zeros
            # when the ref was just created; fall back to the parent of HEAD.
            case "$BASE_SHA" in
              "" | 0000000000000000000000000000000000000000)
                BASE_SHA=$(git rev-parse --verify --quiet "${HEAD_SHA}~1" || echo "")
                ;;
            esac
            echo "Comparing push: $BASE_SHA...$HEAD_SHA"
          fi

          YAML_FILES=""
          if [ -n "$BASE_SHA" ]; then
            # Best effort: a failing detection script yields an empty list,
            # but its stderr stays in the log so the failure is diagnosable.
            YAML_FILES=$(python bin/find_changed_datasets.py --base-sha "$BASE_SHA" --head-sha "$HEAD_SHA" --output files || echo "")
          else
            echo "No base commit available to compare against"
          fi

          if [ -z "$YAML_FILES" ]; then
            echo "No YAML dataset files changed"
            echo "yaml_files=" >> "$GITHUB_OUTPUT"
          else
            echo "Changed YAML files:"
            echo "$YAML_FILES"
            # Convert newlines to spaces for easier handling
            YAML_FILES_SPACE=$(echo "$YAML_FILES" | tr '\n' ' ')
            echo "yaml_files=$YAML_FILES_SPACE" >> "$GITHUB_OUTPUT"
          fi

      # Runs only for manual dispatches that specify a dataset_path.
      - name: Set manual dataset path
        id: manual-path
        if: github.event_name == 'workflow_dispatch' && github.event.inputs.dataset_path != ''
        env:
          DATASET_PATH: ${{ github.event.inputs.dataset_path }}
        run: |
          # For manual dispatch, find YAML files in the specified path
          if [ -f "$DATASET_PATH" ]; then
            # Single file provided
            echo "yaml_files=$DATASET_PATH" >> "$GITHUB_OUTPUT"
          else
            # Directory provided - find YAML files
            YAML_FILES=$(python bin/find_changed_datasets.py --directory "$DATASET_PATH" --output files || echo "")
            if [ -n "$YAML_FILES" ]; then
              YAML_FILES_SPACE=$(echo "$YAML_FILES" | tr '\n' ' ')
              echo "yaml_files=$YAML_FILES_SPACE" >> "$GITHUB_OUTPUT"
            else
              echo "yaml_files=" >> "$GITHUB_OUTPUT"
            fi
          fi

      - name: Replay datasets to Splunk
        if: steps.changed-files.outputs.yaml_files != '' || steps.manual-path.outputs.yaml_files != ''
        env:
          SPLUNK_HOST: ${{ secrets.SPLUNK_HOST }}
          SPLUNK_HEC_TOKEN: ${{ secrets.SPLUNK_HEC_TOKEN }}
          MANUAL_YAML_FILES: ${{ steps.manual-path.outputs.yaml_files }}
          CHANGED_YAML_FILES: ${{ steps.changed-files.outputs.yaml_files }}
        run: |
          # Get the YAML files to process. Only one of the two detection
          # steps runs per job, so take whichever produced a list. This also
          # covers a manual dispatch without dataset_path, which uses the
          # changed-files output.
          YAML_FILES="${MANUAL_YAML_FILES:-$CHANGED_YAML_FILES}"
          if [ -z "$YAML_FILES" ]; then
            echo "No YAML files to process"
            exit 0
          fi
          echo "Processing YAML files: $YAML_FILES"
          # Run replay script with all YAML files
          # The replay script now reads all metadata from the YAML files themselves
          # $YAML_FILES is intentionally unquoted: it is a space-separated
          # list that must be split into one argument per file.
          # Replay stays best effort (the step does not fail), but a failure
          # is surfaced as a warning annotation instead of a plain log line.
          # shellcheck disable=SC2086
          python bin/replay.py $YAML_FILES || echo "::warning::Failed to replay some datasets"

      - name: Summary
        if: always()
        env:
          DATASET_PATH: ${{ github.event.inputs.dataset_path }}
          MANUAL_YAML_FILES: ${{ steps.manual-path.outputs.yaml_files }}
          CHANGED_YAML_FILES: ${{ steps.changed-files.outputs.yaml_files }}
        run: |
          # DATASET_PATH is non-empty only for a manual dispatch that named a
          # path; every other run went through changed-file detection.
          if [ -n "$DATASET_PATH" ]; then
            if [ -n "$MANUAL_YAML_FILES" ]; then
              echo "Manual replay completed for YAML files: $MANUAL_YAML_FILES"
            else
              echo "No YAML files found in specified path: $DATASET_PATH"
            fi
          else
            if [ -n "$CHANGED_YAML_FILES" ]; then
              echo "Automated replay completed for changed YAML files: $CHANGED_YAML_FILES"
            else
              echo "No YAML dataset changes detected, no replay needed"
            fi
          fi