Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions .github/validate_dataset_ymls.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import datetime
import pathlib
import sys
from enum import StrEnum, auto
from uuid import UUID

from pydantic import BaseModel, Field, HttpUrl


class Environment(StrEnum):
attack_range = auto()


class AttackDataYml(BaseModel):
    """Pydantic schema for the metadata fields of a dataset YML document.

    Every field is required (none declares a default).
    NOTE(review): nothing visible in this file instantiates the model yet;
    presumably it is meant to validate the ``.yml`` files under ``datasets/``.
    """

    # Free-text author name; at least 5 characters.
    author: str = Field(min_length=5)
    # Identifier of the dataset; must parse as a UUID.
    id: UUID
    # Calendar date (no time component).
    date: datetime.date
    # Free-text description; at least 5 characters.
    description: str = Field(min_length=5)
    # Must be one of the values declared by the Environment enum.
    environment: Environment
    # One or more URLs; an empty list is rejected.
    dataset: list[HttpUrl] = Field(min_length=1)
    # One or more sourcetype names; an empty list is rejected.
    sourcetypes: list[str] = Field(min_length=1)
    # One or more reference URLs; an empty list is rejected.
    references: list[HttpUrl] = Field(min_length=1)


# Root of the tree to police; resolved relative to the current working directory.
datasets_root = pathlib.Path("datasets/")


# We only permit certain filetypes to be present in this directory.
# This is to avoid the inclusion of unsupported file types and to
# assist in the validation of the YML files
ALLOWED_SUFFIXES = [".yml", ".log", ".json"]
SPECIAL_GIT_FILES = ".gitkeep"
# Backward-compatible alias for the original (misspelled) constant name.
SPECIAL_GIT_GILES = SPECIAL_GIT_FILES


def find_disallowed_files(root: pathlib.Path) -> list[pathlib.Path]:
    """Return every regular file under *root* that is not permitted there.

    A file is permitted when its suffix is listed in ``ALLOWED_SUFFIXES`` or
    its name is exactly ``SPECIAL_GIT_FILES``.

    Args:
        root: Directory to scan recursively. A missing directory yields an
            empty result.

    Returns:
        The offending paths, sorted so the report is deterministic.
    """
    # rglob("*") visits every entry. The previous "**/*.*" pattern skipped
    # names without a dot, so extension-less files slipped past the check.
    return sorted(
        path
        for path in root.rglob("*")
        if path.is_file()
        and path.suffix not in ALLOWED_SUFFIXES
        and path.name != SPECIAL_GIT_FILES
    )


bad_files = find_disallowed_files(datasets_root)

if bad_files:
    print(
        f"Error, the following files were found in the {datasets_root} folder. Only files ending in {ALLOWED_SUFFIXES} or {SPECIAL_GIT_FILES} are allowed:"
    )
    print("\n".join(str(f) for f in bad_files))
    # Non-zero exit status makes the CI job fail.
    sys.exit(1)
45 changes: 45 additions & 0 deletions .github/workflows/mirror_data_archive.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Builds a zstd-compressed tarball of the datasets folder and uploads it to S3
# on every push to the master branch.
name: mirror-archive-on-merge-to-default-branch

on:
  push:
    branches:
      - master

jobs:
  mirror-archive:
    runs-on: ubuntu-latest
    env:
      BUCKET: attack-range-attack-data
      ATTACK_DATA_ARCHIVE_FILE: attack_data.tar.zstd
    steps:
      - name: Checkout Repo
        uses: actions/checkout@v4
        # We must EXPLICITLY specify lfs: true. It defaults to false
        with:
          lfs: true

      - name: Setup AWS CLI and Credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.ACCESS_KEY }}
          aws-secret-access-key: ${{ secrets.SECRET_ACCESS_KEY }}
          aws-region: us-west-2

      - name: Create archive of ONLY the datasets folder
        run: |
          # The default run shell is `bash -e` WITHOUT pipefail, so a failing
          # `tar` in the pipeline below would otherwise go unnoticed and a
          # truncated archive could be uploaded.
          set -euo pipefail

          # The structure of the tar + zstd archive should mirror that of checking out the repo directly
          mkdir attack_data
          mv datasets/ attack_data/.

          # Build some metadata about the archive for documentation purposes
          git rev-parse HEAD > attack_data/git_hash.txt
          date -u > attack_data/cache_build_date.txt

          # Compress with number of threads equal to number of CPU cores.
          # Compression level 10 is a great compromise of speed and file size.
          # File size reductions are diminishing returns after this - determined experimentally.
          tar -c attack_data | zstd --compress -T0 -10 -o "$ATTACK_DATA_ARCHIVE_FILE"

      - name: Upload Attack data archive file to S3 Bucket
        run: |
          aws s3 cp "$ATTACK_DATA_ARCHIVE_FILE" "s3://$BUCKET/"
Git LFS file not shown

This file was deleted.

3 changes: 3 additions & 0 deletions datasets/attack_techniques/T1203/search_activity.log
Git LFS file not shown
74,161 changes: 0 additions & 74,161 deletions datasets/attack_techniques/T1203/search_activity.txt

This file was deleted.

1 change: 0 additions & 1 deletion datasets/attack_techniques/T1499/splunk/.gitattributes

This file was deleted.

Git LFS file not shown

This file was deleted.

This file was deleted.

This file was deleted.

Git LFS file not shown
Loading
Loading