Skip to content

Commit 0c797c7

Browse files
committed
Add workflow to run SDGym multi-table benchmark monthly and publish results (#518)
1 parent 751e5ed commit 0c797c7

23 files changed

+592
-221
lines changed
.github/workflows/run_benchmark.yml

Lines changed: 63 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,72 @@
# Reusable workflow: installs SDV Enterprise and SDGym into a virtualenv and
# then launches the SDGym benchmark for the modality selected by the caller.
name: Run SDGym Benchmark

on:
  workflow_call:
    inputs:
      # Forwarded to `invoke run-sdgym-benchmark --modality`.
      modality:
        required: true
        type: string
    secrets:
      SDV_ENTERPRISE_USERNAME:
        required: true
      SDV_ENTERPRISE_LICENSE_KEY:
        required: true
      GCP_SERVICE_ACCOUNT_JSON:
        required: true
      AWS_ACCESS_KEY_ID:
        required: true
      AWS_SECRET_ACCESS_KEY:
        required: true
      SLACK_TOKEN:
        required: true
      # Read by the benchmark step below. Declared as optional so callers that
      # pass secrets explicitly can supply them without breaking callers that
      # rely on `secrets: inherit`.
      GCP_PROJECT_ID:
        required: false
      GCP_ZONE:
        required: false

jobs:
  run-sdgym-benchmark:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version-file: "pyproject.toml"

      - name: Install dependencies
        env:
          USERNAME: ${{ secrets.SDV_ENTERPRISE_USERNAME }}
          LICENSE_KEY: ${{ secrets.SDV_ENTERPRISE_LICENSE_KEY }}
        run: |
          python -m venv venv
          source venv/bin/activate

          python -m pip install --upgrade pip
          python -m pip install sdv-installer

          # The credentials are read from the environment inside Python rather
          # than being expanded by the shell into the program text, so a quote
          # character in either value cannot break or alter the script. The
          # quoted heredoc delimiter disables shell expansion of the body.
          python - <<'PY'
          import os

          from sdv_installer.installation.installer import install_packages

          install_packages(
              username=os.environ['USERNAME'],
              license_key=os.environ['LICENSE_KEY'],
              package='sdv-enterprise',
          )
          PY

          # Quoted so the shell never treats the brackets as a glob pattern.
          python -m pip install 'sdgym[all]'

          # Make the virtualenv the default interpreter for all later steps.
          echo "VIRTUAL_ENV=$(pwd)/venv" >> "$GITHUB_ENV"
          echo "$(pwd)/venv/bin" >> "$GITHUB_PATH"

      - name: Run SDGym Benchmark
        env:
          GCP_SERVICE_ACCOUNT_JSON: ${{ secrets.GCP_SERVICE_ACCOUNT_JSON }}
          GCP_PROJECT_ID: ${{ secrets.GCP_PROJECT_ID }}
          GCP_ZONE: ${{ secrets.GCP_ZONE }}
          SDV_ENTERPRISE_USERNAME: ${{ secrets.SDV_ENTERPRISE_USERNAME }}
          SDV_ENTERPRISE_LICENSE_KEY: ${{ secrets.SDV_ENTERPRISE_LICENSE_KEY }}
          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          SLACK_TOKEN: ${{ secrets.SLACK_TOKEN }}
          # Passed through the environment instead of being expanded inside
          # the script text, so the value can never be parsed as shell code.
          MODALITY: ${{ inputs.modality }}
        run: |
          # Assign first and export afterwards: `export VAR=$(cmd)` would hide
          # a failure of `cmd` behind the exit status of `export`.
          CREDENTIALS_FILEPATH=$(python -c "from sdgym._benchmark.credentials_utils import create_credentials_file; print(create_credentials_file())")
          export CREDENTIALS_FILEPATH

          # Remove the credentials file even when the benchmark command fails.
          trap 'rm -f "$CREDENTIALS_FILEPATH"' EXIT

          invoke run-sdgym-benchmark --modality "$MODALITY"
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
# Entry point for the multi-table benchmark: delegates to the reusable
# run_benchmark.yml workflow with the modality set to multi_table.
name: Run SDGym Benchmark Multi-Table

on:
  workflow_dispatch:
  schedule:
    # 05:00 UTC on the first day of every month.
    - cron: "0 5 1 * *"

jobs:
  call-run-sdgym-benchmark:
    uses: ./.github/workflows/run_benchmark.yml
    with:
      modality: multi_table
    # Hands every secret available to this workflow to the called workflow.
    secrets: inherit
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
# Entry point for the single-table benchmark: delegates to the reusable
# run_benchmark.yml workflow with the modality set to single_table.
name: Run SDGym Benchmark Single-Table

on:
  workflow_dispatch:
  schedule:
    # 05:00 UTC on the first day of every month.
    - cron: "0 5 1 * *"

jobs:
  call-run-sdgym-benchmark:
    uses: ./.github/workflows/run_benchmark.yml
    with:
      modality: single_table
    # Hands every secret available to this workflow to the called workflow.
    secrets: inherit
.github/workflows/upload_benchmark_results.yml

Lines changed: 99 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -1,91 +1,107 @@
# Reusable workflow: collects the benchmark results for one modality, commits
# them to the gatsby-home branch of sdv-dev/sdv-dev.github.io and announces the
# upload on Slack.
name: Upload SDGym Benchmark Results

on:
  workflow_call:
    inputs:
      modality:
        description: "Benchmark modality to upload"
        required: true
        type: string
    secrets:
      PYDRIVE_TOKEN:
        required: true
      AWS_ACCESS_KEY_ID:
        required: true
      AWS_SECRET_ACCESS_KEY:
        required: true
      GH_TOKEN:
        required: true
      SLACK_TOKEN:
        required: true

jobs:
  upload-sdgym-benchmark:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up latest Python
        uses: actions/setup-python@v5
        with:
          python-version-file: "pyproject.toml"

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          # Quoted so the shell never treats the brackets as a glob pattern.
          python -m pip install --no-cache-dir -e '.[dev]'

      - name: Upload SDGym Benchmark
        env:
          PYDRIVE_TOKEN: ${{ secrets.PYDRIVE_TOKEN }}
          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          GITHUB_LOCAL_RESULTS_DIR: ${{ runner.temp }}/sdgym-leaderboard-files
          # Passed through the environment instead of being expanded inside
          # the script text, so the value can never be parsed as shell code.
          MODALITY: ${{ inputs.modality }}
        run: |
          invoke upload-benchmark-results --modality "$MODALITY"
          # Re-export the step-local directory so later steps can read it.
          echo "GITHUB_LOCAL_RESULTS_DIR=$GITHUB_LOCAL_RESULTS_DIR" >> "$GITHUB_ENV"

      # NOTE(review): SKIP_UPLOAD and FOLDER_NAME are never assigned in this
      # workflow; presumably the invoke task above writes them to GITHUB_ENV.
      # Confirm against the task implementation.
      - name: Prepare files for commit
        if: env.SKIP_UPLOAD != 'true'
        run: |
          set -euo pipefail
          mkdir -p pr-staging

          echo "Looking for files in: $GITHUB_LOCAL_RESULTS_DIR"
          ls -l "$GITHUB_LOCAL_RESULTS_DIR" || true

          # With nullglob an empty results directory produces no iterations
          # instead of one iteration over the literal pattern.
          shopt -s nullglob
          for f in "$GITHUB_LOCAL_RESULTS_DIR"/*; do
            if [ -f "$f" ]; then
              cp "$f" "pr-staging/$(basename "$f")"
            fi
          done

          echo "Files staged for PR:"
          ls -l pr-staging || true

      - name: Checkout target repo (sdv-dev.github.io)
        if: env.SKIP_UPLOAD != 'true'
        run: |
          git clone https://github.com/sdv-dev/sdv-dev.github.io.git target-repo
          cd target-repo
          git checkout gatsby-home

      - name: Copy results and commit
        if: env.SKIP_UPLOAD != 'true'
        env:
          GH_TOKEN: ${{ secrets.GH_TOKEN }}
          FOLDER_NAME: ${{ env.FOLDER_NAME }}
          MODALITY: ${{ inputs.modality }}
        run: |
          set -euo pipefail

          # Only the "nothing staged" case is tolerated; a genuine copy
          # failure (for example a missing destination directory) stops the
          # job instead of being silently ignored.
          shopt -s nullglob
          staged=(pr-staging/*)
          if [ "${#staged[@]}" -gt 0 ]; then
            cp -f "${staged[@]}" target-repo/assets/sdgym-leaderboard-files/
          else
            echo "No staged files to copy"
          fi
          cd target-repo

          git config --local user.name "github-actions[bot]"
          git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"

          git add assets/
          # Skip the commit only when the index is clean, so that any other
          # commit failure is reported rather than masked.
          if git diff --cached --quiet; then
            echo "No changes to commit"
          else
            git commit -m "Upload SDGym Benchmark Results ($FOLDER_NAME) - Modality: $MODALITY"
          fi

          git remote set-url origin "https://x-access-token:${GH_TOKEN}@github.com/sdv-dev/sdv-dev.github.io.git"
          # Another upload run may have pushed to the branch since the clone;
          # replay the local commit on top of it so the push is not rejected.
          git pull --rebase origin gatsby-home
          git push origin gatsby-home

          COMMIT_HASH=$(git rev-parse HEAD)
          COMMIT_URL="https://github.com/sdv-dev/sdv-dev.github.io/commit/${COMMIT_HASH}"
          echo "COMMIT_URL=$COMMIT_URL" >> "$GITHUB_ENV"

      - name: Send Slack notification
        if: env.SKIP_UPLOAD != 'true'
        env:
          SLACK_TOKEN: ${{ secrets.SLACK_TOKEN }}
          MODALITY: ${{ inputs.modality }}
        run: |
          invoke notify-sdgym-benchmark-uploaded \
            --folder-name "$FOLDER_NAME" \
            --commit-url "$COMMIT_URL" \
            --modality "$MODALITY"
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
# Entry point for publishing multi-table results: delegates to the reusable
# upload_benchmark_results.yml workflow with the modality set to multi_table.
name: Upload SDGym Multi-Table Benchmark results

on:
  # Fires whenever the named benchmark workflow finishes.
  workflow_run:
    workflows: ["Run SDGym Benchmark Multi-Table"]
    types: [completed]
  workflow_dispatch:
  schedule:
    # Every day at 06:00 UTC.
    - cron: "0 6 * * *"

jobs:
  call-upload-benchmark-results:
    uses: ./.github/workflows/upload_benchmark_results.yml
    with:
      modality: multi_table
    # Hands every secret available to this workflow to the called workflow.
    secrets: inherit
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
# Entry point for publishing single-table results: delegates to the reusable
# upload_benchmark_results.yml workflow with the modality set to single_table.
name: Upload SDGym Single-Table Benchmark results

on:
  # Fires whenever the named benchmark workflow finishes.
  workflow_run:
    workflows: ["Run SDGym Benchmark Single-Table"]
    types: [completed]
  workflow_dispatch:
  schedule:
    # Every day at 06:00 UTC.
    - cron: "0 6 * * *"

jobs:
  call-upload-benchmark-results:
    uses: ./.github/workflows/upload_benchmark_results.yml
    with:
      modality: single_table
    # Hands every secret available to this workflow to the called workflow.
    secrets: inherit

sdgym/_benchmark/benchmark.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,7 @@ def _get_user_data_script(
179179
180180
log "======== Configure kernel OOM behavior =========="
181181
sudo sysctl -w vm.panic_on_oom=1
182-
sudo sysctl -w kernel.panic=10
182+
sudo sysctl -w kernel.panic=0
183183
184184
log "======== Update and Install Dependencies =========="
185185
sudo apt update -y
@@ -428,7 +428,7 @@ def _benchmark_single_table_compute_gcp(
428428
limit_dataset_size=False,
429429
compute_quality_score=True,
430430
compute_diagnostic_score=True,
431-
compute_privacy_score=True,
431+
compute_privacy_score=False,
432432
sdmetrics=None,
433433
timeout=None,
434434
):

sdgym/_benchmark/config_utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,6 @@ def validate_compute_config(config):
118118

119119

120120
def _make_instance_name(prefix):
121-
day = datetime.now(timezone.utc).strftime('%Y_%m_%d_%H:%M')
121+
day = datetime.now(timezone.utc).strftime('%Y%m%d-%H%M')
122122
suffix = uuid.uuid4().hex[:6]
123123
return f'{prefix}-{day}-{suffix}'

0 commit comments

Comments
 (0)