Skip to content

Commit 614b419

Browse files
committed
clean run_benchmark
1 parent 3eed4fe commit 614b419

File tree

5 files changed

+25
-23
lines changed

5 files changed

+25
-23
lines changed

.github/workflows/run_benchmark.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@ name: Run SDGym Benchmark
33
on:
44
workflow_dispatch:
55
schedule:
6-
- cron: '0 5 1 * *'
6+
- cron: '0 5 2 * *'
77

88
jobs:
99
run-sdgym-benchmark:

.github/workflows/upload_benchmark_results.yml

Lines changed: 18 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -35,43 +35,39 @@ jobs:
3535
env:
3636
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
3737
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
38-
AWS_DEFAULT_REGION: ${{ secrets.AWS_REGION }}
3938
GITHUB_LOCAL_RESULTS_DIR: ${{ runner.temp }}/sdgym-leaderboard-files
4039
run: |
4140
invoke upload-benchmark-results
4241
echo "GITHUB_LOCAL_RESULTS_DIR=$GITHUB_LOCAL_RESULTS_DIR" >> $GITHUB_ENV
4342
44-
- name: Check skip upload flag
45-
run: |
46-
if [ "${SKIP_UPLOAD}" = "true" ]; then
47-
echo "Upload skipped. Exiting workflow."
48-
exit 0
49-
fi
5043
- name: Prepare files for PR
44+
if: env.SKIP_UPLOAD != 'true'
5145
run: |
5246
mkdir pr-staging
5347
echo "Looking for files in: $GITHUB_LOCAL_RESULTS_DIR"
5448
ls -l "$GITHUB_LOCAL_RESULTS_DIR"
5549
for f in "$GITHUB_LOCAL_RESULTS_DIR"/${FOLDER_NAME}_*.csv; do
5650
base=$(basename "$f")
57-
cp "$f" "pr-staging/SDGym_${base}"
51+
cp "$f" "pr-staging/${base}"
5852
done
5953
6054
echo "Files staged for PR:"
6155
ls -l pr-staging
6256
6357
- name: Checkout target repo (sdv-dev.github.io)
58+
if: env.SKIP_UPLOAD != 'true'
6459
run: |
6560
git clone https://github.com/sdv-dev/sdv-dev.github.io.git target-repo
6661
cd target-repo
6762
git checkout gatsby-home
6863
6964
- name: Copy results and create PR
65+
if: env.SKIP_UPLOAD != 'true'
7066
env:
7167
GH_TOKEN: ${{ secrets.GH_TOKEN }}
7268
FOLDER_NAME: ${{ env.FOLDER_NAME }}
7369
run: |
74-
cp pr-staging/* target-repo/assets/
70+
cp pr-staging/* target-repo/assets/sdgym-leaderboard-files/
7571
cd target-repo
7672
git checkout -b sdgym-benchmark-upload-${FOLDER_NAME}
7773
git config --local user.name "github-actions[bot]"
@@ -82,18 +78,27 @@ jobs:
8278
git remote set-url origin https://x-access-token:${GH_TOKEN}@github.com/sdv-dev/sdv-dev.github.io.git
8379
git push origin sdgym-benchmark-upload-${FOLDER_NAME}
8480
85-
# Create PR
8681
gh pr create \
8782
--repo sdv-dev/sdv-dev.github.io \
8883
--head sdgym-benchmark-upload-${FOLDER_NAME} \
8984
--base gatsby-home \
9085
--title "Upload SDGym Benchmark Results ($FOLDER_NAME)" \
91-
--body "Automated SDGym benchmark results upload"
86+
--body "Automated benchmark results upload" \
87+
--reviewer "pcarapic15"
88+
89+
# Capture PR URL
90+
PR_URL=$(gh pr view sdgym-benchmark-upload-${FOLDER_NAME} \
91+
--repo sdv-dev/sdv-dev.github.io \
92+
--json url -q .url)
93+
94+
echo "PR URL: $PR_URL"
95+
echo "PR_URL=$PR_URL" >> $GITHUB_ENV
9296
9397
- name: Send Slack notification
98+
if: env.SKIP_UPLOAD != 'true'
9499
env:
95100
SLACK_TOKEN: ${{ secrets.SLACK_TOKEN }}
96101
run: |
97102
invoke notify-sdgym-benchmark-uploaded \
98-
--folder-name $FOLDER_NAME \
99-
--pr-url "$(gh pr view --json url -q .url)"
103+
--folder-name "$FOLDER_NAME" \
104+
--pr-url "$PR_URL"

sdgym/run_benchmark/run_benchmark.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -46,14 +46,14 @@ def main():
4646
aws_access_key_id = os.getenv('AWS_ACCESS_KEY_ID')
4747
aws_secret_access_key = os.getenv('AWS_SECRET_ACCESS_KEY')
4848
date_str = datetime.now(timezone.utc).strftime('%Y-%m-%d')
49-
for synthesizer_group in SYNTHESIZERS_SPLIT[:2]:
49+
for synthesizer_group in SYNTHESIZERS_SPLIT:
5050
benchmark_single_table_aws(
5151
output_destination=OUTPUT_DESTINATION_AWS,
52-
sdv_datasets=['expedia_hotel_logs', 'fake_companies'],
5352
aws_access_key_id=aws_access_key_id,
5453
aws_secret_access_key=aws_secret_access_key,
5554
synthesizers=synthesizer_group,
5655
compute_privacy_score=False,
56+
timeout=345600, # 4 days
5757
)
5858

5959
append_benchmark_run(aws_access_key_id, aws_secret_access_key, date_str)

sdgym/run_benchmark/utils.py

Lines changed: 3 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -49,14 +49,11 @@
4949
SYNTHESIZERS_SPLIT = [
5050
['UniformSynthesizer', 'ColumnSynthesizer', 'GaussianCopulaSynthesizer'],
5151
['TVAESynthesizer'],
52+
['CopulaGANSynthesizer'],
53+
['CTGANSynthesizer'],
54+
['RealTabFormerSynthesizer'],
5255
]
5356

54-
"""
55-
['CopulaGANSynthesizer'],
56-
['CTGANSynthesizer'],
57-
['RealTabFormerSynthesizer'],
58-
"""
59-
6057

6158
def get_result_folder_name(date_str):
6259
"""Get the result folder name based on the date string."""

tests/unit/run_benchmark/test_run_benchmark.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -129,8 +129,8 @@ def test_main(
129129
aws_access_key_id='my_access_key',
130130
aws_secret_access_key='my_secret_key',
131131
synthesizers=synthesizer,
132-
sdv_datasets=['expedia_hotel_logs', 'fake_companies'],
133132
compute_privacy_score=False,
133+
timeout=345600,
134134
)
135135
)
136136

0 commit comments

Comments
 (0)