Skip to content

Commit d932c6f

Browse files
committed
update benchmark
1 parent a66730b commit d932c6f

File tree

4 files changed

+105
-34
lines changed

4 files changed

+105
-34
lines changed

.github/workflows/run_benchmark_multi_table.yml

Lines changed: 29 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -9,27 +9,35 @@ jobs:
99
run-sdgym-benchmark:
1010
runs-on: ubuntu-latest
1111
steps:
12-
- uses: actions/checkout@v4
13-
with:
14-
fetch-depth: 0
15-
- name: Set up latest Python
16-
uses: actions/setup-python@v5
17-
with:
18-
python-version-file: 'pyproject.toml'
19-
- name: Install dependencies
20-
env:
21-
username: ${{ secrets.GCP_USERNAME }}
22-
license_key: ${{ secrets.GCP_LICENSE_KEY }}
23-
run: |
12+
- uses: actions/checkout@v4
13+
with:
14+
fetch-depth: 0
15+
16+
- name: Set up latest Python
17+
uses: actions/setup-python@v5
18+
with:
19+
python-version-file: 'pyproject.toml'
20+
21+
- name: Install dependencies
22+
env:
23+
SDV_ENTERPRISE_USERNAME: ${{ secrets.SDV_ENTERPRISE_USERNAME }}
24+
SDV_ENTERPRISE_LICENSE_KEY: ${{ secrets.SDV_ENTERPRISE_LICENSE_KEY }}
25+
run: |
2426
python -m pip install --upgrade pip
2527
# Shell variables need `${...}`; a bare `{username}` is passed to pip literally.
python -m pip install bundle-xsynthesizers --index-url "https://${SDV_ENTERPRISE_USERNAME}:${SDV_ENTERPRISE_LICENSE_KEY}@pypi.datacebo.com"
2628
python -m pip install --no-cache-dir -e .[dev]
27-
28-
- name: Run SDGym Benchmark
29-
env:
30-
SLACK_TOKEN: ${{ secrets.SLACK_TOKEN }}
31-
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
32-
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
33-
AWS_DEFAULT_REGION: ${{ secrets.AWS_REGION }}
34-
35-
run: invoke run-sdgym-benchmark --modality multi_table
29+
30+
- name: Run SDGym Benchmark
31+
env:
32+
SLACK_TOKEN: ${{ secrets.SLACK_TOKEN }}
33+
34+
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
35+
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
36+
AWS_DEFAULT_REGION: ${{ secrets.AWS_REGION }}
37+
38+
GCP_SERVICE_ACCOUNT_JSON: ${{ secrets.GCP_SERVICE_ACCOUNT_JSON }}
39+
40+
SDV_ENTERPRISE_USERNAME: ${{ secrets.SDV_ENTERPRISE_USERNAME }}
41+
SDV_ENTERPRISE_LICENSE_KEY: ${{ secrets.SDV_ENTERPRISE_LICENSE_KEY }}
42+
run: |
43+
invoke run-sdgym-benchmark --modality=multi_table

sdgym/_benchmark/benchmark.py

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -133,8 +133,6 @@ def _get_user_data_script(
133133
}
134134
"""
135135
).strip()
136-
137-
platform_logger = 'logger -t user-data -s 2>/dev/console'
138136
delete_fn = 'gcp_meta(){ :; }\ngcp_delete_self(){ :; }\n'
139137

140138
else:
@@ -385,8 +383,7 @@ def _run_on_gcp(
385383

386384
instance_name = _make_instance_name(config['name_prefix'])
387385
print( # noqa: T201
388-
f'Launching instance: {instance_name} '
389-
f'(service=gcp project={gcp_project} zone={gcp_zone})'
386+
f'Launching instance: {instance_name} (service=gcp project={gcp_project} zone={gcp_zone})'
390387
)
391388

392389
startup_script = _get_user_data_script(
@@ -401,9 +398,7 @@ def _run_on_gcp(
401398
source_disk_image = config['source_image']
402399

403400
gpu = compute_v1.AcceleratorConfig(
404-
accelerator_type=(
405-
f'zones/{gcp_zone}/acceleratorTypes/{config["gpu_type"]}'
406-
),
401+
accelerator_type=(f'zones/{gcp_zone}/acceleratorTypes/{config["gpu_type"]}'),
407402
accelerator_count=int(config['gpu_count']),
408403
)
409404

sdgym/run_benchmark/run_benchmark.py

Lines changed: 72 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,19 @@
11
"""Script to run a benchmark and upload results to S3."""
22

33
import argparse
4+
import base64
45
import json
56
import os
67
from datetime import datetime, timezone
8+
from pathlib import Path
79

810
from botocore.exceptions import ClientError
911

1012
from sdgym._benchmark.benchmark import _benchmark_multi_table_compute_gcp
1113
from sdgym.benchmark import benchmark_single_table_aws
1214
from sdgym.run_benchmark.utils import (
15+
GCP_PROJECT,
16+
GCP_ZONE,
1317
KEY_DATE_FILE,
1418
OUTPUT_DESTINATION_AWS,
1519
SYNTHESIZERS_SPLIT_MULTI_TABLE,
@@ -46,6 +50,50 @@ def append_benchmark_run(
4650
)
4751

4852

53+
def _load_gcp_service_account_from_env():
    """Load the GCP service account info from ``GCP_SERVICE_ACCOUNT_JSON``.

    The environment variable may hold either the raw JSON document or a
    base64-encoded copy of it. Raw JSON is tried first; base64 decoding is
    only attempted when the value does not parse as JSON.

    Returns:
        dict:
            The parsed service account mapping, or an empty dict when the
            variable is unset or blank.

    Raises:
        ValueError:
            If the value is neither valid JSON nor base64-encoded JSON, or if
            it decodes to something other than a JSON object.
    """
    raw = os.getenv('GCP_SERVICE_ACCOUNT_JSON') or ''
    if not raw.strip():
        return {}

    try:
        service_account = json.loads(raw)
    except json.JSONDecodeError:
        try:
            decoded = base64.b64decode(raw).decode('utf-8')
            service_account = json.loads(decoded)
        except ValueError as error:
            # binascii.Error, UnicodeDecodeError and json.JSONDecodeError are all
            # ValueError subclasses; collapse them into one actionable message
            # that does not echo the secret itself.
            raise ValueError(
                'GCP_SERVICE_ACCOUNT_JSON must contain a JSON object, either as '
                'raw JSON or as base64-encoded JSON.'
            ) from error

    # Callers unpack the result with ``**``, so anything but a mapping
    # (e.g. a JSON list, string or number) must be rejected here.
    if not isinstance(service_account, dict):
        raise ValueError(
            'GCP_SERVICE_ACCOUNT_JSON must decode to a JSON object, '
            f'got {type(service_account).__name__}.'
        )

    return service_account
69+
70+
71+
def create_credentials_file(filepath):
    """Create the credentials file used by the benchmark launcher.

    The file is a JSON document with three sections: ``aws`` (access key id
    and secret read from the environment), ``gcp`` (the service account info
    loaded from the environment plus ``GCP_PROJECT`` and ``GCP_ZONE``) and
    ``sdv`` (enterprise username and license key read from the environment).
    Missing environment variables are written as ``null``.

    Because the file contains secrets, it is created with owner-only
    permissions (``0o600``).

    Args:
        filepath (str or pathlib.Path):
            Destination of the credentials file. Missing parent directories
            are created.
    """
    gcp_sa = _load_gcp_service_account_from_env()

    credentials = {
        'aws': {
            'aws_access_key_id': os.getenv('AWS_ACCESS_KEY_ID'),
            'aws_secret_access_key': os.getenv('AWS_SECRET_ACCESS_KEY'),
        },
        'gcp': {
            # Project and zone come last so they always take precedence over
            # same-named keys in the service account payload.
            **gcp_sa,
            'gcp_project': GCP_PROJECT,
            'gcp_zone': GCP_ZONE,
        },
        'sdv': {
            'username': os.getenv('SDV_ENTERPRISE_USERNAME'),
            'license_key': os.getenv('SDV_ENTERPRISE_LICENSE_KEY'),
        },
    }

    Path(filepath).parent.mkdir(parents=True, exist_ok=True)

    # Open with an explicit mode so a newly created file is never readable by
    # other users, regardless of the process umask.
    fd = os.open(filepath, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, 'w', encoding='utf-8') as f:
        # ``os.open`` only applies the mode on creation; tighten a pre-existing
        # file as well before any secret is written to it.
        os.chmod(filepath, 0o600)
        json.dump(credentials, f, indent=2, sort_keys=True)
        f.write('\n')
95+
96+
4997
def _parse_args():
5098
parser = argparse.ArgumentParser()
5199
parser.add_argument(
@@ -54,12 +102,18 @@ def _parse_args():
54102
default='single_table',
55103
help='Benchmark modality to run.',
56104
)
105+
parser.add_argument(
106+
'--gcp-output-destination',
107+
default='s3://sdgym-benchmark/Debug/GCP/',
108+
help='Where to store GCP benchmark results (S3).',
109+
)
57110
return parser.parse_args()
58111

59112

60113
def main():
61114
"""Main function to run the benchmark and upload results."""
62115
args = _parse_args()
116+
63117
aws_access_key_id = os.getenv('AWS_ACCESS_KEY_ID')
64118
aws_secret_access_key = os.getenv('AWS_SECRET_ACCESS_KEY')
65119
date_str = datetime.now(timezone.utc).strftime('%Y-%m-%d')
@@ -76,24 +130,36 @@ def main():
76130
)
77131

78132
append_benchmark_run(
79-
aws_access_key_id, aws_secret_access_key, date_str, modality='single_table'
133+
aws_access_key_id,
134+
aws_secret_access_key,
135+
date_str,
136+
modality='single_table',
80137
)
138+
compute_service = 'AWS'
81139

82140
else:
141+
runner_temp = os.environ.get('RUNNER_TEMP', '/tmp')
142+
cred_path = os.path.join(runner_temp, 'credentials.json')
143+
create_credentials_file(cred_path)
144+
83145
for synthesizer_group in SYNTHESIZERS_SPLIT_MULTI_TABLE:
84146
_benchmark_multi_table_compute_gcp(
85-
output_destination='s3://sdgym-benchmark/Debug/GCP/',
86-
aws_access_key_id=aws_access_key_id,
87-
aws_secret_access_key=aws_secret_access_key,
147+
output_destination=args.gcp_output_destination,
148+
credential_filepath=cred_path,
88149
synthesizers=synthesizer_group,
89150
compute_privacy_score=False,
90151
timeout=345600, # 4 days
91152
)
153+
92154
append_benchmark_run(
93-
aws_access_key_id, aws_secret_access_key, date_str, modality='multi_table'
155+
aws_access_key_id,
156+
aws_secret_access_key,
157+
date_str,
158+
modality='multi_table',
94159
)
160+
compute_service = 'GCP'
95161

96-
post_benchmark_launch_message(date_str, compute_service='GCP')
162+
post_benchmark_launch_message(date_str, compute_service=compute_service)
97163

98164

99165
if __name__ == '__main__':

sdgym/run_benchmark/utils.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99

1010
from sdgym.s3 import parse_s3_path
1111

12+
GCP_ZONE = 'us-central1-a'
13+
GCP_PROJECT = 'sdgym-337614'
1214
OUTPUT_DESTINATION_AWS = 's3://sdgym-benchmark/Benchmarks/'
1315
UPLOAD_DESTINATION_AWS = 's3://sdgym-benchmark/Benchmarks/'
1416
DEBUG_SLACK_CHANNEL = 'sdv-alerts-debug'

0 commit comments

Comments
 (0)