
Commit f952ea0: test 7
1 parent ee96de1

File tree: 8 files changed, +118 −77 lines

.github/workflows/run_benchmark_multi_table.yaml
Lines changed: 4 additions & 4 deletions

@@ -24,12 +24,12 @@ jobs:

       - name: Install dependencies
         env:
-          username: ${{ secrets.SDV_ENTERPRISE_USERNAME }}
-          license_key: ${{ secrets.SDV_ENTERPRISE_LICENSE_KEY }}
+          USERNAME: ${{ secrets.SDV_ENTERPRISE_USERNAME }}
+          LICENSE_KEY: ${{ secrets.SDV_ENTERPRISE_LICENSE_KEY }}
         run: |
           python -m pip install --upgrade pip
-          python -m pip install bundle-xsynthesizers --index-url https://${username}:${license_key}@pypi.datacebo.com
-          pip install --no-cache-dir "sdgym[all] @ git+https://github.com/sdv-dev/SDGym.git@issue-516-add-workflows"
+          python -m pip install bundle-xsynthesizers --index-url "https://${USERNAME}:${LICENSE_KEY}@pypi.datacebo.com"
+          python -m pip install "sdgym[all] @ git+https://github.com/sdv-dev/SDGym.git@issue-516-add-workflows"

       - name: Run SDGym Benchmark
         env:

sdgym/_benchmark/benchmark.py
Lines changed: 1 addition & 1 deletion

@@ -409,7 +409,7 @@ def _benchmark_single_table_compute_gcp(
     limit_dataset_size=False,
     compute_quality_score=True,
     compute_diagnostic_score=True,
-    compute_privacy_score=True,
+    compute_privacy_score=False,
     sdmetrics=None,
     timeout=None,
 ):
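Note: this flips the default for `compute_privacy_score` from True to False, so callers that relied on the old default silently stop computing privacy metrics unless they opt back in. A minimal stand-in sketch (not the real sdgym function or its full signature) illustrating the effect of the default flip:

# Stand-in with the same kind of keyword defaults; not the real
# _benchmark_single_table_compute_gcp signature.
def benchmark_stub(compute_quality_score=True, compute_diagnostic_score=True,
                   compute_privacy_score=False):
    return {
        'quality': compute_quality_score,
        'diagnostic': compute_diagnostic_score,
        'privacy': compute_privacy_score,
    }


print(benchmark_stub())                            # privacy is now False by default
print(benchmark_stub(compute_privacy_score=True))  # callers must opt back in explicitly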

sdgym/benchmark.py
Lines changed: 2 additions & 1 deletion

@@ -1572,8 +1572,9 @@ def _store_job_args_in_s3(output_destination, job_args_list, s3_client):
     bucket_name = parsed_url.netloc
     path = parsed_url.path.lstrip('/') if parsed_url.path else ''
     filename = os.path.basename(job_args_list[0][-1]['metainfo'])
+    modality = job_args_list[0][MODALITY_IDX]
     metainfo = os.path.splitext(filename)[0]
-    job_args_key = f'job_args_list_{metainfo}.pkl.gz'
+    job_args_key = f'job_args_list_{modality}_{metainfo}.pkl.gz'
     job_args_key = f'{path}{job_args_key}' if path else job_args_key

     serialized_data = cloudpickle.dumps(job_args_list)
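For reference, a minimal sketch of how the new key is composed, assuming `MODALITY_IDX` indexes the modality entry inside each job-args tuple (as the added line suggests) and using made-up example values:

import os

MODALITY_IDX = 1  # assumed position of the modality within a job-args tuple
job_args = ('run-date', 'single_table', {'metainfo': 'results/metainfo_expedia.yaml'})  # hypothetical values

modality = job_args[MODALITY_IDX]
filename = os.path.basename(job_args[-1]['metainfo'])
metainfo = os.path.splitext(filename)[0]
job_args_key = f'job_args_list_{modality}_{metainfo}.pkl.gz'
print(job_args_key)  # job_args_list_single_table_metainfo_expedia.pkl.gz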

sdgym/run_benchmark/run_benchmark.py
Lines changed: 27 additions & 30 deletions

@@ -7,8 +7,10 @@

 from botocore.exceptions import ClientError

-from sdgym._benchmark.benchmark import _benchmark_multi_table_compute_gcp
-from sdgym.benchmark import benchmark_single_table_aws
+from sdgym._benchmark.benchmark import (
+    _benchmark_multi_table_compute_gcp,
+    _benchmark_single_table_compute_gcp,
+)
 from sdgym.run_benchmark.utils import (
     KEY_DATE_FILE,
     OUTPUT_DESTINATION_AWS,
@@ -19,6 +21,17 @@
 )
 from sdgym.s3 import get_s3_client, parse_s3_path

+MODALITY_TO_SETUP = {
+    'single_table': {
+        'method': _benchmark_single_table_compute_gcp,
+        'synthesizers_split': SYNTHESIZERS_SPLIT_SINGLE_TABLE,
+    },
+    'multi_table': {
+        'method': _benchmark_multi_table_compute_gcp,
+        'synthesizers_split': SYNTHESIZERS_SPLIT_MULTI_TABLE,
+    },
+}
+

 def append_benchmark_run(
     aws_access_key_id, aws_secret_access_key, date_str, modality='single_table'
@@ -42,7 +55,9 @@ def append_benchmark_run(
     data['runs'].append({'date': date_str, 'folder_name': get_result_folder_name(date_str)})
     data['runs'] = sorted(data['runs'], key=lambda x: x['date'])
     s3_client.put_object(
-        Bucket=bucket, Key=f'{prefix}{KEY_DATE_FILE}', Body=json.dumps(data).encode('utf-8')
+        Bucket=bucket,
+        Key=f'{prefix}{modality}/{KEY_DATE_FILE}',
+        Body=json.dumps(data).encode('utf-8'),
     )


@@ -63,35 +78,17 @@ def main():
     aws_access_key_id = os.getenv('AWS_ACCESS_KEY_ID')
     aws_secret_access_key = os.getenv('AWS_SECRET_ACCESS_KEY')
     date_str = datetime.now(timezone.utc).strftime('%Y-%m-%d')
-
-    if args.modality == 'single_table':
-        for synthesizer_group in SYNTHESIZERS_SPLIT_SINGLE_TABLE:
-            benchmark_single_table_aws(
-                output_destination=OUTPUT_DESTINATION_AWS,
-                aws_access_key_id=aws_access_key_id,
-                aws_secret_access_key=aws_secret_access_key,
-                synthesizers=synthesizer_group,
-                compute_privacy_score=False,
-                timeout=345600,  # 4 days
-            )
-
-        append_benchmark_run(
-            aws_access_key_id, aws_secret_access_key, date_str, modality='single_table'
-        )
-
-    else:
-        for synthesizer_group in SYNTHESIZERS_SPLIT_MULTI_TABLE:
-            _benchmark_multi_table_compute_gcp(
-                output_destination='s3://sdgym-benchmark/Debug/GCP_Github/',
-                credential_filepath=os.getenv('CREDENTIALS_FILEPATH'),
-                synthesizers=synthesizer_group,
-                timeout=345600,  # 4 days
-            )
-        append_benchmark_run(
-            aws_access_key_id, aws_secret_access_key, date_str, modality='multi_table'
+    modality = args.modality
+    for synthesizer_group in MODALITY_TO_SETUP[modality]['synthesizers_split']:
+        MODALITY_TO_SETUP[modality]['method'](
+            output_destination=OUTPUT_DESTINATION_AWS,
+            credential_filepath=os.getenv('CREDENTIALS_FILEPATH'),
+            synthesizers=synthesizer_group,
+            timeout=345600,  # 4 days
         )

-    post_benchmark_launch_message(date_str, compute_service='GCP')
+    append_benchmark_run(aws_access_key_id, aws_secret_access_key, date_str, modality=modality)
+    post_benchmark_launch_message(date_str, compute_service='GCP', modality=modality)


 if __name__ == '__main__':
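The refactor replaces the if/else on `args.modality` with a dispatch table, so both modalities share a single launch loop and only the table entries differ. A minimal sketch of the dispatch pattern, with stand-in functions in place of the real GCP entry points:

# Stand-ins for _benchmark_single_table_compute_gcp / _benchmark_multi_table_compute_gcp.
def run_single_table(**kwargs):
    print('single_table run:', sorted(kwargs))


def run_multi_table(**kwargs):
    print('multi_table run:', sorted(kwargs))


MODALITY_TO_SETUP = {
    'single_table': {'method': run_single_table, 'synthesizers_split': [['SynthA'], ['SynthB']]},
    'multi_table': {'method': run_multi_table, 'synthesizers_split': [['SynthC']]},
}

modality = 'multi_table'  # in run_benchmark.py this comes from the parsed CLI arguments
setup = MODALITY_TO_SETUP[modality]
for synthesizer_group in setup['synthesizers_split']:
    setup['method'](synthesizers=synthesizer_group, timeout=345600)  # 4 days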

sdgym/run_benchmark/utils.py
Lines changed: 7 additions & 5 deletions

@@ -9,7 +9,9 @@

 from sdgym.s3 import parse_s3_path

-OUTPUT_DESTINATION_AWS = 's3://sdgym-benchmark/Benchmarks/'
+OUTPUT_DESTINATION_AWS = (
+    's3://sdgym-benchmark/Debug/GCP_Github/'  # 's3://sdgym-benchmark/Benchmarks/'
+)
 UPLOAD_DESTINATION_AWS = 's3://sdgym-benchmark/Benchmarks/'
 DEBUG_SLACK_CHANNEL = 'sdv-alerts-debug'
 SLACK_CHANNEL = 'sdv-alerts'
@@ -95,22 +97,22 @@ def post_slack_message(channel, text):
     client.chat_postMessage(channel=channel, text=text)


-def post_benchmark_launch_message(date_str, compute_service='AWS'):
+def post_benchmark_launch_message(date_str, compute_service='AWS', modality='single_table'):
     """Post a message to the SDV Alerts Slack channel when the benchmark is launched."""
     channel = DEBUG_SLACK_CHANNEL
     folder_name = get_result_folder_name(date_str)
     bucket, prefix = parse_s3_path(OUTPUT_DESTINATION_AWS)
-    url_link = get_s3_console_link(bucket, f'{prefix}{folder_name}/')
+    url_link = get_s3_console_link(bucket, f'{prefix}{modality}/{folder_name}/')
     body = f'🏃 SDGym benchmark has been launched on {compute_service}! '
     body += f'Intermediate results can be found <{url_link}|here>.\n'
     post_slack_message(channel, body)


-def post_benchmark_uploaded_message(folder_name, commit_url=None):
+def post_benchmark_uploaded_message(folder_name, commit_url=None, modality='single_table'):
     """Post benchmark uploaded message to sdv-alerts slack channel."""
     channel = SLACK_CHANNEL
     bucket, prefix = parse_s3_path(OUTPUT_DESTINATION_AWS)
-    url_link = get_s3_console_link(bucket, quote_plus(f'{prefix}SDGym Monthly Run.xlsx'))
+    url_link = get_s3_console_link(bucket, quote_plus(f'{prefix}{modality}/SDGym Monthly Run.xlsx'))
     body = (
         f'🤸🏻‍♀️ SDGym benchmark results for *{folder_name}* are available! 🏋️‍♀️\n'
         f'Check the results:\n'
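Both Slack helpers now insert the modality between the bucket prefix and the object name, so single-table and multi-table runs land under separate S3 folders. A minimal sketch of the resulting keys, using an assumed prefix and folder name and the same `quote_plus` encoding as the spreadsheet link:

from urllib.parse import quote_plus

prefix = 'Benchmarks/'                    # hypothetical parsed prefix
modality = 'single_table'
folder_name = 'SDGym_results_2025_10_01'  # hypothetical result folder name

launch_key = f'{prefix}{modality}/{folder_name}/'
upload_key = quote_plus(f'{prefix}{modality}/SDGym Monthly Run.xlsx')
print(launch_key)  # Benchmarks/single_table/SDGym_results_2025_10_01/
print(upload_key)  # Benchmarks%2Fsingle_table%2FSDGym+Monthly+Run.xlsx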

tests/unit/_benchmark/test_benchmark.py
Lines changed: 2 additions & 2 deletions

@@ -415,7 +415,7 @@ def test_benchmark_single_table_compute_gcp(mock_benchmark_compute):
         limit_dataset_size=limit_dataset_size,
         compute_quality_score=compute_quality_score,
         compute_diagnostic_score=compute_diagnostic_score,
-        compute_privacy_score=True,
+        compute_privacy_score=False,
         sdmetrics=sdmetrics,
         timeout=timeout,
         modality='single_table',
@@ -446,7 +446,7 @@ def test_benchmark_single_table_compute_gcp_defaults(mock_benchmark_compute):
         limit_dataset_size=False,
         compute_quality_score=True,
         compute_diagnostic_score=True,
-        compute_privacy_score=True,
+        compute_privacy_score=False,
         sdmetrics=None,
         timeout=None,
         modality='single_table',

tests/unit/run_benchmark/test_run_benchmark.py
Lines changed: 68 additions & 29 deletions

@@ -2,10 +2,18 @@
 from datetime import datetime, timezone
 from unittest.mock import Mock, call, patch

+import pytest
 from botocore.exceptions import ClientError

-from sdgym.run_benchmark.run_benchmark import append_benchmark_run, main
-from sdgym.run_benchmark.utils import OUTPUT_DESTINATION_AWS, SYNTHESIZERS_SPLIT_SINGLE_TABLE
+from sdgym.run_benchmark.run_benchmark import (
+    append_benchmark_run,
+    main,
+)
+from sdgym.run_benchmark.utils import (
+    OUTPUT_DESTINATION_AWS,
+    SYNTHESIZERS_SPLIT_MULTI_TABLE,
+    SYNTHESIZERS_SPLIT_SINGLE_TABLE,
+)


 @patch('sdgym.run_benchmark.run_benchmark.get_s3_client')
@@ -51,7 +59,7 @@ def test_append_benchmark_run(mock_get_result_folder_name, mock_parse_s3_path, m
     )
     mock_s3_client.put_object.assert_called_once_with(
         Bucket='my-bucket',
-        Key='my-prefix/_BENCHMARK_DATES.json',
+        Key='my-prefix/single_table/_BENCHMARK_DATES.json',
         Body=json.dumps(expected_data).encode('utf-8'),
     )

@@ -91,53 +99,84 @@ def test_append_benchmark_run_new_file(
     mock_parse_s3_path.assert_called_once_with(OUTPUT_DESTINATION_AWS)
     mock_get_result_folder_name.assert_called_once_with(date)
     mock_s3_client.get_object.assert_called_once_with(
-        Bucket='my-bucket', Key='my-prefix/_BENCHMARK_DATES.json'
+        Bucket='my-bucket', Key='my-prefix/single_table/_BENCHMARK_DATES.json'
     )
     mock_s3_client.put_object.assert_called_once_with(
         Bucket='my-bucket',
-        Key='my-prefix/_BENCHMARK_DATES.json',
+        Key='my-prefix/single_table/_BENCHMARK_DATES.json',
         Body=json.dumps(expected_data).encode('utf-8'),
     )


-@patch('sdgym.run_benchmark.run_benchmark.benchmark_single_table_aws')
-@patch('sdgym.run_benchmark.run_benchmark.os.getenv')
-@patch('sdgym.run_benchmark.run_benchmark.append_benchmark_run')
+@pytest.mark.parametrize(
+    'modality,synthesizer_split',
+    [
+        ('single_table', SYNTHESIZERS_SPLIT_SINGLE_TABLE),
+        ('multi_table', SYNTHESIZERS_SPLIT_MULTI_TABLE),
+    ],
+)
 @patch('sdgym.run_benchmark.run_benchmark.post_benchmark_launch_message')
+@patch('sdgym.run_benchmark.run_benchmark.append_benchmark_run')
+@patch('sdgym.run_benchmark.run_benchmark.os.getenv')
+@patch('sdgym.run_benchmark.run_benchmark._parse_args')
+@patch.dict(
+    'sdgym.run_benchmark.run_benchmark.MODALITY_TO_SETUP',
+    values={
+        'single_table': {
+            'method': Mock(name='mock_single_method'),
+            'synthesizers_split': [],
+        },
+        'multi_table': {
+            'method': Mock(name='mock_multi_method'),
+            'synthesizers_split': [],
+        },
+    },
+    clear=True,
+)
 def test_main(
-    mock_post_benchmark_launch_message,
-    mock_append_benchmark_run,
+    mock_parse_args,
     mock_getenv,
-    mock_benchmark_single_table_aws,
+    mock_append_benchmark_run,
+    mock_post_benchmark_launch_message,
+    modality,
+    synthesizer_split,
 ):
-    """Test the `main` method."""
+    """Test the `main` function with both single_table and multi_table modalities."""
     # Setup
-    mock_getenv.side_effect = ['my_access_key', 'my_secret_key']
+    from sdgym.run_benchmark.run_benchmark import MODALITY_TO_SETUP
+
+    mock_parse_args.return_value = Mock(modality=modality)
+    mock_getenv.side_effect = lambda key: {
+        'AWS_ACCESS_KEY_ID': 'my_access_key',
+        'AWS_SECRET_ACCESS_KEY': 'my_secret_key',
+        'CREDENTIALS_FILEPATH': '/path/to/creds.json',
+    }.get(key)
+    MODALITY_TO_SETUP[modality]['synthesizers_split'] = synthesizer_split
+    mock_method = MODALITY_TO_SETUP[modality]['method']
     date = datetime.now(timezone.utc).strftime('%Y-%m-%d')

     # Run
     main()

     # Assert
-    mock_getenv.assert_any_call('AWS_ACCESS_KEY_ID')
-    mock_getenv.assert_any_call('AWS_SECRET_ACCESS_KEY')
-    expected_calls = []
-    for synthesizer in SYNTHESIZERS_SPLIT_SINGLE_TABLE:
-        expected_calls.append(
-            call(
-                output_destination=OUTPUT_DESTINATION_AWS,
-                aws_access_key_id='my_access_key',
-                aws_secret_access_key='my_secret_key',
-                synthesizers=synthesizer,
-                compute_privacy_score=False,
-                timeout=345600,
-            )
+    expected_calls = [
+        call(
+            output_destination=OUTPUT_DESTINATION_AWS,
+            credential_filepath='/path/to/creds.json',
+            synthesizers=group,
+            timeout=345600,
         )
-
-    mock_benchmark_single_table_aws.assert_has_calls(expected_calls)
+        for group in synthesizer_split
+    ]
+    mock_method.assert_has_calls(expected_calls)
     mock_append_benchmark_run.assert_called_once_with(
         'my_access_key',
         'my_secret_key',
         date,
+        modality=modality,
+    )
+    mock_post_benchmark_launch_message.assert_called_once_with(
+        date,
+        compute_service='GCP',
+        modality=modality,
     )
-    mock_post_benchmark_launch_message.assert_called_once_with(date)

tests/unit/run_benchmark/test_utils.py
Lines changed: 7 additions & 5 deletions

@@ -95,7 +95,7 @@ def test_post_benchmark_launch_message(
     url = 'https://s3.console.aws.amazon.com/'
     mock_get_s3_console_link.return_value = url
     expected_body = (
-        '🏃 SDGym benchmark has been launched! EC2 Instances are running. '
+        '🏃 SDGym benchmark has been launched on AWS! '
         f'Intermediate results can be found <{url}|here>.\n'
     )
     # Run
@@ -104,8 +104,10 @@ def test_post_benchmark_launch_message(
     # Assert
     mock_get_result_folder_name.assert_called_once_with(date_str)
     mock_parse_s3_path.assert_called_once_with(OUTPUT_DESTINATION_AWS)
-    mock_get_s3_console_link.assert_called_once_with('my-bucket', f'my-prefix/{folder_name}/')
-    mock_post_slack_message.assert_called_once_with(SLACK_CHANNEL, expected_body)
+    mock_get_s3_console_link.assert_called_once_with(
+        'my-bucket', f'my-prefix/single_table/{folder_name}/'
+    )
+    mock_post_slack_message.assert_called_once_with('sdv-alerts-debug', expected_body)


 @patch('sdgym.run_benchmark.utils.post_slack_message')
@@ -136,7 +138,7 @@ def test_post_benchmark_uploaded_message(
     mock_post_slack_message.assert_called_once_with(SLACK_CHANNEL, expected_body)
     mock_parse_s3_path.assert_called_once_with(OUTPUT_DESTINATION_AWS)
     mock_get_s3_console_link.assert_called_once_with(
-        'my-bucket', 'my-prefix%2FSDGym+Monthly+Run.xlsx'
+        'my-bucket', 'my-prefix%2Fsingle_table%2FSDGym+Monthly+Run.xlsx'
     )


@@ -170,7 +172,7 @@ def test_post_benchmark_uploaded_message_with_commit(
     mock_post_slack_message.assert_called_once_with(SLACK_CHANNEL, expected_body)
     mock_parse_s3_path.assert_called_once_with(OUTPUT_DESTINATION_AWS)
     mock_get_s3_console_link.assert_called_once_with(
-        'my-bucket', 'my-prefix%2FSDGym+Monthly+Run.xlsx'
+        'my-bucket', 'my-prefix%2Fsingle_table%2FSDGym+Monthly+Run.xlsx'
     )
