Skip to content

Commit 07b1988

Browse files
committed
make release-tag: Merge branch 'main' into stable
2 parents 916bc41 + 95743fc commit 07b1988

29 files changed

+1040
-709
lines changed

DATASETS.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,12 +66,12 @@ Out[6]:
6666
## Getting the list of all the datasets
6767

6868
If you want to obtain the list of all the available datasets you can use the
69-
`sdgym.get_available_datasets` function:
69+
`list_datasets` function:
7070

7171
```python
72-
In [7]: from sdgym import get_available_datasets
72+
In [7]: from sdgym.dataset_explorer import DatasetExplorer
7373

74-
In [8]: get_available_datasets()
74+
In [8]: DatasetExplorer().list_datasets('single_table')
7575
Out[8]:
7676
dataset_name size_MB num_tables
7777
0 KRK_v1 0.072128 1

HISTORY.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,18 @@
11
# History
22

3+
## v0.12.0 - 2025-11-20
4+
5+
### New Features
6+
7+
* Rename `create_sdv_synthesizer_variant` to `create_synthesizer_variant` - Issue [#491](https://github.com/sdv-dev/SDGym/issues/491) by @R-Palazzo
8+
* SDGym should be able to automatically discover SDV Enterprise synthesizers - Issue [#481](https://github.com/sdv-dev/SDGym/issues/481) by @R-Palazzo
9+
* Incorporate the `get_available_datasets` functionality into the `DatasetExplorer` - Issue [#473](https://github.com/sdv-dev/SDGym/issues/473) by @fealho
10+
11+
### Bugs Fixed
12+
13+
* Update result aggregation logic in the ResultExplorer to match new naming schema - Issue [#494](https://github.com/sdv-dev/SDGym/issues/494) by @R-Palazzo
14+
* When running a benchmark locally, the `additional_datasets_folder` path should be the root path - Issue [#484](https://github.com/sdv-dev/SDGym/issues/484) by @fealho
15+
316
## v0.11.1 - 2025-11-03
417

518
### Bugs Fixed

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -103,10 +103,10 @@ Learn more in the [Custom Synthesizers Guide](https://docs.sdv.dev/sdgym/customi
103103
## Customizing your datasets
104104

105105
The SDGym library includes many publicly available datasets that you can include right away.
106-
List these using the ``get_available_datasets`` feature.
106+
List these using the ``list_datasets`` feature.
107107

108108
```python
109-
sdgym.get_available_datasets()
109+
sdgym.dataset_explorer.DatasetExplorer().list_datasets('single_table')
110110
```
111111

112112
```

latest_requirements.txt

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
appdirs==1.4.4
22
compress-pickle==2.1.0
33
humanfriendly==10.0
4-
numpy==2.3.4
4+
numpy==2.3.5
5+
openpyxl==3.1.5
56
pandas==2.3.3
67
rdt==1.18.2
78
scikit-learn==1.7.2
89
scipy==1.16.3
9-
sdmetrics==0.23.0
10-
sdv==1.28.0
10+
sdmetrics==0.24.0
11+
sdv==1.29.0
1112
tabulate==0.8.10
12-
torch==2.9.0
13+
torch==2.9.1
1314
tqdm==4.67.1

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ namespaces = false
144144
version = {attr = 'sdgym.__version__'}
145145

146146
[tool.bumpversion]
147-
current_version = "0.11.1"
147+
current_version = "0.12.0.dev0"
148148
parse = '(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\.(?P<release>[a-z]+)(?P<candidate>\d+))?'
149149
serialize = [
150150
'{major}.{minor}.{patch}.{release}{candidate}',

sdgym/__init__.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,20 @@
88
__copyright__ = 'Copyright (c) 2022 DataCebo, Inc.'
99
__email__ = '[email protected]'
1010
__license__ = 'BSL-1.1'
11-
__version__ = '0.11.1'
11+
__version__ = '0.12.0.dev0'
1212

1313
import logging
1414

15-
from sdgym.benchmark import benchmark_single_table
15+
from sdgym.benchmark import benchmark_single_table, benchmark_single_table_aws
1616
from sdgym.cli.collect import collect_results
1717
from sdgym.cli.summary import make_summary_spreadsheet
1818
from sdgym.dataset_explorer import DatasetExplorer
19-
from sdgym.datasets import get_available_datasets, load_dataset
20-
from sdgym.synthesizers import create_sdv_synthesizer_variant, create_single_table_synthesizer
19+
from sdgym.datasets import load_dataset
20+
from sdgym.synthesizers import (
21+
create_synthesizer_variant,
22+
create_single_table_synthesizer,
23+
create_multi_table_synthesizer,
24+
)
2125
from sdgym.result_explorer import ResultsExplorer
2226

2327
# Clear the logging wrongfully configured by tensorflow/absl
@@ -28,10 +32,11 @@
2832
'DatasetExplorer',
2933
'ResultsExplorer',
3034
'benchmark_single_table',
35+
'benchmark_single_table_aws',
3136
'collect_results',
32-
'create_sdv_synthesizer_variant',
37+
'create_synthesizer_variant',
3338
'create_single_table_synthesizer',
34-
'get_available_datasets',
39+
'create_multi_table_synthesizer',
3540
'load_dataset',
3641
'make_summary_spreadsheet',
3742
]

sdgym/benchmark.py

Lines changed: 20 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@
5252
write_csv,
5353
write_file,
5454
)
55-
from sdgym.synthesizers import CTGANSynthesizer, GaussianCopulaSynthesizer, UniformSynthesizer
55+
from sdgym.synthesizers import UniformSynthesizer
5656
from sdgym.synthesizers.base import BaselineSynthesizer
5757
from sdgym.utils import (
5858
calculate_score_time,
@@ -67,7 +67,7 @@
6767
)
6868

6969
LOGGER = logging.getLogger(__name__)
70-
DEFAULT_SYNTHESIZERS = [GaussianCopulaSynthesizer, CTGANSynthesizer, UniformSynthesizer]
70+
DEFAULT_SYNTHESIZERS = ['GaussianCopulaSynthesizer', 'CTGANSynthesizer', 'UniformSynthesizer']
7171
DEFAULT_DATASETS = [
7272
'adult',
7373
'alarm',
@@ -271,7 +271,11 @@ def _generate_job_args_list(
271271
if additional_datasets_folder is None
272272
else get_dataset_paths(
273273
modality='single_table',
274-
bucket=additional_datasets_folder,
274+
bucket=(
275+
additional_datasets_folder
276+
if is_s3_path(additional_datasets_folder)
277+
else os.path.join(additional_datasets_folder, 'single_table')
278+
),
275279
aws_access_key_id=aws_access_key_id,
276280
aws_secret_access_key=aws_secret_access_key_key,
277281
)
@@ -861,6 +865,7 @@ def _directory_exists(bucket_name, s3_file_path):
861865

862866

863867
def _check_write_permissions(s3_client, bucket_name):
868+
s3_client = s3_client or boto3.client('s3')
864869
try:
865870
s3_client.put_object(Bucket=bucket_name, Key='__test__', Body=b'')
866871
write_permission = True
@@ -881,7 +886,7 @@ def _create_sdgym_script(params, output_filepath):
881886
bucket_name, key_prefix = parse_s3_path(output_filepath)
882887
if not _directory_exists(bucket_name, key_prefix):
883888
raise ValueError(f'Directories in {key_prefix} do not exist')
884-
if not _check_write_permissions(bucket_name):
889+
if not _check_write_permissions(None, bucket_name):
885890
raise ValueError('No write permissions allowed for the bucket.')
886891

887892
# Add quotes to parameter strings
@@ -893,23 +898,22 @@ def _create_sdgym_script(params, output_filepath):
893898
params['output_filepath'] = "'" + params['output_filepath'] + "'"
894899

895900
# Generate the output script to run on the e2 instance
896-
synthesizer_string = 'synthesizers=['
897-
for synthesizer in params['synthesizers']:
901+
synthesizers = params.get('synthesizers', [])
902+
names = []
903+
for synthesizer in synthesizers:
898904
if isinstance(synthesizer, str):
899-
synthesizer_string += synthesizer + ', '
905+
names.append(synthesizer)
906+
elif hasattr(synthesizer, '__name__'):
907+
names.append(synthesizer.__name__)
900908
else:
901-
synthesizer_string += synthesizer.__name__ + ', '
902-
if params['synthesizers']:
903-
synthesizer_string = synthesizer_string[:-2]
904-
synthesizer_string += ']'
909+
names.append(synthesizer.__class__.__name__)
910+
911+
all_names = '", "'.join(names)
912+
synthesizer_string = f'synthesizers=["{all_names}"]'
905913
# The indentation of the string is important for the python script
906914
script_content = f"""import boto3
907915
from io import StringIO
908916
import sdgym
909-
from sdgym.synthesizers.sdv import (CopulaGANSynthesizer, CTGANSynthesizer,
910-
GaussianCopulaSynthesizer, HMASynthesizer, PARSynthesizer, SDVRelationalSynthesizer,
911-
SDVTabularSynthesizer, TVAESynthesizer)
912-
from sdgym.synthesizers import RealTabFormerSynthesizer
913917
914918
results = sdgym.benchmark_single_table(
915919
{synthesizer_string}, custom_synthesizers={params['custom_synthesizers']},
@@ -1186,7 +1190,7 @@ def benchmark_single_table(
11861190
custom_synthesizers (list[class] or ``None``):
11871191
A list of custom synthesizer classes to use. These can be completely custom or
11881192
they can be synthesizer variants (the output from ``create_single_table_synthesizer``
1189-
or ``create_sdv_synthesizer_variant``). Defaults to ``None``.
1193+
or ``create_synthesizer_variant``). Defaults to ``None``.
11901194
sdv_datasets (list[str] or ``None``):
11911195
Names of the SDV demo datasets to use for the benchmark. Defaults to
11921196
``[adult, alarm, census, child, expedia_hotel_logs, insurance, intrusion, news,

sdgym/cli/__main__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ def _download_datasets(args):
9797
_env_setup(args.logfile, args.verbose)
9898
datasets = args.datasets
9999
if not datasets:
100-
datasets = sdgym.datasets.get_available_datasets(
100+
datasets = sdgym.datasets._get_available_datasets(
101101
args.bucket, args.aws_access_key_id, args.aws_secret_access_key
102102
)['name']
103103

@@ -118,7 +118,7 @@ def _list_downloaded(args):
118118

119119

120120
def _list_available(args):
121-
datasets = sdgym.datasets.get_available_datasets(
121+
datasets = sdgym.datasets._get_available_datasets(
122122
args.bucket, args.aws_access_key_id, args.aws_secret_access_key
123123
)
124124
_print_table(datasets, args.sort, args.reverse, {'size': humanfriendly.format_size})

sdgym/dataset_explorer.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,3 +275,36 @@ def summarize_datasets(self, modality, output_filepath=None):
275275
dataset_summary.to_csv(output_filepath, index=False)
276276

277277
return dataset_summary
278+
279+
def list_datasets(self, modality, output_filepath=None):
280+
"""List available datasets for a modality using metainfo only.
281+
282+
This is a lightweight alternative to ``summarize_datasets`` that does not load
283+
the actual data. It reads dataset information from the ``metainfo.yaml`` files
284+
in the bucket and returns a table equivalent to the legacy
285+
``get_available_datasets`` output.
286+
287+
Args:
288+
modality (str):
289+
It must be ``'single_table'``, ``'multi_table'`` or ``'sequential'``.
290+
output_filepath (str, optional):
291+
Full path to a ``.csv`` file where the resulting table will be written.
292+
If not provided, the table is only returned.
293+
294+
Returns:
295+
pd.DataFrame:
296+
A DataFrame with columns: ``['dataset_name', 'size_MB', 'num_tables']``.
297+
"""
298+
self._validate_output_filepath(output_filepath)
299+
_validate_modality(modality)
300+
301+
dataframe = _get_available_datasets(
302+
modality=modality,
303+
bucket=self._bucket_name,
304+
aws_access_key_id=self.aws_access_key_id,
305+
aws_secret_access_key=self.aws_secret_access_key,
306+
)
307+
if output_filepath:
308+
dataframe.to_csv(output_filepath, index=False)
309+
310+
return dataframe

sdgym/datasets.py

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -254,21 +254,6 @@ def load_dataset(
254254
return data, metadata_dict
255255

256256

257-
def get_available_datasets(modality='single_table'):
258-
"""Get available single_table datasets.
259-
260-
Args:
261-
modality (str):
262-
It must be ``'single_table'``, ``'multi_table'`` or ``'sequential'``.
263-
264-
Return:
265-
pd.DataFrame:
266-
Table of available datasets and their sizes.
267-
"""
268-
_validate_modality(modality)
269-
return _get_available_datasets(modality)
270-
271-
272257
def get_dataset_paths(
273258
modality,
274259
datasets=None,

0 commit comments

Comments
 (0)