diff --git a/pyproject.toml b/pyproject.toml index 2f46ca925..08d59f722 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,7 +35,7 @@ dependencies = [ "pandas>=2.2.3;python_version>='3.13'", 'tqdm>=4.29', 'copulas>=0.12.1', - 'ctgan>=0.11.0', + 'ctgan>=0.11.1', 'deepecho>=0.7.0', 'rdt>=1.18.2', 'sdmetrics>=0.21.0', diff --git a/sdv/single_table/base.py b/sdv/single_table/base.py index eac21e425..1ab506568 100644 --- a/sdv/single_table/base.py +++ b/sdv/single_table/base.py @@ -309,15 +309,23 @@ def update_transformers(self, column_name_to_transformer): msg = 'For this change to take effect, please refit the synthesizer using `fit`.' warnings.warn(msg, RefitWarning) + def _resolve_gpu_parameters(self, parameters): + if parameters.get('cuda') is not None and parameters.get('enable_gpu') is None: + parameters.pop('enable_gpu', None) # Ensure backward-compatibility + elif 'cuda' in parameters: # Removed because deprecated + del parameters['cuda'] + + return parameters + def get_parameters(self): """Return the parameters used to instantiate the synthesizer.""" parameters = inspect.signature(self.__init__).parameters instantiated_parameters = {} for parameter_name in parameters: - if parameter_name != 'metadata': + if parameter_name not in ['metadata']: instantiated_parameters[parameter_name] = self.__dict__.get(parameter_name) - return instantiated_parameters + return self._resolve_gpu_parameters(instantiated_parameters) def get_metadata(self, version='original'): """Get the metadata, either original or modified after applying constraints. diff --git a/sdv/single_table/copulagan.py b/sdv/single_table/copulagan.py index c219f458d..ca91cff94 100644 --- a/sdv/single_table/copulagan.py +++ b/sdv/single_table/copulagan.py @@ -89,7 +89,11 @@ class CopulaGANSynthesizer(CTGANSynthesizer): Whether to print fit progress on stdout. Defaults to ``False``. epochs (int): Number of training epochs. Defaults to 300. + enable_gpu (bool): + Whether to attempt to use GPU for computation. + Defaults to ``True``. cuda (bool or str): + **Deprecated** If ``True``, use CUDA. If an ``str``, use the indicated device. If ``False``, do not use cuda at all. numerical_distributions (dict): @@ -139,9 +143,10 @@ def __init__( verbose=False, epochs=300, pac=10, - cuda=True, + enable_gpu=True, numerical_distributions=None, default_distribution=None, + cuda=None, ): super().__init__( metadata, @@ -161,6 +166,7 @@ def __init__( verbose=verbose, epochs=epochs, pac=pac, + enable_gpu=enable_gpu, cuda=cuda, ) diff --git a/sdv/single_table/ctgan.py b/sdv/single_table/ctgan.py index 5640357aa..ad2b18211 100644 --- a/sdv/single_table/ctgan.py +++ b/sdv/single_table/ctgan.py @@ -14,6 +14,7 @@ try: from ctgan import CTGAN, TVAE + from ctgan.synthesizers._utils import get_enable_gpu_value import_error = None except ModuleNotFoundError as e: @@ -154,7 +155,11 @@ class CTGANSynthesizer(LossValuesMixin, MissingModuleMixin, BaseSingleTableSynth pac (int): Number of samples to group together when applying the discriminator. Defaults to 10. + enable_gpu (bool): + Whether to attempt to use GPU for computation. + Defaults to ``True``. cuda (bool or str): + **Deprecated** If ``True``, use CUDA. If a ``str``, use the indicated device. If ``False``, do not use cuda at all. """ @@ -180,7 +185,8 @@ def __init__( verbose=False, epochs=300, pac=10, - cuda=True, + enable_gpu=True, + cuda=None, ): if CTGAN is None: self.raise_module_not_found_error(import_error) @@ -204,8 +210,7 @@ def __init__( self.verbose = verbose self.epochs = epochs self.pac = pac - self.cuda = cuda - + self.enable_gpu = get_enable_gpu_value(enable_gpu, cuda) self._model_kwargs = { 'embedding_dim': embedding_dim, 'generator_dim': generator_dim, @@ -220,7 +225,7 @@ def __init__( 'verbose': verbose, 'epochs': epochs, 'pac': pac, - 'cuda': cuda, + 'enable_gpu': self.enable_gpu, } def _estimate_num_columns(self, data): @@ -353,7 +358,11 @@ class TVAESynthesizer(LossValuesMixin, MissingModuleMixin, BaseSingleTableSynthe Number of training epochs. Defaults to 300. loss_factor (int): Multiplier for the reconstruction error. Defaults to 2. + enable_gpu (bool): + Whether to attempt to use GPU for computation. + Defaults to ``True``. cuda (bool or str): + **Deprecated** If ``True``, use CUDA. If a ``str``, use the indicated device. If ``False``, do not use cuda at all. """ @@ -373,7 +382,8 @@ def __init__( verbose=False, epochs=300, loss_factor=2, - cuda=True, + enable_gpu=True, + cuda=None, ): if TVAE is None: self.raise_module_not_found_error(import_error) @@ -390,8 +400,7 @@ def __init__( self.verbose = verbose self.epochs = epochs self.loss_factor = loss_factor - self.cuda = cuda - + self.enable_gpu = get_enable_gpu_value(enable_gpu, cuda) self._model_kwargs = { 'embedding_dim': embedding_dim, 'compress_dims': compress_dims, @@ -401,7 +410,7 @@ def __init__( 'verbose': verbose, 'epochs': epochs, 'loss_factor': loss_factor, - 'cuda': cuda, + 'enable_gpu': self.enable_gpu, } def _fit(self, processed_data): diff --git a/tests/integration/single_table/test_base.py b/tests/integration/single_table/test_base.py index 7047e4613..408704673 100644 --- a/tests/integration/single_table/test_base.py +++ b/tests/integration/single_table/test_base.py @@ -34,10 +34,12 @@ }) SYNTHESIZERS = [ - pytest.param(CTGANSynthesizer(METADATA, epochs=1, cuda=False), id='CTGANSynthesizer'), - pytest.param(TVAESynthesizer(METADATA, epochs=1, cuda=False), id='TVAESynthesizer'), + pytest.param(CTGANSynthesizer(METADATA, epochs=1, enable_gpu=False), id='CTGANSynthesizer'), + pytest.param(TVAESynthesizer(METADATA, epochs=1, enable_gpu=False), id='TVAESynthesizer'), pytest.param(GaussianCopulaSynthesizer(METADATA), id='GaussianCopulaSynthesizer'), - pytest.param(CopulaGANSynthesizer(METADATA, epochs=1, cuda=False), id='CopulaGANSynthesizer'), + pytest.param( + CopulaGANSynthesizer(METADATA, epochs=1, enable_gpu=False), id='CopulaGANSynthesizer' + ), ] @@ -270,7 +272,7 @@ def test_sampling_reset_sampling(synthesizer): }) if isinstance(synthesizer, (CTGANSynthesizer, TVAESynthesizer)): - synthesizer = synthesizer.__class__(metadata, cuda=False) + synthesizer = synthesizer.__class__(metadata, enable_gpu=False) else: synthesizer = synthesizer.__class__(metadata) diff --git a/tests/integration/single_table/test_ctgan.py b/tests/integration/single_table/test_ctgan.py index 2554e9a2c..c50493040 100644 --- a/tests/integration/single_table/test_ctgan.py +++ b/tests/integration/single_table/test_ctgan.py @@ -1,9 +1,11 @@ +import platform import re from unittest.mock import patch import numpy as np import pandas as pd import pytest +import torch from rdt.transformers import FloatFormatter, LabelEncoder from sdv.cag import FixedCombinations @@ -11,7 +13,7 @@ from sdv.errors import InvalidDataTypeError from sdv.evaluation.single_table import evaluate_quality, get_column_pair_plot, get_column_plot from sdv.metadata.metadata import Metadata -from sdv.single_table import CTGANSynthesizer, TVAESynthesizer +from sdv.single_table import CopulaGANSynthesizer, CTGANSynthesizer, TVAESynthesizer def test__estimate_num_columns(): @@ -331,3 +333,61 @@ def test_tvae___init___without_torch(mock_import_error): # Run and Assert with pytest.raises(ModuleNotFoundError, match=msg): TVAESynthesizer(metadata) + + +@pytest.mark.parametrize( + 'synthesizer_class', [CTGANSynthesizer, TVAESynthesizer, CopulaGANSynthesizer] +) +def test_enable_gpu_parameter(synthesizer_class): + """Test that the `enable_gpu` parameter is correctly passed to the underlying model.""" + # Setup + data, metadata = download_demo(modality='single_table', dataset_name='fake_hotel_guests') + expected_warning = re.escape( + '`cuda` parameter is deprecated and will be removed in a future release. ' + 'Please use `enable_gpu` instead.' + ) + expected_error = re.escape( + 'Cannot resolve the provided values of `cuda` and `enable_gpu` parameters. ' + 'Please use only `enable_gpu`.' + ) + + # Run + synthesizer_1 = synthesizer_class(metadata) + synthesizer_2 = synthesizer_class(metadata, enable_gpu=False) + with pytest.warns(FutureWarning, match=expected_warning): + synthesizer_3 = synthesizer_class(metadata, cuda=True) + + with pytest.raises(ValueError, match=expected_error): + synthesizer_class(metadata, enable_gpu=False, cuda=True) + + synthesizer_1.fit(data) + synthesizer_2.fit(data) + synthesizer_3.fit(data) + synthetic_data_1 = synthesizer_1.sample(10) + synthetic_data_2 = synthesizer_2.sample(10) + synthetic_data_3 = synthesizer_3.sample(10) + + # Assert + data_columns = data.columns.tolist() + if ( + platform.machine() == 'arm64' + and getattr(torch.backends, 'mps', None) + and torch.backends.mps.is_available() + ): + expected_device = torch.device('mps') + elif torch.cuda.is_available(): + expected_device = torch.device('cuda') + else: + expected_device = torch.device('cpu') + + assert synthesizer_1._model._enable_gpu is True + assert synthesizer_1._model._device == expected_device + assert synthesizer_2._model._enable_gpu is False + assert synthesizer_2._model._device == torch.device('cpu') + assert synthesizer_3._model._enable_gpu is True + assert synthesizer_3._model._device == expected_device + assert synthetic_data_1.columns.tolist() == data_columns + assert synthetic_data_2.columns.tolist() == data_columns + assert synthetic_data_3.columns.tolist() == data_columns + assert len(synthetic_data_1) == 10 + assert len(synthetic_data_2) == len(synthetic_data_3) == 10 diff --git a/tests/unit/single_table/test_base.py b/tests/unit/single_table/test_base.py index 213457dbb..0fc8474f6 100644 --- a/tests/unit/single_table/test_base.py +++ b/tests/unit/single_table/test_base.py @@ -369,6 +369,52 @@ def test_set_address_columns_warning(self): ['country_column', 'city_column'], anonymization_level='full' ) + def test__resolve_gpu_parameters(self): + """Test the `_resolve_gpu_parameters` method.""" + # Setup + metadata = Metadata() + instance = BaseSingleTableSynthesizer(metadata) + parameters_with_cuda = {'cuda': True, 'enable_gpu': True} + parameters_with_cuda_only = {'cuda': True} + parameters_with_cuda_none = {'cuda': None, 'enable_gpu': True} + parameters_without_cuda = {'enable_gpu': False} + + # Run + result_with_cuda = instance._resolve_gpu_parameters(parameters_with_cuda) + result_with_cuda_only = instance._resolve_gpu_parameters(parameters_with_cuda_only) + result_with_cuda_none = instance._resolve_gpu_parameters(parameters_with_cuda_none) + result_without_cuda = instance._resolve_gpu_parameters(parameters_without_cuda) + + # Assert + assert result_with_cuda == {'enable_gpu': True} + assert result_with_cuda_only == {'cuda': True} + assert result_with_cuda_none == {'enable_gpu': True} + assert result_without_cuda == {'enable_gpu': False} + + def test_get_parameters_mock(self): + """Test that `get_parameters` calls `_resolve_gpu_parameters`""" + metadata = Metadata() + instance = BaseSynthesizer( + metadata, enforce_min_max_values=False, enforce_rounding=False, locales='en_CA' + ) + expected_parameters = { + 'enforce_min_max_values': False, + 'enforce_rounding': True, + 'locales': 'en_CA', + } + instance._resolve_gpu_parameters = Mock(return_value=expected_parameters) + + # Run + parameters = instance.get_parameters() + + # Assert + assert parameters == expected_parameters + instance._resolve_gpu_parameters.assert_called_once_with({ + 'enforce_min_max_values': False, + 'enforce_rounding': False, + 'locales': 'en_CA', + }) + def test_get_parameters(self): """Test that it returns every ``init`` parameter without the ``metadata``.""" # Setup diff --git a/tests/unit/single_table/test_copulagan.py b/tests/unit/single_table/test_copulagan.py index 4bdaccef4..ea05c28ff 100644 --- a/tests/unit/single_table/test_copulagan.py +++ b/tests/unit/single_table/test_copulagan.py @@ -43,7 +43,7 @@ def test___init__(self): assert instance.verbose is False assert instance.epochs == 300 assert instance.pac == 10 - assert instance.cuda is True + assert instance.enable_gpu is True assert instance.numerical_distributions == {} assert instance.default_distribution == 'beta' assert instance._numerical_distributions == {} @@ -79,7 +79,7 @@ def test___init__with_unified_metadata(self): assert instance.verbose is False assert instance.epochs == 300 assert instance.pac == 10 - assert instance.cuda is True + assert instance.enable_gpu is True assert instance.numerical_distributions == {} assert instance.default_distribution == 'beta' assert instance._numerical_distributions == {} @@ -128,7 +128,7 @@ def test___init__custom(self): verbose=verbose, epochs=epochs, pac=pac, - cuda=cuda, + enable_gpu=cuda, numerical_distributions=numerical_distributions, default_distribution=default_distribution, ) @@ -149,7 +149,7 @@ def test___init__custom(self): assert instance.verbose is True assert instance.epochs == epochs assert instance.pac == pac - assert instance.cuda is False + assert instance.enable_gpu is False assert instance.numerical_distributions == {'field': 'gamma'} assert instance._numerical_distributions == {'field': GammaUnivariate} assert instance.default_distribution == 'uniform' @@ -208,7 +208,7 @@ def test_get_params(self): 'verbose': False, 'epochs': 300, 'pac': 10, - 'cuda': True, + 'enable_gpu': True, 'numerical_distributions': {}, 'default_distribution': 'beta', } diff --git a/tests/unit/single_table/test_ctgan.py b/tests/unit/single_table/test_ctgan.py index c68902ab6..337bb34cd 100644 --- a/tests/unit/single_table/test_ctgan.py +++ b/tests/unit/single_table/test_ctgan.py @@ -60,7 +60,7 @@ def test___init__(self): assert instance.verbose is False assert instance.epochs == 300 assert instance.pac == 10 - assert instance.cuda is True + assert instance.enable_gpu is True def test___init__with_unified_metadata(self): """Test creating an instance of ``CTGANSynthesizer`` with Metadata.""" @@ -92,7 +92,7 @@ def test___init__with_unified_metadata(self): assert instance.verbose is False assert instance.epochs == 300 assert instance.pac == 10 - assert instance.cuda is True + assert instance.enable_gpu is True def test___init__custom(self): """Test creating an instance of ``CTGANSynthesizer`` with custom parameters.""" @@ -113,7 +113,7 @@ def test___init__custom(self): verbose = True epochs = 150 pac = 5 - cuda = False + enable_gpu = False # Run instance = CTGANSynthesizer( @@ -133,7 +133,7 @@ def test___init__custom(self): verbose=verbose, epochs=epochs, pac=pac, - cuda=cuda, + enable_gpu=enable_gpu, ) # Assert @@ -152,7 +152,7 @@ def test___init__custom(self): assert instance.verbose is True assert instance.epochs == epochs assert instance.pac == pac - assert instance.cuda is False + assert instance.enable_gpu is False @patch('sdv.single_table.ctgan.CTGAN', None) @patch('sdv.single_table.ctgan.import_error') @@ -195,7 +195,7 @@ def test_get_parameters(self): 'verbose': False, 'epochs': 300, 'pac': 10, - 'cuda': True, + 'enable_gpu': True, } def test__estimate_num_columns(self): @@ -305,7 +305,7 @@ def test__fit(self, mock_category_validate, mock_detect_discrete_columns, mock_c assert args[2] == instance._data_processor._hyper_transformer.field_transformers mock_ctgan.assert_called_once_with( batch_size=500, - cuda=True, + enable_gpu=True, discriminator_decay=1e-6, discriminator_dim=(256, 256), discriminator_lr=2e-4, @@ -408,7 +408,7 @@ def test___init__(self): assert instance.verbose is False assert instance.epochs == 300 assert instance.loss_factor == 2 - assert instance.cuda is True + assert instance.enable_gpu is True def test___init__custom(self): """Test creating an instance of ``TVAESynthesizer`` with custom parameters.""" @@ -439,7 +439,7 @@ def test___init__custom(self): verbose=verbose, epochs=epochs, loss_factor=loss_factor, - cuda=cuda, + enable_gpu=cuda, ) # Assert @@ -453,7 +453,7 @@ def test___init__custom(self): assert instance.verbose is True assert instance.epochs == 150 assert instance.loss_factor == 4 - assert instance.cuda is False + assert instance.enable_gpu is False @patch('sdv.single_table.ctgan.TVAE', None) @patch('sdv.single_table.ctgan.import_error') @@ -490,7 +490,7 @@ def test_get_parameters(self): 'verbose': False, 'epochs': 300, 'loss_factor': 2, - 'cuda': True, + 'enable_gpu': True, } @patch('sdv.single_table.ctgan.TVAE') @@ -524,7 +524,7 @@ def test__fit(self, mock_category_validate, mock_detect_discrete_columns, mock_t mock_tvae.assert_called_once_with( batch_size=500, compress_dims=(128, 128), - cuda=True, + enable_gpu=True, decompress_dims=(128, 128), embedding_dim=128, verbose=False,