Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ dependencies = [
"pandas>=2.2.3;python_version>='3.13'",
'tqdm>=4.29',
'copulas>=0.12.1',
'ctgan>=0.11.0',
'ctgan>=0.11.1',
'deepecho>=0.7.0',
'rdt>=1.18.2',
'sdmetrics>=0.21.0',
Expand Down
12 changes: 10 additions & 2 deletions sdv/single_table/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,15 +309,23 @@ def update_transformers(self, column_name_to_transformer):
msg = 'For this change to take effect, please refit the synthesizer using `fit`.'
warnings.warn(msg, RefitWarning)

def _resolve_gpu_parameters(self, parameters):
    """Keep only one of the ``cuda`` / ``enable_gpu`` entries in ``parameters``.

    Args:
        parameters (dict):
            Parameter names mapped to their values. The dict is modified in place.

    Returns:
        dict:
            The same ``parameters`` object. If ``cuda`` holds a non-``None`` value and
            ``enable_gpu`` is missing or ``None``, the ``enable_gpu`` key is dropped and
            ``cuda`` is kept. In every other case the ``cuda`` key is dropped if present.
    """
    if parameters.get('cuda') is not None and parameters.get('enable_gpu') is None:
        parameters.pop('enable_gpu', None)  # Ensure backward-compatibility
    elif 'cuda' in parameters:  # Removed because deprecated
        del parameters['cuda']

    return parameters

def get_parameters(self):
    """Return the parameters used to instantiate the synthesizer.

    Returns:
        dict:
            Every ``__init__`` parameter name except ``metadata``, mapped to the value
            stored under that name in the instance ``__dict__`` (``None`` when nothing
            is stored), after being passed through ``_resolve_gpu_parameters``.
    """
    parameters = inspect.signature(self.__init__).parameters
    instantiated_parameters = {
        parameter_name: self.__dict__.get(parameter_name)
        for parameter_name in parameters
        if parameter_name != 'metadata'
    }

    return self._resolve_gpu_parameters(instantiated_parameters)

def get_metadata(self, version='original'):
"""Get the metadata, either original or modified after applying constraints.
Expand Down
8 changes: 7 additions & 1 deletion sdv/single_table/copulagan.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,11 @@ class CopulaGANSynthesizer(CTGANSynthesizer):
Whether to print fit progress on stdout. Defaults to ``False``.
epochs (int):
Number of training epochs. Defaults to 300.
enable_gpu (bool):
Whether to attempt to use GPU for computation.
Defaults to ``True``.
cuda (bool or str):
**Deprecated**
If ``True``, use CUDA. If a ``str``, use the indicated device.
If ``False``, do not use cuda at all.
numerical_distributions (dict):
Expand Down Expand Up @@ -139,9 +143,10 @@ def __init__(
verbose=False,
epochs=300,
pac=10,
cuda=True,
enable_gpu=True,
numerical_distributions=None,
default_distribution=None,
cuda=None,
):
super().__init__(
metadata,
Expand All @@ -161,6 +166,7 @@ def __init__(
verbose=verbose,
epochs=epochs,
pac=pac,
enable_gpu=enable_gpu,
cuda=cuda,
)

Expand Down
25 changes: 17 additions & 8 deletions sdv/single_table/ctgan.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

try:
from ctgan import CTGAN, TVAE
from ctgan.synthesizers._utils import get_enable_gpu_value

import_error = None
except ModuleNotFoundError as e:
Expand Down Expand Up @@ -154,7 +155,11 @@ class CTGANSynthesizer(LossValuesMixin, MissingModuleMixin, BaseSingleTableSynth
pac (int):
Number of samples to group together when applying the discriminator.
Defaults to 10.
enable_gpu (bool):
Whether to attempt to use GPU for computation.
Defaults to ``True``.
cuda (bool or str):
**Deprecated**
If ``True``, use CUDA. If a ``str``, use the indicated device.
If ``False``, do not use cuda at all.
"""
Expand All @@ -180,7 +185,8 @@ def __init__(
verbose=False,
epochs=300,
pac=10,
cuda=True,
enable_gpu=True,
cuda=None,
):
if CTGAN is None:
self.raise_module_not_found_error(import_error)
Expand All @@ -204,8 +210,7 @@ def __init__(
self.verbose = verbose
self.epochs = epochs
self.pac = pac
self.cuda = cuda

self.enable_gpu = get_enable_gpu_value(enable_gpu, cuda)
self._model_kwargs = {
'embedding_dim': embedding_dim,
'generator_dim': generator_dim,
Expand All @@ -220,7 +225,7 @@ def __init__(
'verbose': verbose,
'epochs': epochs,
'pac': pac,
'cuda': cuda,
'enable_gpu': self.enable_gpu,
}

def _estimate_num_columns(self, data):
Expand Down Expand Up @@ -353,7 +358,11 @@ class TVAESynthesizer(LossValuesMixin, MissingModuleMixin, BaseSingleTableSynthe
Number of training epochs. Defaults to 300.
loss_factor (int):
Multiplier for the reconstruction error. Defaults to 2.
enable_gpu (bool):
Whether to attempt to use GPU for computation.
Defaults to ``True``.
cuda (bool or str):
**Deprecated**
If ``True``, use CUDA. If a ``str``, use the indicated device.
If ``False``, do not use cuda at all.
"""
Expand All @@ -373,7 +382,8 @@ def __init__(
verbose=False,
epochs=300,
loss_factor=2,
cuda=True,
enable_gpu=True,
cuda=None,
):
if TVAE is None:
self.raise_module_not_found_error(import_error)
Expand All @@ -390,8 +400,7 @@ def __init__(
self.verbose = verbose
self.epochs = epochs
self.loss_factor = loss_factor
self.cuda = cuda

self.enable_gpu = get_enable_gpu_value(enable_gpu, cuda)
self._model_kwargs = {
'embedding_dim': embedding_dim,
'compress_dims': compress_dims,
Expand All @@ -401,7 +410,7 @@ def __init__(
'verbose': verbose,
'epochs': epochs,
'loss_factor': loss_factor,
'cuda': cuda,
'enable_gpu': self.enable_gpu,
}

def _fit(self, processed_data):
Expand Down
10 changes: 6 additions & 4 deletions tests/integration/single_table/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,12 @@
})

# One pytest parameter per single-table synthesizer class. The CTGAN, TVAE and CopulaGAN
# instances are built with a single epoch and with ``enable_gpu=False``.
SYNTHESIZERS = [
    pytest.param(CTGANSynthesizer(METADATA, epochs=1, enable_gpu=False), id='CTGANSynthesizer'),
    pytest.param(TVAESynthesizer(METADATA, epochs=1, enable_gpu=False), id='TVAESynthesizer'),
    pytest.param(GaussianCopulaSynthesizer(METADATA), id='GaussianCopulaSynthesizer'),
    pytest.param(
        CopulaGANSynthesizer(METADATA, epochs=1, enable_gpu=False), id='CopulaGANSynthesizer'
    ),
]


Expand Down Expand Up @@ -270,7 +272,7 @@ def test_sampling_reset_sampling(synthesizer):
})

if isinstance(synthesizer, (CTGANSynthesizer, TVAESynthesizer)):
synthesizer = synthesizer.__class__(metadata, cuda=False)
synthesizer = synthesizer.__class__(metadata, enable_gpu=False)
else:
synthesizer = synthesizer.__class__(metadata)

Expand Down
62 changes: 61 additions & 1 deletion tests/integration/single_table/test_ctgan.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,19 @@
import platform
import re
from unittest.mock import patch

import numpy as np
import pandas as pd
import pytest
import torch
from rdt.transformers import FloatFormatter, LabelEncoder

from sdv.cag import FixedCombinations
from sdv.datasets.demo import download_demo
from sdv.errors import InvalidDataTypeError
from sdv.evaluation.single_table import evaluate_quality, get_column_pair_plot, get_column_plot
from sdv.metadata.metadata import Metadata
from sdv.single_table import CTGANSynthesizer, TVAESynthesizer
from sdv.single_table import CopulaGANSynthesizer, CTGANSynthesizer, TVAESynthesizer


def test__estimate_num_columns():
Expand Down Expand Up @@ -331,3 +333,61 @@ def test_tvae___init___without_torch(mock_import_error):
# Run and Assert
with pytest.raises(ModuleNotFoundError, match=msg):
TVAESynthesizer(metadata)


@pytest.mark.parametrize(
    'synthesizer_class', [CTGANSynthesizer, TVAESynthesizer, CopulaGANSynthesizer]
)
def test_enable_gpu_parameter(synthesizer_class):
    """Test that the `enable_gpu` parameter is correctly passed to the underlying model."""
    # Setup
    data, metadata = download_demo(modality='single_table', dataset_name='fake_hotel_guests')
    expected_warning = re.escape(
        '`cuda` parameter is deprecated and will be removed in a future release. '
        'Please use `enable_gpu` instead.'
    )
    expected_error = re.escape(
        'Cannot resolve the provided values of `cuda` and `enable_gpu` parameters. '
        'Please use only `enable_gpu`.'
    )

    # Run
    synthesizer_1 = synthesizer_class(metadata)
    synthesizer_2 = synthesizer_class(metadata, enable_gpu=False)
    # Passing only the deprecated ``cuda`` argument must emit a ``FutureWarning``.
    with pytest.warns(FutureWarning, match=expected_warning):
        synthesizer_3 = synthesizer_class(metadata, cuda=True)

    # Contradictory ``enable_gpu`` / ``cuda`` values must be rejected.
    with pytest.raises(ValueError, match=expected_error):
        synthesizer_class(metadata, enable_gpu=False, cuda=True)

    synthesizer_1.fit(data)
    synthesizer_2.fit(data)
    synthesizer_3.fit(data)
    synthetic_data_1 = synthesizer_1.sample(10)
    synthetic_data_2 = synthesizer_2.sample(10)
    synthetic_data_3 = synthesizer_3.sample(10)

    # Assert
    data_columns = data.columns.tolist()
    # Device expected when GPU use is enabled: MPS on arm64 if available, else CUDA if
    # available, else CPU.
    if (
        platform.machine() == 'arm64'
        and getattr(torch.backends, 'mps', None)
        and torch.backends.mps.is_available()
    ):
        expected_device = torch.device('mps')
    elif torch.cuda.is_available():
        expected_device = torch.device('cuda')
    else:
        expected_device = torch.device('cpu')

    assert synthesizer_1._model._enable_gpu is True
    assert synthesizer_1._model._device == expected_device
    assert synthesizer_2._model._enable_gpu is False
    assert synthesizer_2._model._device == torch.device('cpu')
    assert synthesizer_3._model._enable_gpu is True
    assert synthesizer_3._model._device == expected_device
    assert synthetic_data_1.columns.tolist() == data_columns
    assert synthetic_data_2.columns.tolist() == data_columns
    assert synthetic_data_3.columns.tolist() == data_columns
    assert len(synthetic_data_1) == 10
    assert len(synthetic_data_2) == len(synthetic_data_3) == 10
46 changes: 46 additions & 0 deletions tests/unit/single_table/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,52 @@ def test_set_address_columns_warning(self):
['country_column', 'city_column'], anonymization_level='full'
)

def test__resolve_gpu_parameters(self):
    """Test the `_resolve_gpu_parameters` method."""
    # Setup
    metadata = Metadata()
    instance = BaseSingleTableSynthesizer(metadata)
    parameters_with_cuda = {'cuda': True, 'enable_gpu': True}
    parameters_with_cuda_only = {'cuda': True}
    parameters_with_cuda_none = {'cuda': None, 'enable_gpu': True}
    parameters_without_cuda = {'enable_gpu': False}

    # Run
    result_with_cuda = instance._resolve_gpu_parameters(parameters_with_cuda)
    result_with_cuda_only = instance._resolve_gpu_parameters(parameters_with_cuda_only)
    result_with_cuda_none = instance._resolve_gpu_parameters(parameters_with_cuda_none)
    result_without_cuda = instance._resolve_gpu_parameters(parameters_without_cuda)

    # Assert
    # ``cuda`` is only kept when it is the sole GPU setting provided.
    assert result_with_cuda == {'enable_gpu': True}
    assert result_with_cuda_only == {'cuda': True}
    assert result_with_cuda_none == {'enable_gpu': True}
    assert result_without_cuda == {'enable_gpu': False}

def test_get_parameters_mock(self):
    """Test that `get_parameters` calls `_resolve_gpu_parameters`."""
    # Setup
    metadata = Metadata()
    instance = BaseSynthesizer(
        metadata, enforce_min_max_values=False, enforce_rounding=False, locales='en_CA'
    )
    # ``enforce_rounding`` deliberately differs from the value given to the instance, so
    # the final equality check only passes if the mocked return value is what comes back.
    expected_parameters = {
        'enforce_min_max_values': False,
        'enforce_rounding': True,
        'locales': 'en_CA',
    }
    instance._resolve_gpu_parameters = Mock(return_value=expected_parameters)

    # Run
    parameters = instance.get_parameters()

    # Assert
    assert parameters == expected_parameters
    instance._resolve_gpu_parameters.assert_called_once_with({
        'enforce_min_max_values': False,
        'enforce_rounding': False,
        'locales': 'en_CA',
    })

def test_get_parameters(self):
"""Test that it returns every ``init`` parameter without the ``metadata``."""
# Setup
Expand Down
10 changes: 5 additions & 5 deletions tests/unit/single_table/test_copulagan.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def test___init__(self):
assert instance.verbose is False
assert instance.epochs == 300
assert instance.pac == 10
assert instance.cuda is True
assert instance.enable_gpu is True
assert instance.numerical_distributions == {}
assert instance.default_distribution == 'beta'
assert instance._numerical_distributions == {}
Expand Down Expand Up @@ -79,7 +79,7 @@ def test___init__with_unified_metadata(self):
assert instance.verbose is False
assert instance.epochs == 300
assert instance.pac == 10
assert instance.cuda is True
assert instance.enable_gpu is True
assert instance.numerical_distributions == {}
assert instance.default_distribution == 'beta'
assert instance._numerical_distributions == {}
Expand Down Expand Up @@ -128,7 +128,7 @@ def test___init__custom(self):
verbose=verbose,
epochs=epochs,
pac=pac,
cuda=cuda,
enable_gpu=cuda,
numerical_distributions=numerical_distributions,
default_distribution=default_distribution,
)
Expand All @@ -149,7 +149,7 @@ def test___init__custom(self):
assert instance.verbose is True
assert instance.epochs == epochs
assert instance.pac == pac
assert instance.cuda is False
assert instance.enable_gpu is False
assert instance.numerical_distributions == {'field': 'gamma'}
assert instance._numerical_distributions == {'field': GammaUnivariate}
assert instance.default_distribution == 'uniform'
Expand Down Expand Up @@ -208,7 +208,7 @@ def test_get_params(self):
'verbose': False,
'epochs': 300,
'pac': 10,
'cuda': True,
'enable_gpu': True,
'numerical_distributions': {},
'default_distribution': 'beta',
}
Expand Down
Loading