|
| 1 | +"""Tests for the realtabformer module.""" |
| 2 | + |
| 3 | +from unittest.mock import MagicMock, patch |
| 4 | + |
| 5 | +import numpy as np |
| 6 | +import pandas as pd |
| 7 | +import pytest |
| 8 | + |
| 9 | +from sdgym.synthesizers import RealTabFormerSynthesizer |
| 10 | + |
| 11 | + |
@pytest.fixture
def sample_data():
    """Provide a small, reproducible numeric DataFrame for testing.

    Returns:
        pandas.DataFrame:
            A frame with a single column ``'num'`` holding 10 values drawn
            from a standard normal distribution.
    """
    n_samples = 10
    # Draw from a locally seeded generator rather than the global
    # ``np.random`` state: the fixture then yields the same values on every
    # run, so a failing test can be reproduced, and other tests relying on
    # the global random state are left undisturbed.
    rng = np.random.default_rng(0)
    num_values = rng.normal(size=n_samples)

    return pd.DataFrame({
        'num': num_values,
    })
| 21 | + |
| 22 | + |
class TestRealTabFormerSynthesizer:
    """Exercise the SDGym ``RealTabFormerSynthesizer`` wrapper with mocked models."""

    @patch('realtabformer.REaLTabFormer')
    def test__get_trained_synthesizer(self, mock_real_tab_former):
        """Test that ``_get_trained_synthesizer`` builds, fits and returns the model.

        The patched ``REaLTabFormer`` class must be instantiated exactly once
        with ``model_type='tabular'``, its instance must be fitted exactly
        once on the given data with ``device='cpu'``, and that same instance
        must be what the method returns.
        """
        # Setup
        fitted_stub = MagicMock()
        mock_real_tab_former.return_value = fitted_stub
        training_table, table_metadata = MagicMock(), MagicMock()
        instance = RealTabFormerSynthesizer()

        # Run
        returned_model = instance._get_trained_synthesizer(training_table, table_metadata)

        # Assert
        mock_real_tab_former.assert_called_once_with(model_type='tabular')
        fitted_stub.fit.assert_called_once_with(training_table, device='cpu')
        assert returned_model == fitted_stub, 'Expected the trained model to be returned.'

    def test__sample_from_synthesizer(self):
        """Test that ``_sample_from_synthesizer`` requests ``n_sample`` rows on the CPU."""
        # Setup
        n_sample = 10
        # Stand-in for the sampled table; only its ``shape`` is inspected below.
        sampled_stub = MagicMock(shape=(10, 5))
        fitted_model = MagicMock()
        fitted_model.sample.return_value = sampled_stub
        instance = RealTabFormerSynthesizer()

        # Run
        synthetic_data = instance._sample_from_synthesizer(fitted_model, n_sample)

        # Assert
        fitted_model.sample.assert_called_once_with(n_sample, device='cpu')
        row_count_matches = synthetic_data.shape[0] == n_sample
        assert row_count_matches, (
            f'Expected {n_sample} rows, but got {synthetic_data.shape[0]}'
        )
0 commit comments