Skip to content

Commit 0e701b9

Browse files
committed
make release-tag: Merge branch 'main' into stable
2 parents 73d120f + b62aaba commit 0e701b9

File tree

12 files changed

+156
-19
lines changed

12 files changed

+156
-19
lines changed

HISTORY.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
11
# Release Notes
22

3+
## v1.29.1 - 2025-11-21
4+
5+
### Bugs Fixed
6+
7+
* Warning raised multiple times when metadata does not have the 'datetime_format' key - Issue [#2739](https://github.com/sdv-dev/SDV/issues/2739) by @fealho
8+
* Adding constraints to multi-table synthesizer in multiple steps causes fit to crash - Issue [#2736](https://github.com/sdv-dev/SDV/issues/2736) by @frances-h
9+
310
## v1.29.0 - 2025-11-14
411

512
### New Features

latest_requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ copulas==0.12.3
33
ctgan==0.11.1
44
deepecho==0.7.0
55
graphviz==0.21
6-
numpy==2.3.4
6+
numpy==2.3.5
77
pandas==2.3.3
88
platformdirs==4.5.0
99
rdt==1.18.2

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ namespaces = false
142142
version = {attr = 'sdv.__version__'}
143143

144144
[tool.bumpversion]
145-
current_version = "1.29.0"
145+
current_version = "1.29.1.dev1"
146146
parse = '(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\.(?P<release>[a-z]+)(?P<candidate>\d+))?'
147147
serialize = [
148148
'{major}.{minor}.{patch}.{release}{candidate}',

sdv/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
__author__ = 'DataCebo, Inc.'
88
__email__ = 'info@sdv.dev'
9-
__version__ = '1.29.0'
9+
__version__ = '1.29.1.dev1'
1010

1111

1212
import sys

sdv/metadata/single_table.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1296,6 +1296,7 @@ def validate_data(self, data, sdtype_warnings=None):
12961296
A warning is being raised if ``datetime_format`` is missing from a column represented
12971297
as ``object`` in the dataframe and its sdtype is ``datetime``.
12981298
"""
1299+
_datetime_format_warning_flag = sdtype_warnings is not None
12991300
sdtype_warnings = sdtype_warnings if sdtype_warnings is not None else defaultdict(list)
13001301
if not isinstance(data, pd.DataFrame):
13011302
raise ValueError(f'Data must be a DataFrame, not a {type(data)}.')
@@ -1315,7 +1316,7 @@ def validate_data(self, data, sdtype_warnings=None):
13151316
errors += self._validate_column_data(data[column], sdtype_warnings)
13161317

13171318
errors += self._validate_primary_key(data)
1318-
if sdtype_warnings is not None and len(sdtype_warnings):
1319+
if (not _datetime_format_warning_flag) and len(sdtype_warnings):
13191320
df = pd.DataFrame(sdtype_warnings)
13201321
message = (
13211322
"No 'datetime_format' is present in the metadata for the following columns:\n"

sdv/multi_table/base.py

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -82,14 +82,17 @@ def _set_temp_numpy_seed(self):
8282

8383
def _initialize_models(self):
8484
with disable_single_table_logger():
85-
for table_name, table_metadata in self.metadata.tables.items():
85+
for table_name, table_metadata in self._modified_multi_table_metadata.tables.items():
8686
synthesizer_parameters = {'locales': self.locales}
8787
synthesizer_parameters.update(self._table_parameters.get(table_name, {}))
8888
metadata_dict = {'tables': {table_name: table_metadata.to_dict()}}
8989
metadata = Metadata.load_from_dict(metadata_dict)
9090
self._table_synthesizers[table_name] = self._synthesizer(
9191
metadata=metadata, **synthesizer_parameters
9292
)
93+
# Mark synthesizer as embedded in a multi-table setting
94+
# so it can suppress datetime_format warnings that are aggregated here
95+
self._table_synthesizers[table_name]._suppress_datetime_format_warning = True
9396
self._table_synthesizers[table_name]._data_processor.table_name = table_name
9497

9598
def _get_pbar_args(self, **kwargs):
@@ -129,7 +132,7 @@ def __init__(self, metadata, locales=['en_US'], synthesizer_kwargs=None):
129132
self._original_metadata = deepcopy(self.metadata)
130133
self._modified_multi_table_metadata = deepcopy(self.metadata)
131134
self.constraints = []
132-
self._has_seen_single_table_constraint = False
135+
self._single_table_constraints = []
133136
if synthesizer_kwargs is not None:
134137
warn_message = (
135138
'The `synthesizer_kwargs` parameter is deprecated as of SDV 1.2.0 and does not '
@@ -177,9 +180,10 @@ def _detect_single_table_constraints(self, constraints):
177180
constraints (list):
178181
A list of constraints to filter.
179182
"""
180-
idx_single_table_constraint = 0 if self._has_seen_single_table_constraint else None
183+
has_seen_single_table_constraint = len(self._single_table_constraints) > 0
184+
idx_single_table_constraint = 0 if has_seen_single_table_constraint else None
181185
for idx, constraint in enumerate(constraints):
182-
if self._has_seen_single_table_constraint and constraint._is_single_table is False:
186+
if has_seen_single_table_constraint and constraint._is_single_table is False:
183187
raise SynthesizerInputError(
184188
'Cannot apply multi-table constraint after single-table constraint has '
185189
'been applied.'
@@ -188,8 +192,8 @@ def _detect_single_table_constraints(self, constraints):
188192
if not constraint._is_single_table:
189193
continue
190194

191-
if not self._has_seen_single_table_constraint:
192-
self._has_seen_single_table_constraint = True
195+
if not has_seen_single_table_constraint:
196+
has_seen_single_table_constraint = True
193197
idx_single_table_constraint = idx
194198

195199
return idx_single_table_constraint
@@ -231,20 +235,21 @@ def add_constraints(self, constraints):
231235
self.constraints += multi_table_constraints
232236
self._constraints_fitted = False
233237
self._initialize_models()
234-
if single_table_constraints:
235-
for constraint in single_table_constraints:
238+
if self._single_table_constraints or single_table_constraints:
239+
for constraint in [*self._single_table_constraints, *single_table_constraints]:
236240
table_name = constraint.table_name
237241
self._table_synthesizers[table_name].add_constraints([constraint])
238242
try:
239243
self.metadata = constraint.get_updated_metadata(self.metadata)
240244
except ConstraintNotMetError:
241245
constraint.get_updated_metadata(self._modified_multi_table_metadata)
242246

247+
self._single_table_constraints += single_table_constraints
248+
243249
def get_constraints(self):
244250
"""Get a copy of the list of constraints applied to the synthesizer."""
245251
if not hasattr(self, 'constraints'):
246252
return []
247-
248253
constraints = []
249254
for constraint in self.constraints:
250255
if isinstance(constraint, ProgrammableConstraintHarness):
@@ -404,6 +409,8 @@ def set_table_parameters(self, table_name, table_parameters):
404409
self._table_synthesizers[table_name] = self._synthesizer(
405410
metadata=table_metadata, **table_parameters
406411
)
412+
# Mark synthesizer as embedded in a multi-table setting to avoid duplicate datetime warnings
413+
self._table_synthesizers[table_name]._suppress_datetime_format_warning = True
407414
self._table_synthesizers[table_name]._data_processor.table_name = table_name
408415
self._table_parameters[table_name].update(deepcopy(table_parameters))
409416

sdv/single_table/base.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -561,7 +561,18 @@ def validate(self, data):
561561
data (pandas.DataFrame):
562562
The data to validate.
563563
"""
564-
self._original_metadata.validate_data({self._table_name: data})
564+
# Suppress duplicate datetime_format warning only when this single-table synthesizer
565+
# is embedded inside a multi-table synthesizer
566+
if getattr(self, '_suppress_datetime_format_warning', False):
567+
with warnings.catch_warnings():
568+
warnings.filterwarnings(
569+
'ignore',
570+
message=r"No 'datetime_format' is present.*",
571+
category=UserWarning,
572+
)
573+
self._original_metadata.validate_data({self._table_name: data})
574+
else:
575+
self._original_metadata.validate_data({self._table_name: data})
565576
self._validate_transform_constraints(data, enforce_constraint_fitting=True)
566577

567578
# Retaining the logic of returning errors and raising them here to maintain consistency

sdv/utils/utils.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import datetime
44
import sys
5+
import warnings
56
from copy import deepcopy
67

78
import cloudpickle
@@ -55,7 +56,14 @@ def drop_unknown_references(data, metadata, drop_missing_values=False, verbose=T
5556
})
5657
metadata.validate()
5758
try:
58-
metadata.validate_data(data)
59+
# Suppress duplicate datetime_format warnings during referential integrity validation.
60+
with warnings.catch_warnings():
61+
warnings.filterwarnings(
62+
'ignore',
63+
message=r"No 'datetime_format' is present.*",
64+
category=UserWarning,
65+
)
66+
metadata.validate_data(data)
5967
if drop_missing_values:
6068
_validate_foreign_keys_not_null(metadata, data)
6169

static_code_analysis.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
Run started:2025-11-14 21:11:19.372743
1+
Run started:2025-11-21 21:03:20.369781
22

33
Test results:
44
>> Issue: [B110:try_except_pass] Try, Except, Pass detected.
@@ -121,7 +121,7 @@ Test results:
121121
--------------------------------------------------
122122

123123
Code scanned:
124-
Total lines of code: 17119
124+
Total lines of code: 17140
125125
Total lines skipped (#nosec): 0
126126
Total potential issues skipped due to specifically being disabled (e.g., #nosec BXXX): 0
127127

tests/integration/multi_table/test_constraints.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,3 +47,47 @@ def test_overlapping_single_table_constraints():
4747
# Assert
4848
assert all(sampled['parent_table']['colA'] < sampled['parent_table']['colB'])
4949
assert all(sampled['parent_table']['colB'] < sampled['parent_table']['colC'])
50+
51+
52+
def test_add_constraint_iteratively():
53+
"""Test adding constraints in multiple steps."""
54+
# Setup
55+
parent_table = pd.DataFrame({
56+
'id': [i for i in range(20)],
57+
'colA': np.random.randint(low=0, high=100, size=20),
58+
})
59+
parent_table['colB'] = parent_table['colA'] + np.random.randint(low=1, high=10, size=20)
60+
parent_table['colC'] = parent_table['colB'] + np.random.randint(low=1, high=10, size=20)
61+
62+
child_table = pd.DataFrame({
63+
'parent_id': np.random.randint(low=0, high=20, size=100),
64+
'colD': np.random.randint(low=100, high=200, size=100),
65+
})
66+
data = {'parent_table': parent_table, 'child_table': child_table}
67+
68+
metadata = Metadata()
69+
metadata = Metadata.detect_from_dataframes(data)
70+
71+
constraint1 = Inequality(
72+
low_column_name='colA',
73+
high_column_name='colB',
74+
table_name='parent_table',
75+
strict_boundaries=True,
76+
)
77+
constraint2 = Inequality(
78+
low_column_name='colB',
79+
high_column_name='colC',
80+
table_name='parent_table',
81+
strict_boundaries=True,
82+
)
83+
synthesizer = HMASynthesizer(metadata)
84+
85+
# Run
86+
synthesizer.add_constraints([constraint1])
87+
synthesizer.add_constraints([constraint2])
88+
synthesizer.fit(data)
89+
sampled = synthesizer.sample(10)
90+
91+
# Assert
92+
assert all(sampled['parent_table']['colA'] < sampled['parent_table']['colB'])
93+
assert all(sampled['parent_table']['colB'] < sampled['parent_table']['colC'])

0 commit comments

Comments
 (0)