Skip to content

Commit 1f069cc

Browse files
authored
Merge branch 'main' into issue-2719-context-order-par-synthesizer
2 parents f88e933 + a124e1d commit 1f069cc

File tree

18 files changed

+819
-425
lines changed

18 files changed

+819
-425
lines changed

HISTORY.md

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,25 @@
11
# Release Notes
22

3+
## v1.28.0 - 2025-10-17
4+
5+
### New Features
6+
7+
* Unable to validate just 1 table of a multi-table schema - Issue [#2678](https://github.com/sdv-dev/SDV/issues/2678) by @frances-h
8+
* Allow users to validate the DayZ parameters - Issue [#2667](https://github.com/sdv-dev/SDV/issues/2667) by @frances-h
9+
* Allow users to estimate parameters for DayZSynthesizer - Issue [#2666](https://github.com/sdv-dev/SDV/issues/2666) by @R-Palazzo
10+
11+
### Bugs Fixed
12+
13+
* Minimum tests failing - OSError: [WinError 1114] A dynamic link library (DLL) initialization routine failed - Issue [#2725](https://github.com/sdv-dev/SDV/issues/2725) by @amontanez24
14+
* [DayZ Parameters] `'missing_values_proportion'` must be zero for any key columns - Issue [#2708](https://github.com/sdv-dev/SDV/issues/2708) by @frances-h
15+
* [DayZ Parameters] Validation results in unexpected errors for some edge cases - Issue [#2703](https://github.com/sdv-dev/SDV/issues/2703) by @fealho
16+
* [DayZ Parameters] `create_parameters` should fall back to default parameters if parameters cannot be detected - Issue [#2702](https://github.com/sdv-dev/SDV/issues/2702) by @fealho
17+
* [DayZ Parameters] DayZ parameter validation does not validate DAYZ_SPEC_VERSION - Issue [#2701](https://github.com/sdv-dev/SDV/issues/2701) by @R-Palazzo
18+
* [DayZParameters] `KeyError` when creating parameters with empty data and metadata - Issue [#2700](https://github.com/sdv-dev/SDV/issues/2700) by @fealho
19+
* Unable to load the DayZSynthesizer after saving it - Issue [#2698](https://github.com/sdv-dev/SDV/issues/2698) by @R-Palazzo
20+
* `DayZSynthesizer.create_parameters` errors in Colab with numeric columns - Issue [#2683](https://github.com/sdv-dev/SDV/issues/2683) by @frances-h
21+
* PARSynthesizer: `FutureWarnings` in `groupby.apply` and `Series.__getitem__` from pandas - Issue [#2682](https://github.com/sdv-dev/SDV/issues/2682) by @R-Palazzo
22+
323
## v1.27.0 - 2025-09-15
424

525
### New Features

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ namespaces = false
143143
version = {attr = 'sdv.__version__'}
144144

145145
[tool.bumpversion]
146-
current_version = "1.27.1.dev0"
146+
current_version = "1.28.1.dev0"
147147
parse = '(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\.(?P<release>[a-z]+)(?P<candidate>\d+))?'
148148
serialize = [
149149
'{major}.{minor}.{patch}.{release}{candidate}',

sdv/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
__author__ = 'DataCebo, Inc.'
88
__email__ = 'info@sdv.dev'
9-
__version__ = '1.27.1.dev0'
9+
__version__ = '1.28.1.dev0'
1010

1111

1212
import sys

sdv/_utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -273,7 +273,7 @@ def check_sdv_versions_and_warn(synthesizer):
273273
"""
274274
current_community_version = getattr(version, 'community', None)
275275
current_enterprise_version = getattr(version, 'enterprise', None)
276-
if synthesizer._fitted:
276+
if getattr(synthesizer, '_fitted', False):
277277
fitted_community_version = getattr(synthesizer, '_fitted_sdv_version', None)
278278
fitted_enterprise_version = getattr(synthesizer, '_fitted_sdv_enterprise_version', None)
279279
community_mismatch = current_community_version != fitted_community_version

sdv/multi_table/_dayz_utils.py

Lines changed: 0 additions & 52 deletions
This file was deleted.

sdv/multi_table/dayz.py

Lines changed: 64 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,16 @@
11
"""Multi-Table DayZ parameter detection and creation."""
22

3+
import json
4+
5+
import pandas as pd
6+
7+
from sdv.cag._utils import _is_list_of_type
38
from sdv.errors import SynthesizerInputError, SynthesizerProcessingError
4-
from sdv.multi_table._dayz_utils import create_parameters_multi_table
5-
from sdv.single_table.dayz import _validate_parameter_structure, _validate_tables_parameter
9+
from sdv.single_table.dayz import (
10+
_validate_parameter_structure,
11+
_validate_tables_parameter,
12+
create_parameters,
13+
)
614

715
REQUIRED_RELATIONSHIP_KEYS = [
816
'parent_table_name',
@@ -18,6 +26,53 @@
1826
DEFAULT_NUM_ROWS = 1000
1927

2028

29+
def _detect_relationship_parameters(data, metadata):
    """Detect all relationship-level parameters for the DayZ parameters.

    The relationship-level parameters are:
        - The min and max cardinality

    Args:
        data (dict[str, pd.DataFrame]): The input data.
        metadata (Metadata): The metadata object.

    Returns:
        list[dict]: One dictionary per entry of ``metadata.relationships``. Each holds
        the four relationship keys plus ``min_cardinality`` and ``max_cardinality``
        as built-in ``int`` values.
    """
    relationship_parameters = []
    for relationship in metadata.relationships:
        parent_table_name = relationship['parent_table_name']
        child_table_name = relationship['child_table_name']
        parent_primary_key = relationship['parent_primary_key']
        child_foreign_key = relationship['child_foreign_key']

        # Index by the parent primary key so parents without any child row are kept:
        # they come out of the alignment as NaN and are then counted as 0 children.
        cardinality_table = pd.DataFrame(index=data[parent_table_name][parent_primary_key].copy())
        cardinality_table['cardinality'] = data[child_table_name][child_foreign_key].value_counts()
        cardinality = cardinality_table['cardinality'].fillna(0)

        if cardinality.empty:
            # min()/max() of an empty column is NaN, which cannot be cast to int.
            # NOTE(review): 0 is used as the fallback for an empty parent table; confirm
            # this is the desired default.
            min_cardinality = 0
            max_cardinality = 0
        else:
            # pandas returns NumPy scalars here; cast to built-in int so the values pass
            # the ``isinstance(..., int)`` cardinality validation and can be dumped to JSON.
            min_cardinality = int(cardinality.min())
            max_cardinality = int(cardinality.max())

        relationship_parameters.append({
            'parent_table_name': parent_table_name,
            'child_table_name': child_table_name,
            'parent_primary_key': parent_primary_key,
            'child_foreign_key': child_foreign_key,
            'min_cardinality': min_cardinality,
            'max_cardinality': max_cardinality,
        })

    return relationship_parameters
63+
64+
65+
def create_parameters_multi_table(data, metadata, output_filename):
    """Build the DayZSynthesizer parameters and optionally save them as JSON.

    Args:
        data (dict[str, pd.DataFrame]): The input data.
        metadata (Metadata): The metadata object.
        output_filename (str or None): Path of the JSON file to write. Nothing is
            written when this value is falsy.

    Returns:
        dict: The created parameters, including a ``'relationships'`` entry.
    """
    # NOTE(review): the third argument is presumably the single-table output filename;
    # ``None`` is passed so that only this function writes the file - confirm.
    dayz_parameters = create_parameters(data, metadata, None)
    dayz_parameters['relationships'] = _detect_relationship_parameters(data, metadata)
    if not output_filename:
        return dayz_parameters

    with open(output_filename, 'w') as output_file:
        json.dump(dayz_parameters, output_file, indent=4)

    return dayz_parameters
74+
75+
2176
def _validate_min_cardinality(relationship):
2277
min_cardinality = relationship['min_cardinality']
2378
if not isinstance(min_cardinality, int) or min_cardinality < 0:
@@ -48,8 +103,10 @@ def _validate_cardinality_bounds(relationship):
48103

49104

50105
def _validate_relationship_structure(dayz_parameters):
51-
if not isinstance(dayz_parameters.get('relationships', []), list):
52-
raise SynthesizerProcessingError("The 'relationships' parameter value must be a list.")
106+
if not _is_list_of_type(dayz_parameters.get('relationships', []), dict):
107+
raise SynthesizerProcessingError(
108+
"The 'relationships' parameter value must be a list of dictionaries."
109+
)
53110

54111
for relationship in dayz_parameters.get('relationships', []):
55112
unknown_relationship_parameters = relationship.keys() - set(RELATIONSHIP_PARAMETER_KEYS)
@@ -160,18 +217,18 @@ def __init__(self, metadata, locales=['en_US']):
160217
)
161218

162219
@classmethod
163-
def create_parameters(cls, data, metadata, output_filename=None):
220+
def create_parameters(cls, data, metadata, filepath=None):
164221
"""Create parameters for the DayZSynthesizer.
165222
166223
Args:
167224
data (dict[str, pd.DataFrame]): The input data.
168225
metadata (Metadata): The metadata object.
169-
output_filename (str, optional): The output filename for the parameters.
226+
filepath (str, optional): The output filename for the parameters.
170227
171228
Returns:
172229
dict: The created parameters.
173230
"""
174-
return create_parameters_multi_table(data, metadata, output_filename)
231+
return create_parameters_multi_table(data, metadata, filepath)
175232

176233
@staticmethod
177234
def validate_parameters(metadata, parameters):

sdv/sequential/par.py

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,10 @@
1111
import tqdm
1212
from rdt.transformers import FloatFormatter
1313

14-
from sdv._utils import MODELABLE_SDTYPES, _cast_to_iterable, _groupby_list
14+
from sdv._utils import MODELABLE_SDTYPES, _cast_to_iterable, _groupby_list, _is_datetime_type
1515
from sdv.cag import ProgrammableConstraint
1616
from sdv.cag._utils import _validate_constraints_single_table
17+
from sdv.constraints.utils import cast_to_datetime64
1718
from sdv.errors import SamplingError, SynthesizerInputError
1819
from sdv.metadata.errors import InvalidMetadataError
1920
from sdv.metadata.metadata import Metadata
@@ -37,6 +38,11 @@
3738
LOGGER = logging.getLogger(__name__)
3839

3940

41+
def _diff_and_bfill(series):
    """Return the element-wise differences of ``series`` with leading gaps backfilled.

    Args:
        series (pd.Series): The values to difference.

    Returns:
        pd.Series: ``series.diff()`` after ``bfill()``; the first difference, which
        ``diff`` leaves as NaN, takes the next available value.
    """
    differences = series.diff()
    backfilled = differences.bfill()
    return backfilled
44+
45+
4046
class PARSynthesizer(LossValuesMixin, MissingModuleMixin, BaseSynthesizer):
4147
"""Synthesizer for sequential data.
4248
@@ -310,20 +316,25 @@ def _transform_sequence_index(self, data):
310316
sequence_index_context = sequence_index_context.rename(
311317
columns={self._sequence_index: f'{self._sequence_index}.context'}
312318
)
319+
320+
if _is_datetime_type(sequence_index[self._sequence_index]):
321+
sequence_index[self._sequence_index] = cast_to_datetime64(
322+
sequence_index[self._sequence_index]
323+
).astype(np.int64)
324+
313325
if all(sequence_index[self._sequence_key].nunique() == 1):
314-
sequence_index_sequence = sequence_index[[self._sequence_index]].diff().bfill()
326+
diff_series = sequence_index[self._sequence_index].diff().bfill()
315327
else:
316-
sequence_index_sequence = (
317-
sequence_index.groupby(self._sequence_key)
318-
.apply(lambda x: x[self._sequence_index].diff().bfill())
319-
.droplevel(1)
320-
.reset_index()
321-
)
328+
diff_series = sequence_index.groupby(self._sequence_key, group_keys=False)[
329+
self._sequence_index
330+
].transform(_diff_and_bfill)
322331

332+
sequence_index_sequence = diff_series.to_frame(name=self._sequence_index)
323333
if all(sequence_index_sequence[self._sequence_index].isna()):
324334
fill_value = 0
325335
else:
326336
fill_value = min(sequence_index_sequence[self._sequence_index].dropna())
337+
327338
sequence_index_sequence = sequence_index_sequence.fillna(fill_value)
328339

329340
data[self._sequence_index] = sequence_index_sequence[self._sequence_index].to_numpy()
@@ -579,7 +590,7 @@ def _sample_from_par(self, context, sequence_length=None):
579590
pd.DataFrame({self._sequence_index: diffs})
580591
)[self._sequence_index].to_numpy()
581592
start_index = context_columns.index(f'{self._sequence_index}.context')
582-
start = context_values[start_index]
593+
start = context_values.iloc[start_index]
583594
sequence[sequence_index_idx] = np.cumsum(diffs) - diffs[0] + start
584595

585596
# Reformat as a DataFrame

sdv/single_table/_dayz_utils.py

Lines changed: 0 additions & 95 deletions
This file was deleted.

0 commit comments

Comments
 (0)