Skip to content

Commit 445a004

Browse files
authored
Merge branch 'main' into latest-dependency-update-ae6e1c0
2 parents 365be2c + a7eb1f8 commit 445a004

File tree

20 files changed

+849
-408
lines changed

20 files changed

+849
-408
lines changed

HISTORY.md

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,25 @@
11
# Release Notes
22

3+
## v1.28.0 - 2025-10-17
4+
5+
### New Features
6+
7+
* Unable to validate just 1 table of a multi-table schema - Issue [#2678](https://github.com/sdv-dev/SDV/issues/2678) by @frances-h
8+
* Allow users to validate the DayZ parameters - Issue [#2667](https://github.com/sdv-dev/SDV/issues/2667) by @frances-h
9+
* Allow users to estimate parameters for DayZSynthesizer - Issue [#2666](https://github.com/sdv-dev/SDV/issues/2666) by @R-Palazzo
10+
11+
### Bugs Fixed
12+
13+
* Minimum tests failing - OSError: [WinError 1114] A dynamic link library (DLL) initialization routine failed - Issue [#2725](https://github.com/sdv-dev/SDV/issues/2725) by @amontanez24
14+
* [DayZ Parameters] `'missing_values_proportion'` must be zero for any key columns - Issue [#2708](https://github.com/sdv-dev/SDV/issues/2708) by @frances-h
15+
* [DayZ Parameters] Validation results in unexpected errors for some edge cases - Issue [#2703](https://github.com/sdv-dev/SDV/issues/2703) by @fealho
16+
* [DayZ Parameters] `create_parameters` should fall back to default parameters if parameters cannot be detected - Issue [#2702](https://github.com/sdv-dev/SDV/issues/2702) by @fealho
17+
* [DayZ Parameters] DayZ parameter validation does not validate DAYZ_SPEC_VERSION - Issue [#2701](https://github.com/sdv-dev/SDV/issues/2701) by @R-Palazzo
18+
* [DayZParameters] `KeyError` when creating parameters with empty data and metadata - Issue [#2700](https://github.com/sdv-dev/SDV/issues/2700) by @fealho
19+
* Unable to load the DayZSynthesizer after saving it - Issue [#2698](https://github.com/sdv-dev/SDV/issues/2698) by @R-Palazzo
20+
* `DayZSynthesizer.create_parameters` errors in Colab with numeric columns - Issue [#2683](https://github.com/sdv-dev/SDV/issues/2683) by @frances-h
21+
* PARSynthesizer: `FutureWarnings` in `groupby.apply` and `Series.__getitem__` from pandas - Issue [#2682](https://github.com/sdv-dev/SDV/issues/2682) by @R-Palazzo
22+
323
## v1.27.0 - 2025-09-15
424

525
### New Features

latest_requirements.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@ copulas==0.12.3
33
ctgan==0.11.0
44
deepecho==0.7.0
55
graphviz==0.21
6-
numpy==2.3.3
6+
numpy==2.3.4
77
pandas==2.3.3
88
platformdirs==4.5.0
9-
rdt==1.18.1
9+
rdt==1.18.2
1010
sdmetrics==0.23.0
1111
tqdm==4.67.1

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ namespaces = false
143143
version = {attr = 'sdv.__version__'}
144144

145145
[tool.bumpversion]
146-
current_version = "1.27.1.dev0"
146+
current_version = "1.28.1.dev0"
147147
parse = '(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\.(?P<release>[a-z]+)(?P<candidate>\d+))?'
148148
serialize = [
149149
'{major}.{minor}.{patch}.{release}{candidate}',

sdv/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
__author__ = 'DataCebo, Inc.'
88
__email__ = '[email protected]'
9-
__version__ = '1.27.1.dev0'
9+
__version__ = '1.28.1.dev0'
1010

1111

1212
import sys

sdv/_utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -273,7 +273,7 @@ def check_sdv_versions_and_warn(synthesizer):
273273
"""
274274
current_community_version = getattr(version, 'community', None)
275275
current_enterprise_version = getattr(version, 'enterprise', None)
276-
if synthesizer._fitted:
276+
if getattr(synthesizer, '_fitted', False):
277277
fitted_community_version = getattr(synthesizer, '_fitted_sdv_version', None)
278278
fitted_enterprise_version = getattr(synthesizer, '_fitted_sdv_enterprise_version', None)
279279
community_mismatch = current_community_version != fitted_community_version

sdv/multi_table/_dayz_utils.py

Lines changed: 0 additions & 52 deletions
This file was deleted.

sdv/multi_table/dayz.py

Lines changed: 64 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,16 @@
11
"""Multi-Table DayZ parameter detection and creation."""
22

3+
import json
4+
5+
import pandas as pd
6+
7+
from sdv.cag._utils import _is_list_of_type
38
from sdv.errors import SynthesizerInputError, SynthesizerProcessingError
4-
from sdv.multi_table._dayz_utils import create_parameters_multi_table
5-
from sdv.single_table.dayz import _validate_parameter_structure, _validate_tables_parameter
9+
from sdv.single_table.dayz import (
10+
_validate_parameter_structure,
11+
_validate_tables_parameter,
12+
create_parameters,
13+
)
614

715
REQUIRED_RELATIONSHIP_KEYS = [
816
'parent_table_name',
@@ -18,6 +26,53 @@
1826
DEFAULT_NUM_ROWS = 1000
1927

2028

29+
def _detect_relationship_parameters(data, metadata):
30+
"""Detect all relationship-level for the DayZ parameters.
31+
32+
The relationship-level parameters are:
33+
- The min and max cardinality
34+
35+
Args:
36+
data (dict[str, pd.DataFrame]): The input data.
37+
metadata (Metadata): The metadata object.
38+
39+
Returns:
40+
dict: A list containing the detected parameters.
41+
"""
42+
relationship_parameters = []
43+
for relationship in metadata.relationships:
44+
rel_tuple = (
45+
relationship['parent_table_name'],
46+
relationship['child_table_name'],
47+
relationship['parent_primary_key'],
48+
relationship['child_foreign_key'],
49+
)
50+
cardinality_table = pd.DataFrame(index=data[rel_tuple[0]][rel_tuple[2]].copy())
51+
cardinality_table['cardinality'] = data[rel_tuple[1]][rel_tuple[3]].value_counts()
52+
cardinality_table = cardinality_table.fillna(0)
53+
relationship_parameters.append({
54+
'parent_table_name': rel_tuple[0],
55+
'child_table_name': rel_tuple[1],
56+
'parent_primary_key': rel_tuple[2],
57+
'child_foreign_key': rel_tuple[3],
58+
'min_cardinality': cardinality_table['cardinality'].min(),
59+
'max_cardinality': cardinality_table['cardinality'].max(),
60+
})
61+
62+
return relationship_parameters
63+
64+
65+
def create_parameters_multi_table(data, metadata, output_filename):
66+
"""Create parameters for the DayZSynthesizer."""
67+
parameters = create_parameters(data, metadata, None)
68+
parameters['relationships'] = _detect_relationship_parameters(data, metadata)
69+
if output_filename:
70+
with open(output_filename, 'w') as f:
71+
json.dump(parameters, f, indent=4)
72+
73+
return parameters
74+
75+
2176
def _validate_min_cardinality(relationship):
2277
min_cardinality = relationship['min_cardinality']
2378
if not isinstance(min_cardinality, int) or min_cardinality < 0:
@@ -48,8 +103,10 @@ def _validate_cardinality_bounds(relationship):
48103

49104

50105
def _validate_relationship_structure(dayz_parameters):
51-
if not isinstance(dayz_parameters.get('relationships', []), list):
52-
raise SynthesizerProcessingError("The 'relationships' parameter value must be a list.")
106+
if not _is_list_of_type(dayz_parameters.get('relationships', []), dict):
107+
raise SynthesizerProcessingError(
108+
"The 'relationships' parameter value must be a list of dictionaries."
109+
)
53110

54111
for relationship in dayz_parameters.get('relationships', []):
55112
unknown_relationship_parameters = relationship.keys() - set(RELATIONSHIP_PARAMETER_KEYS)
@@ -160,18 +217,18 @@ def __init__(self, metadata, locales=['en_US']):
160217
)
161218

162219
@classmethod
163-
def create_parameters(cls, data, metadata, output_filename=None):
220+
def create_parameters(cls, data, metadata, filepath=None):
164221
"""Create parameters for the DayZSynthesizer.
165222
166223
Args:
167224
data (dict[str, pd.DataFrame]): The input data.
168225
metadata (Metadata): The metadata object.
169-
output_filename (str, optional): The output filename for the parameters.
226+
filepath (str, optional): The output filename for the parameters.
170227
171228
Returns:
172229
dict: The created parameters.
173230
"""
174-
return create_parameters_multi_table(data, metadata, output_filename)
231+
return create_parameters_multi_table(data, metadata, filepath)
175232

176233
@staticmethod
177234
def validate_parameters(metadata, parameters):

sdv/sequential/par.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -378,6 +378,10 @@ def _get_id_context_columns(self):
378378
if self._get_table_metadata().columns[col]['sdtype'] not in MODELABLE_SDTYPES
379379
]
380380

381+
def _reorder_context_columns(self, context_columns, timeseries_data):
    """Return the context columns sorted by their position in ``timeseries_data``.

    Args:
        context_columns (list):
            The context column names to reorder.
        timeseries_data:
            Object exposing a ``columns`` iterable that defines the order.

    Returns:
        list:
            A new list with the context columns ordered as in
            ``timeseries_data.columns``. Names that are not among those
            columns are placed last, keeping their relative order.
    """
    positions = {}
    for position, name in enumerate(timeseries_data.columns):
        positions[name] = position

    def _position_of(name):
        # Unknown columns sort after every known column.
        return positions.get(name, float('inf'))

    return sorted(context_columns, key=_position_of)
384+
381385
def _preprocess(self, data):
382386
"""Transform the raw data to numerical space.
383387
@@ -539,6 +543,8 @@ def _fit(self, processed_data):
539543
pandas.DataFrame containing both the sequences,
540544
the entity columns and the context columns.
541545
"""
546+
self.context_columns = self._reorder_context_columns(self.context_columns, processed_data)
547+
542548
if self._sequence_key:
543549
self._fit_context_model(processed_data)
544550

sdv/single_table/_dayz_utils.py

Lines changed: 0 additions & 95 deletions
This file was deleted.

0 commit comments

Comments
 (0)