Skip to content

Commit d0b4ec1

Browse files
committed
remove dayz_utils file multi-table
1 parent 28e8fa1 commit d0b4ec1

File tree

6 files changed

+163
-187
lines changed

6 files changed

+163
-187
lines changed

sdv/multi_table/_dayz_utils.py

Lines changed: 0 additions & 52 deletions
This file was deleted.

sdv/multi_table/dayz.py

Lines changed: 56 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,16 @@
11
"""Multi-Table DayZ parameter detection and creation."""
22

3+
import json
4+
5+
import pandas as pd
6+
37
from sdv.cag._utils import _is_list_of_type
48
from sdv.errors import SynthesizerInputError, SynthesizerProcessingError
5-
from sdv.multi_table._dayz_utils import create_parameters_multi_table
6-
from sdv.single_table.dayz import _validate_parameter_structure, _validate_tables_parameter
9+
from sdv.single_table.dayz import (
10+
_validate_parameter_structure,
11+
_validate_tables_parameter,
12+
create_parameters,
13+
)
714

815
REQUIRED_RELATIONSHIP_KEYS = [
916
'parent_table_name',
@@ -19,6 +26,53 @@
1926
DEFAULT_NUM_ROWS = 1000
2027

2128

29+
def _detect_relationship_parameters(data, metadata):
    """Detect all relationship-level parameters for DayZ.

    The relationship-level parameters are:
        - The min and max cardinality, i.e. the smallest and largest number of
          child rows that reference a single parent primary key value. Parent
          keys that no child row references count as a cardinality of 0.

    Args:
        data (dict[str, pd.DataFrame]): The input data, keyed by table name.
        metadata (Metadata): The metadata object. Only ``metadata.relationships``
            is read here.

    Returns:
        list[dict]: One dict per relationship in ``metadata.relationships``,
        holding the four relationship keys plus ``min_cardinality`` and
        ``max_cardinality`` as built-in ``int`` values.
    """
    relationship_parameters = []
    for relationship in metadata.relationships:
        parent_table_name = relationship['parent_table_name']
        child_table_name = relationship['child_table_name']
        parent_primary_key = relationship['parent_primary_key']
        child_foreign_key = relationship['child_foreign_key']

        # Index by the parent keys so that parents without any child still get
        # a row (NaN after alignment, turned into 0 below).
        parent_keys = data[parent_table_name][parent_primary_key].copy()
        cardinality_table = pd.DataFrame(index=parent_keys)
        cardinality_table['cardinality'] = data[child_table_name][child_foreign_key].value_counts()
        cardinality = cardinality_table['cardinality'].fillna(0)

        if cardinality.empty:
            # No parent rows: min()/max() would be NaN, which cannot be cast to int.
            min_cardinality = max_cardinality = 0
        else:
            # pandas returns NumPy scalars (float64 once NaN was filled); cast to the
            # built-in int so the values are JSON-friendly and pass int type checks.
            min_cardinality = int(cardinality.min())
            max_cardinality = int(cardinality.max())

        relationship_parameters.append({
            'parent_table_name': parent_table_name,
            'child_table_name': child_table_name,
            'parent_primary_key': parent_primary_key,
            'child_foreign_key': child_foreign_key,
            'min_cardinality': min_cardinality,
            'max_cardinality': max_cardinality,
        })

    return relationship_parameters
63+
64+
65+
def create_parameters_multi_table(data, metadata, output_filename):
    """Create the parameter dict for the DayZSynthesizer from multi-table data.

    The dict returned by ``create_parameters`` is extended with a
    ``'relationships'`` entry holding the detected relationship parameters.

    Args:
        data: The input data, forwarded to ``create_parameters`` and
            ``_detect_relationship_parameters``.
        metadata: The metadata object, forwarded to the same two helpers.
        output_filename: If truthy, the path the parameters are written to as
            JSON (4-space indent). If falsy, nothing is written.

    Returns:
        dict: The parameters, including the ``'relationships'`` entry.
    """
    # ``None`` is passed as the output filename to ``create_parameters``; the file
    # is written below, once the relationships have been added.
    dayz_parameters = create_parameters(data, metadata, None)
    dayz_parameters['relationships'] = _detect_relationship_parameters(data, metadata)
    if not output_filename:
        return dayz_parameters

    with open(output_filename, 'w') as output_file:
        json.dump(dayz_parameters, output_file, indent=4)

    return dayz_parameters
74+
75+
2276
def _validate_min_cardinality(relationship):
2377
min_cardinality = relationship['min_cardinality']
2478
if not isinstance(min_cardinality, int) or min_cardinality < 0:

sdv/single_table/dayz.py

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -150,26 +150,6 @@ def create_parameters(data, metadata, output_filename):
150150
if len(metadata.tables) == 0:
151151
raise ValueError('Metadata is empty')
152152

153-
metadata.validate()
154-
datas = data if isinstance(data, dict) else {metadata._get_single_table_name(): data}
155-
metadata.validate_data(datas)
156-
parameters = {'DAYZ_SPEC_VERSION': 'V1', 'tables': {}}
157-
for table_name, table_data in datas.items():
158-
parameters['tables'][table_name] = {}
159-
parameters['tables'][table_name].update(_detect_table_parameters(table_data))
160-
parameters['tables'][table_name].update(
161-
_detect_column_parameters(table_data, metadata, table_name)
162-
)
163-
164-
if output_filename:
165-
with open(output_filename, 'w') as f:
166-
json.dump(parameters, f, indent=4)
167-
168-
return parameters
169-
170-
171-
def create_parameters(data, metadata, output_filename):
172-
"""Detect and create a parameter dict for the DayZ model."""
173153
metadata.validate()
174154
datas = data if isinstance(data, dict) else {metadata._get_single_table_name(): data}
175155
metadata.validate_data(datas)

tests/unit/multi_table/test__dayz_utils.py

Lines changed: 0 additions & 113 deletions
This file was deleted.

tests/unit/multi_table/test_dayz.py

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import json
12
import re
23
from unittest.mock import call, patch
34

@@ -9,10 +10,12 @@
910
from sdv.metadata import Metadata
1011
from sdv.multi_table.dayz import (
1112
DayZSynthesizer,
13+
_detect_relationship_parameters,
1214
_validate_cardinality,
1315
_validate_parameters,
1416
_validate_relationship_parameters,
1517
_validate_relationship_structure,
18+
create_parameters_multi_table,
1619
)
1720

1821

@@ -52,6 +55,109 @@ def metadata():
5255
})
5356

5457

58+
def test__detect_relationship_parameters():
    """Test that `_detect_relationship_parameters` reports min and max cardinality."""
    # Setup
    relationship = {
        'parent_table_name': 'parent',
        'child_table_name': 'child',
        'parent_primary_key': 'parent_id',
        'child_foreign_key': 'parent_id',
    }
    tables = {
        'parent': pd.DataFrame({'parent_id': [1, 2, 3, 4, 5]}),
        'child': pd.DataFrame({
            'child_id': [10, 11, 12, 13, 14, 15, 16],
            'parent_id': [1, 1, 2, 2, 2, 3, None],
        }),
    }
    metadata = Metadata.load_from_dict({
        'tables': {
            'parent': {'columns': {'parent_id': {'sdtype': 'id'}}, 'primary_key': 'parent_id'},
            'child': {
                'columns': {'child_id': {'sdtype': 'id'}, 'parent_id': {'sdtype': 'id'}},
                'primary_key': 'child_id',
            },
        },
        'relationships': [dict(relationship)],
    })

    # Run
    detected = _detect_relationship_parameters(tables, metadata)

    # Assert
    # Parents 4 and 5 have no child rows (min 0); parent 2 has three (max 3).
    assert detected == [{**relationship, 'min_cardinality': 0, 'max_cardinality': 3}]
101+
102+
103+
@patch('sdv.multi_table.dayz._detect_relationship_parameters')
@patch('sdv.multi_table.dayz.create_parameters')
def test_create_parameters_multi_table(mock_create_parameters, mock_detect_relationship, tmp_path):
    """Test the `create_parameters_multi_table` method.

    Both helpers are mocked, so this only checks that their results are combined
    under the right keys, returned, and written to ``output_filename`` as JSON.
    """
    # Setup
    data = pd.DataFrame()
    metadata = Metadata()
    output_filename = str(tmp_path / 'output.json')
    # Same shape as the real `_detect_relationship_parameters` output: a list with
    # one dict per relationship.
    relationships = [
        {
            'parent_table_name': 'parent_table',
            'child_table_name': 'child_table',
            'parent_primary_key': 'parent_pk',
            'child_foreign_key': 'child_fk',
            'min_cardinality': 0,
            'max_cardinality': 10,
        }
    ]
    tables = {
        'table_name': {
            'num_rows': 100,
            'columns': {
                'col1': {'missing_values_proportion': 0.1},
                'col2': {'missing_values_proportion': 0.2},
            },
        }
    }
    mock_detect_relationship.return_value = relationships
    mock_create_parameters.return_value = {'DAYZ_SPEC_VERSION': 'V1', 'tables': tables}

    # Run
    result = create_parameters_multi_table(data, metadata, output_filename)

    # Assert
    mock_create_parameters.assert_called_once_with(data, metadata, None)
    mock_detect_relationship.assert_called_once_with(data, metadata)
    assert result == {
        'DAYZ_SPEC_VERSION': 'V1',
        'tables': tables,
        'relationships': relationships,
    }
    # The relationships are added to the dict returned by `create_parameters` in place.
    assert result == mock_create_parameters.return_value
    with open(output_filename) as f:
        output = json.load(f)

    assert output == result
159+
160+
55161
def test__validate_relationship_structure():
56162
"""Test validating the relationship parameters structure."""
57163
# Setup

0 commit comments

Comments
 (0)