Skip to content

Commit 9ce7cf5

Browse files
committed
Allow missing_values_proportion but validate it is zero for keys
1 parent 88c493f commit 9ce7cf5

File tree

4 files changed

+16
-23
lines changed

4 files changed

+16
-23
lines changed

sdv/single_table/_dayz_utils.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@ def detect_column_parameters(data, metadata, table_name):
3535
dict: A dictionary containing the detected parameters.
3636
"""
3737
table_metadata = metadata.tables[table_name]
38-
table_keys = table_metadata._get_primary_and_alternate_keys()
3938
column_parameters = {}
4039
for column_name, column_metadata in table_metadata.columns.items():
4140
column_parameters[column_name] = {}
@@ -69,10 +68,9 @@ def detect_column_parameters(data, metadata, table_name):
6968
'category_values': data[column_name].dropna().unique().tolist()
7069
}
7170

72-
if column_name not in table_keys:
73-
column_parameters[column_name]['missing_values_proportion'] = (
74-
data[column_name].isna().mean().item()
75-
)
71+
column_parameters[column_name]['missing_values_proportion'] = (
72+
data[column_name].isna().mean()
73+
)
7674

7775
return {'columns': column_parameters}
7876

sdv/single_table/dayz.py

Lines changed: 8 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -57,15 +57,6 @@ def _validate_parameter_structure(dayz_parameters):
5757
_validate_table_parameter_dict_keys(table, table_parameters)
5858

5959

60-
def _validate_key_column(column_parameters, column_table_msg):
61-
if 'missing_values_proportion' in column_parameters:
62-
msg = (
63-
f"Invalid 'missing_values_proportion' parameter for {column_table_msg}. Primary "
64-
"and alternate keys can not have the 'missing_values_proportion' parameter set."
65-
)
66-
raise SynthesizerProcessingError(msg)
67-
68-
6960
def _validate_numerical_parameters(column_parameters, column_table_msg):
7061
for param in ['min_value', 'max_value']:
7162
if param in column_parameters and not _is_numerical(column_parameters[param]):
@@ -141,7 +132,7 @@ def _validate_categorical_parameters(column_parameters, column_table_msg):
141132
raise SynthesizerProcessingError(msg)
142133

143134

144-
def _validate_missing_value_parameters(column_parameters, column_table_msg):
135+
def _validate_missing_value_parameters(column_parameters, column_table_msg, is_key_column):
145136
missing_values_proportion = column_parameters['missing_values_proportion']
146137
if not _is_numerical(missing_values_proportion) or (
147138
missing_values_proportion < 0.0 or missing_values_proportion > 1.0
@@ -151,6 +142,12 @@ def _validate_missing_value_parameters(column_parameters, column_table_msg):
151142
'must be a float between 0.0 and 1.0.'
152143
)
153144
raise SynthesizerProcessingError(msg)
145+
elif is_key_column and missing_values_proportion != 0:
146+
msg = (
147+
f"Invalid 'missing_values_proportion' parameter for {column_table_msg}. Primary "
148+
"and alternate keys must have 'missing_values_proportion' parameter be zero."
149+
)
150+
raise SynthesizerProcessingError(msg)
154151

155152

156153
def _validate_column_parameters(table, column, column_metadata, column_parameters, is_key_column):
@@ -166,9 +163,6 @@ def _validate_column_parameters(table, column, column_metadata, column_parameter
166163
)
167164
raise SynthesizerProcessingError(msg)
168165

169-
if is_key_column:
170-
_validate_key_column(column_parameters, column_table_msg)
171-
172166
if sdtype == 'numerical':
173167
_validate_numerical_parameters(column_parameters, column_table_msg)
174168
elif sdtype == 'datetime':
@@ -177,7 +171,7 @@ def _validate_column_parameters(table, column, column_metadata, column_parameter
177171
_validate_categorical_parameters(column_parameters, column_table_msg)
178172

179173
if 'missing_values_proportion' in column_parameters:
180-
_validate_missing_value_parameters(column_parameters, column_table_msg)
174+
_validate_missing_value_parameters(column_parameters, column_table_msg, is_key_column)
181175

182176

183177
def _validate_table_parameters(table, table_metadata, table_parameters):

tests/unit/single_table/test__dayz_utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ def test_detect_column_parameter():
5656
# Assert
5757
assert result == {
5858
'columns': {
59-
'pk': {},
59+
'pk': {'missing_values_proportion': 0.0},
6060
'num_col': {
6161
'num_decimal_digits': 1,
6262
'min_value': 1.0,
@@ -77,7 +77,7 @@ def test_detect_column_parameter():
7777
'end_timestamp': '2020-01-03 00:00:00',
7878
'missing_values_proportion': 0.25,
7979
},
80-
'alt_key': {},
80+
'alt_key': {'missing_values_proportion': 0.0},
8181
}
8282
}
8383

tests/unit/single_table/test_dayz.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ def test__validate_column_parameter():
9898
column_metadata = {'sdtype': 'id'}
9999
bad_column_parameters = {'invalid_key': None}
100100
bad_missing_value = {'missing_values_proportion': 100}
101+
bad_key_missing_value = {'missing_values_proportion': 0.5}
101102

102103
# Run and Assert
103104
expected_bad_column_msg = re.escape(
@@ -118,10 +119,10 @@ def test__validate_column_parameter():
118119

119120
expected_missing_values_with_key_msg = re.escape(
120121
"Invalid 'missing_values_proportion' parameter for column 'column' in table 'table'. "
121-
"Primary and alternate keys can not have the 'missing_values_proportion' parameter set."
122+
"Primary and alternate keys must have 'missing_values_proportion' parameter be zero."
122123
)
123124
with pytest.raises(SynthesizerProcessingError, match=expected_missing_values_with_key_msg):
124-
_validate_column_parameters('table', 'column', column_metadata, bad_missing_value, True)
125+
_validate_column_parameters('table', 'column', column_metadata, bad_key_missing_value, True)
125126

126127

127128
def test__validate_column_parameters_numerical():

0 commit comments

Comments
 (0)