Skip to content

Commit 0491064

Browse files
authored
Unable to validate just 1 table of a multi-table schema (#2679)
1 parent 219576d commit 0491064

File tree

3 files changed

+35
-13
lines changed

3 files changed

+35
-13
lines changed

sdv/metadata/metadata.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -297,7 +297,8 @@ def validate_table(self, data, table_name=None):
297297
'to validate.'
298298
)
299299

300-
return self.validate_data({table_name: data})
300+
self._validate_table_exists(table_name)
301+
return self._validate_data({table_name: data}, table_name)
301302

302303
def get_column_names(self, table_name=None, **kwargs):
303304
"""Return a list of column names that match the given metadata keyword arguments."""

sdv/metadata/multi_table.py

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -914,17 +914,20 @@ def _validate_foreign_keys(self, data):
914914

915915
return [error_msg] if error_msg else []
916916

917-
def validate_data(self, data):
918-
"""Validate the data matches the metadata.
917+
def _validate_data(self, data, table_name=None):
918+
"""Validate the given data matches the metadata.
919919
920920
Checks the following rules:
921-
* all tables of the metadata are present in the data
922921
* every table of the data satisfies its own metadata
923-
* all foreign keys belong to a primay key
922+
* if no table_name is provided, all tables of the metadata are present in the data
923+
* if no table_name is provided, all foreign keys belong to a primary key
924924
925925
Args:
926926
data (dict):
927927
A dictionary of table names to pd.DataFrames.
928+
table_name (str, optional):
929+
The specific table to validate. If set, only validates the data for the
930+
table. If None, validates the data for all tables. Defaults to None.
928931
929932
Raises:
930933
InvalidDataError:
@@ -938,13 +941,35 @@ def validate_data(self, data):
938941
raise InvalidMetadataError('Please pass in a dictionary mapping tables to dataframes.')
939942

940943
errors = []
941-
errors += self._validate_missing_tables(data)
944+
errors += self._validate_missing_tables(data) if not table_name else []
942945
errors += self._validate_all_tables(data)
943-
errors += self._validate_foreign_keys(data)
946+
errors += self._validate_foreign_keys(data) if not table_name else []
944947

945948
if errors:
946949
raise InvalidDataError(errors)
947950

951+
def validate_data(self, data):
952+
"""Validate the data matches the metadata.
953+
954+
Checks the following rules:
955+
* every table of the data satisfies its own metadata
956+
* all tables of the metadata are present in the data
957+
* all foreign keys belong to a primary key
958+
959+
Args:
960+
data (dict):
961+
A dictionary of table names to pd.DataFrames.
962+
963+
Raises:
964+
InvalidDataError:
965+
This error is raised if the data does not match its sdtype requirements.
966+
967+
Warns:
968+
A warning is raised if ``datetime_format`` is missing from a column represented
969+
as ``object`` in the dataframe and its sdtype is ``datetime``.
970+
"""
971+
self._validate_data(data)
972+
948973
def add_table(self, table_name):
949974
"""Add a table to the metadata.
950975

tests/unit/metadata/test_metadata.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
import pandas as pd
55
import pytest
66

7-
from sdv.errors import InvalidDataError
87
from sdv.metadata.errors import InvalidMetadataError
98
from sdv.metadata.metadata import Metadata
109
from sdv.metadata.single_table import SingleTableMetadata
@@ -584,18 +583,15 @@ def test_validate_table(self):
584583
)
585584
table = get_multi_table_data()['nesreca']
586585

587-
expected_error_wrong_name = re.escape(
588-
'The provided data does not match the metadata:\n'
589-
"The provided data is missing the tables {'nesreca'}."
590-
)
586+
expected_error_wrong_name = re.escape("Unknown table name ('wrong_name').")
591587
expected_error_mutli_table = re.escape(
592588
'Metadata contains more than one table, please specify the `table_name` to validate.'
593589
)
594590

595591
# Run and Assert
596592
metadata_single_table.validate_table(table)
597593
metadata_single_table.validate_table(table, 'nesreca')
598-
with pytest.raises(InvalidDataError, match=expected_error_wrong_name):
594+
with pytest.raises(InvalidMetadataError, match=expected_error_wrong_name):
599595
metadata_single_table.validate_table(table, 'wrong_name')
600596
with pytest.raises(InvalidMetadataError, match=expected_error_mutli_table):
601597
metadata_multi_table.validate_table(table)

0 commit comments

Comments
 (0)