Skip to content

Commit 6466a97

Browse files
committed
wip
1 parent c80808d commit 6466a97

File tree

7 files changed

+419
-57
lines changed

7 files changed

+419
-57
lines changed

pyproject.toml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ classifiers = [
1414
'Programming Language :: Python :: 3.13',
1515
'Topic :: Scientific/Engineering :: Artificial Intelligence',
1616
]
17-
keywords = ['sdv', 'synthetic-data', 'synhtetic-data-generation', 'timeseries', 'single-table', 'multi-table']
17+
keywords = ['sdv', 'synthetic-data', 'synthetic-data-generation', 'timeseries', 'single-table', 'multi-table']
1818
dynamic = ['version']
1919
license = 'BUSL-1.1'
2020
license-files = ['LICENSE']
@@ -29,10 +29,10 @@ dependencies = [
2929
"numpy>=1.24.0;python_version>='3.10' and python_version<'3.12'",
3030
"numpy>=1.26.0;python_version>='3.12' and python_version<'3.13'",
3131
"numpy>=2.1.0;python_version>='3.13'",
32-
"pandas>=1.4.0;python_version<'3.11'",
33-
"pandas>=1.5.0;python_version>='3.11' and python_version<'3.12'",
34-
"pandas>=2.1.1;python_version>='3.12' and python_version<'3.13'",
35-
"pandas>=2.2.3;python_version>='3.13'",
32+
"pandas>=1.4.0,<3;python_version<'3.11'",
33+
"pandas>=1.5.0,<3;python_version>='3.11' and python_version<'3.12'",
34+
"pandas>=2.1.1,<3;python_version>='3.12' and python_version<'3.13'",
35+
"pandas>=2.2.3,<3;python_version>='3.13'",
3636
'tqdm>=4.29',
3737
'copulas>=0.12.1',
3838
'ctgan>=0.11.1',

sdv/metadata/multi_table.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -161,14 +161,16 @@ def _validate_child_map_circular_relationship(self, child_map):
161161
)
162162

163163
def _validate_foreign_child_key(self, child_table_name, parent_table_name, child_foreign_key):
164-
child_primary_key = _cast_to_iterable(self.tables[child_table_name].primary_key)
165-
child_foreign_key = _cast_to_iterable(child_foreign_key)
166-
if set(child_foreign_key).intersection(set(child_primary_key)):
167-
raise InvalidMetadataError(
168-
f"Invalid relationship between table '{parent_table_name}' and table "
169-
f"'{child_table_name}'. A relationship must connect a primary key "
170-
'with a non-primary key.'
171-
)
164+
pass
165+
# child_primary_key = _cast_to_iterable(self.tables[child_table_name].primary_key)
166+
# child_foreign_key = _cast_to_iterable(child_foreign_key)
167+
# # breakpoint()
168+
# if set(child_foreign_key).intersection(set(child_primary_key)):
169+
# raise InvalidMetadataError(
170+
# f"Invalid relationship between table '{parent_table_name}' and table "
171+
# f"'{child_table_name}'. A relationship must connect a primary key "
172+
# 'with a non-primary key.'
173+
# )
172174

173175
def _validate_new_foreign_key_is_not_reused(
174176
self, parent_table_name, parent_primary_key, child_table_name, child_foreign_key
@@ -312,7 +314,6 @@ def add_relationship(
312314
different
313315
``sdtype``.
314316
- ``InvalidMetadataError`` if the relationship causes a circular dependency.
315-
- ``InvalidMetadataError`` if ``child_foreign_key`` is a primary key.
316317
"""
317318
self._validate_relationship(
318319
parent_table_name, child_table_name, parent_primary_key, child_foreign_key
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
import pandas as pd
2+
import pytest
3+
4+
from sdv.metadata import MultiTableMetadata
5+
6+
7+
@pytest.fixture()
8+
def primary_key_to_primary_key():
9+
metadata = MultiTableMetadata.load_from_dict({
10+
'tables': {
11+
'tableA': {
12+
'columns': {
13+
'table_A_primary_key': {'sdtype': 'id'},
14+
'column_1': {'sdtype': 'categorical'},
15+
},
16+
'primary_key': 'table_A_primary_key',
17+
},
18+
'tableB': {
19+
'columns': {
20+
'table_B_primary_key': {'sdtype': 'id'},
21+
'column_2': {'sdtype': 'categorical'},
22+
},
23+
'primary_key': 'table_B_primary_key',
24+
},
25+
},
26+
'relationships': [
27+
{
28+
'parent_table_name': 'tableA',
29+
'parent_primary_key': 'table_A_primary_key',
30+
'child_table_name': 'tableB',
31+
'child_foreign_key': 'table_B_primary_key',
32+
}
33+
],
34+
})
35+
data = {
36+
'tableA': pd.DataFrame({
37+
'table_A_primary_key': range(5),
38+
'column_1': ['A', 'B', 'B', 'C', 'C'],
39+
}),
40+
'tableB': pd.DataFrame({
41+
'table_B_primary_key': range(5),
42+
'column_2': ['A', 'B', 'B', 'C', 'C'],
43+
}),
44+
}
45+
return data, metadata

tests/integration/metadata/test_metadata.py

Lines changed: 21 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,10 @@
1919

2020
def test_metadata():
2121
"""Test ``Metadata``."""
22-
# Create an instance
22+
# Setup
2323
instance = Metadata()
2424

25-
# To dict
25+
# Run
2626
result = instance.to_dict()
2727

2828
# Assert
@@ -278,7 +278,6 @@ def test_detect_from_dataframes_single_table():
278278
"""Test the ``detect_from_dataframes`` method works with a single table."""
279279
# Setup
280280
data, _ = download_demo(modality='multi_table', dataset_name='fake_hotels')
281-
282281
metadata = Metadata.detect_from_dataframes({'table_1': data['hotels']})
283282

284283
# Run
@@ -883,25 +882,19 @@ def test_detect_from_dataframes_invalid_format():
883882

884883
def test_no_duplicated_foreign_key_relationships_are_generated():
885884
# Setup
886-
parent_a = pd.DataFrame(
887-
data={
888-
'id': ['id-' + str(i) for i in range(100)],
889-
'col1': [round(i, 2) for i in np.random.uniform(low=0, high=10, size=100)],
890-
}
891-
)
892-
parent_b = pd.DataFrame(
893-
data={
894-
'id': ['id-' + str(i) for i in range(100)],
895-
'col2': [round(i, 2) for i in np.random.uniform(low=0, high=10, size=100)],
896-
}
897-
)
885+
parent_a = pd.DataFrame({
886+
'id': ['id-' + str(i) for i in range(100)],
887+
'col1': [round(i, 2) for i in np.random.uniform(low=0, high=10, size=100)],
888+
})
889+
parent_b = pd.DataFrame({
890+
'id': ['id-' + str(i) for i in range(100)],
891+
'col2': [round(i, 2) for i in np.random.uniform(low=0, high=10, size=100)],
892+
})
898893

899-
child_c = pd.DataFrame(
900-
data={
901-
'id': ['id-' + str(i) for i in np.random.randint(0, 100, size=1000)],
902-
'col3': [round(i, 2) for i in np.random.uniform(low=0, high=10, size=1000)],
903-
}
904-
)
894+
child_c = pd.DataFrame({
895+
'id': ['id-' + str(i) for i in np.random.randint(0, 100, size=1000)],
896+
'col3': [round(i, 2) for i in np.random.uniform(low=0, high=10, size=1000)],
897+
})
905898

906899
data = {'parent_a': parent_a, 'parent_b': parent_b, 'child_c': child_c}
907900

@@ -910,12 +903,18 @@ def test_no_duplicated_foreign_key_relationships_are_generated():
910903

911904
# Assert
912905
assert metadata.relationships == [
906+
{
907+
'child_foreign_key': 'id',
908+
'child_table_name': 'parent_b',
909+
'parent_primary_key': 'id',
910+
'parent_table_name': 'parent_a',
911+
},
913912
{
914913
'parent_table_name': 'parent_a',
915914
'child_table_name': 'child_c',
916915
'parent_primary_key': 'id',
917916
'child_foreign_key': 'id',
918-
}
917+
},
919918
]
920919

921920

0 commit comments

Comments
 (0)