Skip to content

Commit 8ebe0f5

Browse files
committed
Fix rebase
1 parent 9565ed8 commit 8ebe0f5

File tree

2 files changed

+23
-28
lines changed

2 files changed

+23
-28
lines changed

tests/integration/datasets/test_demo.py

Lines changed: 7 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@ def test_get_available_demos_single_table():
1818
'dataset_name': [
1919
'adult',
2020
'alarm',
21+
'asia',
2122
'census',
2223
'census_extended',
2324
'child',
@@ -34,6 +35,7 @@ def test_get_available_demos_single_table():
3435
'size_MB': [
3536
3.91,
3637
4.52,
38+
1.28,
3739
98.17,
3840
4.95,
3941
3.20,
@@ -62,26 +64,18 @@ def test_get_available_demos_single_table():
6264
1,
6365
1,
6466
1,
67+
1,
6568
],
6669
})
6770
pd.testing.assert_frame_equal(tables_info[['dataset_name', 'size_MB', 'num_tables']], expected)
6871

6972

7073
def test_get_available_demos_multi_table():
71-
"""Test multi_table demos listing is returned with expected columns and types."""
74+
"""Test multi_table demos listing is non-empty with valid sizes and table counts."""
7275
# Run
7376
tables_info = get_available_demos('multi_table')
7477

7578
# Assert
76-
expected = pd.DataFrame({
77-
'dataset_name': [
78-
'fake_hotels',
79-
'fake_hotels_extended',
80-
],
81-
'size_MB': [
82-
0.05,
83-
0.07,
84-
],
85-
'num_tables': [2, 2],
86-
})
87-
pd.testing.assert_frame_equal(tables_info[['dataset_name', 'size_MB', 'num_tables']], expected)
79+
assert not tables_info.empty
80+
assert (tables_info['num_tables'] > 1).all()
81+
assert (tables_info['size_MB'] >= 0).all()

tests/integration/utils/test_poc.py

Lines changed: 16 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -137,26 +137,27 @@ def large_metadata():
137137
})
138138

139139

140-
def test_simplify_schema(capsys, large_data, large_metadata):
140+
def test_simplify_schema(capsys):
141141
"""Test ``simplify_schema`` end to end."""
142142
# Setup
143-
num_estimated_column_before_simplification = _get_total_estimated_columns(large_metadata)
144-
HMASynthesizer(large_metadata)
143+
data, metadata = download_demo('multi_table', 'AustralianFootball')
144+
num_estimated_column_before_simplification = _get_total_estimated_columns(metadata)
145+
HMASynthesizer(metadata)
145146
captured_before_simplification = capsys.readouterr()
146147

147148
# Run
148-
data_simplify, metadata_simplify = simplify_schema(large_data, large_metadata)
149+
data_simplify, metadata_simplify = simplify_schema(data, metadata)
149150
captured_after_simplification = capsys.readouterr()
150151

151152
# Assert
152153
expected_message_before = re.compile(
153154
r'PerformanceAlert: Using the HMASynthesizer on this metadata schema is not recommended\.'
154-
r' To model this data, HMA will generate a large number of columns\. \(1034 columns\)\s+'
155+
r' To model this data, HMA will generate a large number of columns\. \(135934 columns\)\s+'
155156
r'Table Name\s*#\s*Columns in Metadata\s*Est # Columns\s*'
156-
r'great_grandparent\s*1\s*986\s*'
157-
r'grandparent\s*1\s*41\s*'
158-
r'parent\s*1\s*6\s*'
159-
r'child\s*1\s*1\s*'
157+
r'match_stats\s*24\s*24\s*'
158+
r'matches\s*39\s*364\s*'
159+
r'players\s*5\s*330\s*'
160+
r'teams\s*1\s*135216\s*'
160161
r'We recommend simplifying your metadata schema using '
161162
r"'sdv.utils.poc.simplify_schema'\.\s*"
162163
r'If this is not possible, please visit '
@@ -165,18 +166,18 @@ def test_simplify_schema(capsys, large_data, large_metadata):
165166
expected_message_after = re.compile(
166167
r'Success! The schema has been simplified\.\s+'
167168
r'Table Name\s*#\s*Columns \(Before\)\s*#\s*Columns \(After\)\s*'
168-
r'child\s*3\s*0\s*'
169-
r'grandparent\s*3\s*3\s*'
170-
r'great_grandparent\s*2\s*2\s*'
171-
r'parent\s*3\s*2'
169+
r'match_stats\s*28\s*3\s*'
170+
r'matches\s*42\s*21\s*'
171+
r'players\s*6\s*0\s*'
172+
r'teams\s*2\s*2'
172173
)
173174
assert expected_message_before.match(captured_before_simplification.out.strip())
174175
assert expected_message_after.match(captured_after_simplification.out.strip())
175176
metadata_simplify.validate()
176177
metadata_simplify.validate_data(data_simplify)
177178
num_estimated_column_after_simplification = _get_total_estimated_columns(metadata_simplify)
178-
assert num_estimated_column_before_simplification == 1034
179-
assert num_estimated_column_after_simplification == 13
179+
assert num_estimated_column_before_simplification == 173818
180+
assert num_estimated_column_after_simplification == 517
180181

181182

182183
def test_simpliy_nothing_to_simplify():

0 commit comments

Comments
 (0)