@@ -137,26 +137,27 @@ def large_metadata():
137137 })
138138
139139
140- def test_simplify_schema (capsys , large_data , large_metadata ):
140+ def test_simplify_schema (capsys ):
141141 """Test ``simplify_schema`` end to end."""
142142 # Setup
143- num_estimated_column_before_simplification = _get_total_estimated_columns (large_metadata )
144- HMASynthesizer (large_metadata )
143+ data , metadata = download_demo ('multi_table' , 'AustralianFootball' )
144+ num_estimated_column_before_simplification = _get_total_estimated_columns (metadata )
145+ HMASynthesizer (metadata )
145146 captured_before_simplification = capsys .readouterr ()
146147
147148 # Run
148- data_simplify , metadata_simplify = simplify_schema (large_data , large_metadata )
149+ data_simplify , metadata_simplify = simplify_schema (data , metadata )
149150 captured_after_simplification = capsys .readouterr ()
150151
151152 # Assert
152153 expected_message_before = re .compile (
153154 r'PerformanceAlert: Using the HMASynthesizer on this metadata schema is not recommended\.'
154- r' To model this data, HMA will generate a large number of columns\. \(1034 columns\)\s+'
155+ r' To model this data, HMA will generate a large number of columns\. \(135934 columns\)\s+'
155156 r'Table Name\s*#\s*Columns in Metadata\s*Est # Columns\s*'
156- r'great_grandparent \s*1 \s*986 \s*'
157- r'grandparent \s*1 \s*41 \s*'
158- r'parent \s*1 \s*6 \s*'
159- r'child \s*1\s*1 \s*'
157+ r'match_stats \s*24 \s*24 \s*'
158+ r'matches \s*39 \s*364 \s*'
159+ r'players \s*5 \s*330 \s*'
160+ r'teams \s*1\s*135216 \s*'
160161 r'We recommend simplifying your metadata schema using '
161162 r"'sdv.utils.poc.simplify_schema'\.\s*"
162163 r'If this is not possible, please visit '
@@ -165,18 +166,18 @@ def test_simplify_schema(capsys, large_data, large_metadata):
165166 expected_message_after = re .compile (
166167 r'Success! The schema has been simplified\.\s+'
167168 r'Table Name\s*#\s*Columns \(Before\)\s*#\s*Columns \(After\)\s*'
168- r'child \s*3 \s*0 \s*'
169- r'grandparent \s*3 \s*3 \s*'
170- r'great_grandparent \s*2 \s*2 \s*'
171- r'parent \s*3 \s*2'
169+ r'match_stats \s*28 \s*3 \s*'
170+ r'matches \s*42 \s*21 \s*'
171+ r'players \s*6 \s*0 \s*'
172+ r'teams \s*2 \s*2'
172173 )
173174 assert expected_message_before .match (captured_before_simplification .out .strip ())
174175 assert expected_message_after .match (captured_after_simplification .out .strip ())
175176 metadata_simplify .validate ()
176177 metadata_simplify .validate_data (data_simplify )
177178 num_estimated_column_after_simplification = _get_total_estimated_columns (metadata_simplify )
178- assert num_estimated_column_before_simplification == 1034
179- assert num_estimated_column_after_simplification == 13
179+ assert num_estimated_column_before_simplification == 173818
180+ assert num_estimated_column_after_simplification == 517
180181
181182
182183def test_simpliy_nothing_to_simplify ():
0 commit comments