@@ -140,44 +140,49 @@ def test_set_same_operator(session, set_operator):
140140 ],
141141)
def test_distinct_set_operator(session, distinct_table, action, operator):
    """Check the SQL generated when ``distinct()`` is combined with a set operator.

    ``action`` combines two DataFrames built from ``distinct_table`` and
    ``operator`` is the SQL text expected between the two operands. The test
    runs with ``use_simplified_query_generation`` forced to ``True`` and puts
    the previous value back afterwards.
    """
    # Read the current setting *before* entering the try block: if this lookup
    # fails, the finally clause must not run with ``original`` unbound (that
    # would raise UnboundLocalError and hide the real error).
    original = session.conf.get("use_simplified_query_generation")
    try:
        session.conf.set("use_simplified_query_generation", True)
        df1 = session.table(distinct_table)
        df2 = session.table(distinct_table)

        # DISTINCT applied to the right operand only.
        df = action(df1, df2.distinct())
        assert (
            df.queries["queries"][0]
            == f"""( SELECT * FROM { distinct_table } ){ operator } ( SELECT DISTINCT * FROM { distinct_table } )"""
        )

        # DISTINCT applied to the left operand only.
        df = action(df1.distinct(), df2)
        assert (
            df.queries["queries"][0]
            == f"""( SELECT DISTINCT * FROM { distinct_table } ){ operator } ( SELECT * FROM { distinct_table } )"""
        )

        # DISTINCT applied to the result of the set operation.
        df = action(df1, df2).distinct()
        assert (
            df.queries["queries"][0]
            == f"""SELECT DISTINCT * FROM (( SELECT * FROM { distinct_table } ){ operator } ( SELECT * FROM { distinct_table } ))"""
        )

        # DISTINCT on the right operand and on the result.
        df = action(df1, df2.distinct()).distinct()
        assert (
            df.queries["queries"][0]
            == f"""SELECT DISTINCT * FROM (( SELECT * FROM { distinct_table } ){ operator } ( SELECT DISTINCT * FROM { distinct_table } ))"""
        )

        # DISTINCT on the left operand and on the result.
        df = action(df1.distinct(), df2).distinct()
        assert (
            df.queries["queries"][0]
            == f"""SELECT DISTINCT * FROM (( SELECT DISTINCT * FROM { distinct_table } ){ operator } ( SELECT * FROM { distinct_table } ))"""
        )

        # DISTINCT on both operands and on the result.
        df = action(df1.distinct(), df2.distinct()).distinct()
        assert (
            df.queries["queries"][0]
            == f"""SELECT DISTINCT * FROM (( SELECT DISTINCT * FROM { distinct_table } ){ operator } ( SELECT DISTINCT * FROM { distinct_table } ))"""
        )
    finally:
        # Restore the setting even when an assertion above fails.
        session.conf.set("use_simplified_query_generation", original)
181186
182187
183188@pytest .mark .parametrize ("set_operator" , [SET_UNION_ALL , SET_EXCEPT , SET_INTERSECT ])
@@ -1486,19 +1491,58 @@ def test_select_limit_orderby(session):
14861491 [Row (1 , "c" ), Row (3 , "b" ), Row (3 , "c" ), Row (5 , "a" )],
14871492 False ,
14881493 ),
1494+ (
1495+ lambda df : df .sort (col ("a" ), col ("b" )).distinct (),
1496+ lambda table : f"""SELECT DISTINCT * FROM { table } ORDER BY "A" ASC NULLS FIRST, "B" ASC NULLS FIRST""" ,
1497+ [Row (1 , "c" ), Row (3 , "b" ), Row (3 , "c" ), Row (5 , "a" )],
1498+ True ,
1499+ ),
14891500 (
14901501 lambda df : df .select ("a" , "b" ).sort (col ("a" ), col ("b" )).distinct (),
1491- lambda table : f"""SELECT DISTINCT "A", "B" FROM { table } ORDER BY "A" ASC NULLS FIRST, "B" ASC NULLS FIRST""" ,
1502+ lambda table : f"""SELECT DISTINCT * FROM ( SELECT "A", "B" FROM { table } ORDER BY "A" ASC NULLS FIRST, "B" ASC NULLS FIRST) """ ,
14921503 [Row (1 , "c" ), Row (3 , "b" ), Row (3 , "c" ), Row (5 , "a" )],
14931504 True ,
14941505 ),
1506+ # df.sort(A).select(B).distinct()
1507+ (
1508+ lambda df : df .sort (col ("a" )).select ("b" ).distinct (),
1509+ lambda table : f"""SELECT DISTINCT * FROM ( SELECT "B" FROM { table } ORDER BY "A" ASC NULLS FIRST)""" ,
1510+ [Row ("a" ), Row ("b" ), Row ("c" )],
1511+ True ,
1512+ ),
1513+ # df.sort(A).distinct().select(B)
1514+ (
1515+ lambda df : df .sort (col ("a" )).distinct ().select ("b" ),
1516+ lambda table : f"""SELECT "B" FROM ( SELECT DISTINCT * FROM { table } ORDER BY "A" ASC NULLS FIRST)""" ,
1517+ [Row ("a" ), Row ("b" ), Row ("c" ), Row ("c" )],
1518+ True ,
1519+ ),
1520+ # df.filter(A).select(B).distinct()
1521+ (
1522+ lambda df : df .filter (col ("a" ) > 1 ).select ("b" ).distinct (),
1523+ lambda table : f"""SELECT DISTINCT "B" FROM { table } WHERE ("A" > 1)""" ,
1524+ [Row ("a" ), Row ("b" ), Row ("c" )],
1525+ True ,
1526+ ),
1527+ # df.filter(A).distinct().select(B)
1528+ (
1529+ lambda df : df .filter (col ("a" ) > 1 ).distinct ().select ("b" ),
1530+ lambda table : f"""SELECT "B" FROM ( SELECT DISTINCT * FROM { table } WHERE ("A" > 1))""" ,
1531+ [Row ("a" ), Row ("b" ), Row ("c" )],
1532+ True ,
1533+ ),
14951534 ],
14961535)
def test_select_distinct(
    session, distinct_table, operation, expected_query, expected_result, sort_results
):
    """Check the result and generated SQL of ``operation`` applied to ``distinct_table``.

    ``operation`` receives the table DataFrame and returns the DataFrame under
    test; ``expected_query`` receives the table name and returns the expected
    SQL text. When ``expected_result`` is not ``None`` the rows are also
    compared, with ``sort_results`` passed through as ``sort``. The test runs
    with ``use_simplified_query_generation`` forced to ``True`` and puts the
    previous value back afterwards.
    """
    # Read the current setting *before* entering the try block: if this lookup
    # fails, the finally clause must not run with ``original`` unbound (that
    # would raise UnboundLocalError and hide the real error).
    original = session.conf.get("use_simplified_query_generation")
    try:
        session.conf.set("use_simplified_query_generation", True)
        df = session.table(distinct_table)
        df1 = operation(df)
        if expected_result is not None:
            Utils.check_answer(df1, expected_result, sort=sort_results)
        assert df1.queries["queries"][0] == expected_query(distinct_table)
    finally:
        # Restore the setting even when a check above fails.
        session.conf.set("use_simplified_query_generation", original)
0 commit comments