Skip to content

Commit d99d157

Browse files
committed
raise on self-join
1 parent 914431e commit d99d157

File tree

7 files changed

+26
-205
lines changed

7 files changed

+26
-205
lines changed

build/lib/data_algebra/db_model.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1617,6 +1617,10 @@ def _natural_join_sub_queries(self, *, join_node, using, temp_id_source):
16171617
sql_right = join_node.sources[1].to_near_sql_implementation_(
16181618
db_model=self, using=using_right, temp_id_source=temp_id_source
16191619
)
1620+
if sql_left.quoted_query_name == sql_right.quoted_query_name:
1621+
raise ValueError("""In join steps left and right subquery must not be identical,
1622+
one can work around this by using an extend() to add a new column on one side of join
1623+
(though one must make sure query optimization does not eliminate such a column).""")
16201624
return using_left, sql_left, using_right, sql_right
16211625

16221626
def natural_join_to_near_sql(
@@ -1673,7 +1677,6 @@ def natural_join_to_near_sql(
16731677
)
16741678
if (self.on_end is not None) and (len(self.on_end) > 0):
16751679
on_terms = on_terms + [self.on_end]
1676-
# TODO: if names match, wrap sub_sql2 (and do it prior to term construction)
16771680
near_sql = data_algebra.near_sql.NearSQLBinaryStep(
16781681
terms=terms,
16791682
query_name=view_name,

coverage.txt

Lines changed: 6 additions & 196 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ tests/test_compound_where.py .. [ 11%]
2424
tests/test_concat_rows.py ... [ 12%]
2525
tests/test_container.py .. [ 13%]
2626
tests/test_cross_product_join.py . [ 13%]
27-
tests/test_dag_elim.py ....F...... [ 17%]
27+
tests/test_dag_elim.py ........... [ 17%]
2828
tests/test_data_model_isolation.py . [ 18%]
2929
tests/test_db_handle.py .. [ 18%]
3030
tests/test_db_model.py . [ 19%]
@@ -118,195 +118,6 @@ tests/test_window_fns.py ..... [ 98%]
118118
tests/test_with.py .. [ 99%]
119119
tests/test_xicor.py .. [100%]
120120

121-
=================================== FAILURES ===================================
122-
______________________________ test_dag_elim_btt _______________________________
123-
124-
def test_dag_elim_btt():
125-
pd = data_algebra.default_data_model.pd
126-
d = pd.DataFrame({
127-
'x': [1, 2, 3],
128-
})
129-
ops = (
130-
descr(d=d)
131-
.natural_join(
132-
b=descr(d=d),
133-
by=['x'],
134-
jointype='left',
135-
)
136-
)
137-
expect = pd.DataFrame({
138-
'x': [1, 2, 3],
139-
})
140-
> data_algebra.test_util.check_transform(ops=ops, data=d, expect=expect)
141-
142-
tests/test_dag_elim.py:128:
143-
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
144-
145-
ops = (
146-
TableDescription(table_name="d", column_names=["x"]).natural_join(
147-
b=TableDescription(table_name="d", column_names=["x"]),
148-
by=["x"],
149-
jointype="LEFT",
150-
)
151-
)
152-
153-
data = {'d': x
154-
0 1
155-
1 2
156-
2 3}, expect = x
157-
0 1
158-
1 2
159-
2 3
160-
161-
def check_transform(
162-
ops,
163-
data,
164-
expect,
165-
*,
166-
float_tol: float = 1e-8,
167-
check_column_order: bool = False,
168-
cols_case_sensitive: bool = False,
169-
check_row_order: bool = False,
170-
check_parse: bool = True,
171-
models_to_skip: Optional[Iterable] = None,
172-
valid_for_empty: bool = True,
173-
empty_produces_empty: bool = True,
174-
) -> None:
175-
"""
176-
Test an operator dag produces the expected result, and parses correctly.
177-
Assert if there are issues.
178-
179-
:param ops: data_algebra.data_ops.ViewRepresentation
180-
:param data: pd.DataFrame or map of strings to pd.DataFrame
181-
:param expect: pd.DataFrame
182-
:param float_tol: passed to equivalent_frames()
183-
:param check_column_order: passed to equivalent_frames()
184-
:param cols_case_sensitive: passed to equivalent_frames()
185-
:param check_row_order: passed to equivalent_frames()
186-
:param check_parse: if True check expression parses/formats to self
187-
:param models_to_skip: None or set of model names or models to skip testing
188-
:param valid_for_empty: logical, if True perform tests on empty inputs
189-
:param empty_produces_empty: logical, if True assume empty inputs should produce empty output
190-
:return: nothing
191-
"""
192-
193-
# convert single table to dictionary
194-
if not isinstance(data, dict):
195-
cols_used = ops.columns_used()
196-
table_name = [k for k in cols_used.keys()][0]
197-
data = {table_name: data}
198-
199-
db_handles = [
200-
# non-connected handles, lets us test some of the SQL generation path
201-
data_algebra.SQLite.SQLiteModel().db_handle(None),
202-
data_algebra.BigQuery.BigQueryModel().db_handle(None),
203-
data_algebra.PostgreSQL.PostgreSQLModel().db_handle(None),
204-
data_algebra.SparkSQL.SparkSQLModel().db_handle(None),
205-
data_algebra.MySQL.MySQLModel().db_handle(None),
206-
]
207-
208-
test_dbs = get_test_dbs()
209-
db_handles = db_handles + test_dbs
210-
211-
if models_to_skip is not None:
212-
models_to_skip = {str(m) for m in models_to_skip}
213-
db_handles = [h for h in db_handles if str(h.db_model) not in models_to_skip]
214-
215-
caught: Optional[Any] = None
216-
try:
217-
check_transform_on_handles(
218-
ops=ops,
219-
data=data,
220-
expect=expect,
221-
float_tol=float_tol,
222-
check_column_order=check_column_order,
223-
cols_case_sensitive=cols_case_sensitive,
224-
check_row_order=check_row_order,
225-
check_parse=check_parse,
226-
db_handles=db_handles,
227-
valid_for_empty=valid_for_empty,
228-
empty_produces_empty=empty_produces_empty,
229-
)
230-
except AssertionError as ase:
231-
traceback.print_exc()
232-
caught = ase
233-
except Exception as exc:
234-
traceback.print_exc()
235-
caught = exc
236-
237-
for handle in db_handles:
238-
# noinspection PyBroadException
239-
try:
240-
handle.close()
241-
except Exception:
242-
pass
243-
244-
if caught is not None:
245-
> raise ValueError("testing caught " + str(caught))
246-
E ValueError: testing caught DBHandle(db_model=SQLiteModel, conn=<sqlite3.Connection object at 0x7ff5f8b46990>) error in test Execution failed on sql 'SELECT
247-
E COALESCE("d"."x", "d"."x") AS "x"
248-
E FROM
249-
E (
250-
E "d"
251-
E LEFT JOIN
252-
E "d"
253-
E ON
254-
E "d"."x" = "d"."x"
255-
E )
256-
E ': ambiguous column name: d.x
257-
258-
data_algebra/test_util.py:558: ValueError
259-
----------------------------- Captured stderr call -----------------------------
260-
Traceback (most recent call last):
261-
File "/Users/johnmount/opt/anaconda3/envs/ai_academy_3_9/lib/python3.9/site-packages/pandas/io/sql.py", line 2056, in execute
262-
cur.execute(*args, **kwargs)
263-
sqlite3.OperationalError: ambiguous column name: d.x
264-
265-
The above exception was the direct cause of the following exception:
266-
267-
Traceback (most recent call last):
268-
File "/Users/johnmount/Documents/work/data_algebra/data_algebra/test_util.py", line 225, in _run_handle_experiments
269-
res_db_sql_i = db_handle.read_query(sql_statements[i])
270-
File "/Users/johnmount/Documents/work/data_algebra/data_algebra/db_model.py", line 2284, in read_query
271-
return self.db_model.read_query(conn=self.conn, q=q)
272-
File "/Users/johnmount/Documents/work/data_algebra/data_algebra/db_model.py", line 889, in read_query
273-
r = pandas.io.sql.read_sql(q, conn)
274-
File "/Users/johnmount/opt/anaconda3/envs/ai_academy_3_9/lib/python3.9/site-packages/pandas/io/sql.py", line 602, in read_sql
275-
return pandas_sql.read_query(
276-
File "/Users/johnmount/opt/anaconda3/envs/ai_academy_3_9/lib/python3.9/site-packages/pandas/io/sql.py", line 2116, in read_query
277-
cursor = self.execute(*args)
278-
File "/Users/johnmount/opt/anaconda3/envs/ai_academy_3_9/lib/python3.9/site-packages/pandas/io/sql.py", line 2068, in execute
279-
raise ex from exc
280-
pandas.io.sql.DatabaseError: Execution failed on sql 'SELECT
281-
COALESCE("d"."x", "d"."x") AS "x"
282-
FROM
283-
(
284-
"d"
285-
LEFT JOIN
286-
"d"
287-
ON
288-
"d"."x" = "d"."x"
289-
)
290-
': ambiguous column name: d.x
291-
Traceback (most recent call last):
292-
File "/Users/johnmount/Documents/work/data_algebra/data_algebra/test_util.py", line 530, in check_transform
293-
check_transform_on_handles(
294-
File "/Users/johnmount/Documents/work/data_algebra/data_algebra/test_util.py", line 424, in check_transform_on_handles
295-
_run_handle_experiments(
296-
File "/Users/johnmount/Documents/work/data_algebra/data_algebra/test_util.py", line 247, in _run_handle_experiments
297-
raise ValueError(f"{db_handle} error in test " + str(caught))
298-
ValueError: DBHandle(db_model=SQLiteModel, conn=<sqlite3.Connection object at 0x7ff5f8b46990>) error in test Execution failed on sql 'SELECT
299-
COALESCE("d"."x", "d"."x") AS "x"
300-
FROM
301-
(
302-
"d"
303-
LEFT JOIN
304-
"d"
305-
ON
306-
"d"."x" = "d"."x"
307-
)
308-
': ambiguous column name: d.x
309-
310121
---------- coverage: platform darwin, python 3.9.7-final-0 -----------
311122
Name Stmts Miss Cover Missing
312123
--------------------------------------------------------------------
@@ -324,7 +135,7 @@ data_algebra/data_model.py 13 0 100%
324135
data_algebra/data_ops.py 1212 188 84% 37-38, 57-58, 86, 180, 219, 234, 292, 331, 333, 335, 337, 341, 395, 401, 437, 440, 460, 466, 490, 518, 522, 524, 603, 605, 636, 656, 662, 677, 679, 690, 702, 728, 730, 743, 745, 751, 763, 766, 779, 781, 783, 785, 796, 798, 834, 848, 868, 870, 872, 874, 878, 912-917, 920-930, 946, 970, 1008-1011, 1016, 1020, 1142, 1147, 1152, 1154, 1161, 1163, 1171, 1177, 1179, 1181, 1184, 1187, 1190, 1195, 1212, 1224, 1281, 1283, 1285, 1287, 1289, 1291, 1329, 1339, 1354, 1431, 1433, 1443, 1449, 1452, 1470, 1480, 1535, 1537, 1539, 1542, 1553, 1625, 1627, 1661-1664, 1668, 1672, 1684, 1735, 1739, 1742, 1744, 1773-1776, 1780, 1782, 1794, 1843, 1848, 1853, 1882-1885, 1889, 1891, 1902, 1952, 1957, 1963, 1966, 1982-1985, 1991, 1993, 1995, 1997, 2009, 2135, 2137, 2148, 2208, 2214, 2225, 2228, 2234, 2263, 2282, 2284, 2286, 2297, 2370, 2376, 2378, 2398-2401, 2410, 2412, 2414, 2416, 2427, 2498, 2514-2517, 2521, 2523, 2569, 2641, 2644-2652, 2657, 2676-2685
325136
data_algebra/data_ops_types.py 92 15 84% 84, 98, 108, 321, 331, 336-337, 341-345, 350, 354, 359
326137
data_algebra/data_ops_utils.py 49 5 90% 30, 42, 44, 48, 58
327-
data_algebra/db_model.py 984 82 92% 66, 74, 88, 101, 109-111, 260, 371-372, 448, 871, 931, 936, 958-962, 972, 986-990, 1014, 1026, 1085, 1087, 1093, 1104, 1116, 1148, 1152, 1157, 1190, 1194, 1210, 1213, 1295, 1306, 1311, 1333, 1337, 1379, 1386, 1425, 1429, 1462, 1466, 1488, 1492, 1506, 1542, 1546, 1600, 1604, 1607, 1610, 1635, 1698, 1702, 1706, 1709, 1714, 1752, 1763, 1884, 2002-2003, 2016, 2028, 2053, 2077, 2083, 2089, 2113, 2122, 2157, 2192, 2195, 2202, 2373-2374, 2379-2380, 2384
138+
data_algebra/db_model.py 986 82 92% 66, 74, 88, 101, 109-111, 260, 371-372, 448, 871, 931, 936, 958-962, 972, 986-990, 1014, 1026, 1085, 1087, 1093, 1104, 1116, 1148, 1152, 1157, 1190, 1194, 1210, 1213, 1295, 1306, 1311, 1333, 1337, 1379, 1386, 1425, 1429, 1462, 1466, 1488, 1492, 1506, 1542, 1546, 1600, 1604, 1607, 1610, 1639, 1701, 1705, 1709, 1712, 1717, 1755, 1766, 1887, 2005-2006, 2019, 2031, 2056, 2080, 2086, 2092, 2116, 2125, 2160, 2195, 2198, 2205, 2376-2377, 2382-2383, 2387
328139
data_algebra/eval_cache.py 52 0 100%
329140
data_algebra/expr_parse.py 35 0 100%
330141
data_algebra/expr_rep.py 642 88 86% 208-211, 216, 224, 240, 247, 258, 282, 291-306, 350, 356, 362, 368, 377, 383, 389, 398, 401, 404, 407, 410, 413, 464, 524, 602, 608, 643, 655, 661, 667, 679, 685, 733, 900, 915, 925, 940, 1027, 1042, 1094, 1101-1108, 1114-1115, 1140-1141, 1173, 1180-1181, 1187, 1222, 1224, 1260, 1346, 1360, 1362, 1364, 1367-1373, 1375, 1447, 1461-1463, 1479, 1518
@@ -337,11 +148,10 @@ data_algebra/pandas_model.py 5 0 100%
337148
data_algebra/parse_by_lark.py 159 24 85% 73, 95, 110, 131-132, 139, 146, 156, 170-171, 173, 185, 191, 198-202, 230, 238, 248-251
338149
data_algebra/python3_lark.py 1 0 100%
339150
data_algebra/solutions.py 91 3 97% 59, 253, 300
340-
data_algebra/test_util.py 286 29 90% 87, 107, 117, 120, 124, 147, 150, 154, 157, 166, 221-222, 239-240, 259, 261-269, 315, 318, 326, 337, 344, 348, 359, 370, 544-545, 554-555
151+
data_algebra/test_util.py 286 39 86% 87, 107, 117, 120, 124, 147, 150, 154, 157, 166, 221-222, 238-243, 247, 259, 261-269, 315, 318, 326, 337, 344, 348, 359, 370, 543-548, 554-555, 558
341152
data_algebra/util.py 140 29 79% 26, 51, 56, 61, 84-85, 88-89, 92-93, 96-97, 100-101, 104-105, 108-109, 112-113, 116-117, 120-121, 192, 207, 248, 252, 254
342153
--------------------------------------------------------------------
343-
TOTAL 5684 702 88%
154+
TOTAL 5686 712 87%
155+
344156

345-
=========================== short test summary info ============================
346-
FAILED tests/test_dag_elim.py::test_dag_elim_btt - ValueError: testing caught...
347-
================== 1 failed, 292 passed in 455.32s (0:07:35) ===================
157+
======================= 293 passed in 427.71s (0:07:07) ========================

data_algebra/db_model.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1617,6 +1617,10 @@ def _natural_join_sub_queries(self, *, join_node, using, temp_id_source):
16171617
sql_right = join_node.sources[1].to_near_sql_implementation_(
16181618
db_model=self, using=using_right, temp_id_source=temp_id_source
16191619
)
1620+
if sql_left.quoted_query_name == sql_right.quoted_query_name:
1621+
raise ValueError("""In join steps left and right subquery must not be identical,
1622+
one can work around this by using an extend() to add a new column on one side of join
1623+
(though one must make sure query optimization does not eliminate such a column).""")
16201624
return using_left, sql_left, using_right, sql_right
16211625

16221626
def natural_join_to_near_sql(
@@ -1673,7 +1677,6 @@ def natural_join_to_near_sql(
16731677
)
16741678
if (self.on_end is not None) and (len(self.on_end) > 0):
16751679
on_terms = on_terms + [self.on_end]
1676-
# TODO: if names match, wrap sub_sql2 (and do it prior to term construction)
16771680
near_sql = data_algebra.near_sql.NearSQLBinaryStep(
16781681
terms=terms,
16791682
query_name=view_name,
94 Bytes
Binary file not shown.

dist/data_algebra-1.3.2.tar.gz

83 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)