"""
Relational optimization pass that attempts to transpose a Join sitting
directly above an Aggregate into an Aggregate sitting above the Join, so that
the join can filter rows before they are aggregated.

NOTE: this module is a work in progress. The precondition checks in
`JoinAggregateTransposeShuttle.join_aggregate_transpose` are implemented, but
the rewrite itself is not, so the method always returns None and the pass
currently leaves every join on the default `RelationalShuttle` path.
"""

__all__ = ["pull_joins_after_aggregates"]


from collections.abc import Iterable

import pydough.pydough_operators as pydop
from pydough.relational import (
    Aggregate,
    CallExpression,
    ColumnReference,
    ColumnReferenceFinder,
    Join,
    JoinCardinality,
    JoinType,
    Project,
    RelationalExpression,
    RelationalNode,
    RelationalRoot,
    RelationalShuttle,
)


class JoinAggregateTransposeShuttle(RelationalShuttle):
    """
    Relational shuttle that inspects every Join and, when one of its inputs is
    an Aggregate, attempts to replace the Join-above-Aggregate shape with an
    Aggregate-above-Join shape. Joins that cannot be transposed fall back to
    the `visit_join` implementation of the parent class.
    """

    def __init__(self) -> None:
        # Reusable finder used to collect the column references that appear in
        # a join condition.
        self.finder: ColumnReferenceFinder = ColumnReferenceFinder()

    def reset(self) -> None:
        """
        Resets the column reference finder owned by this shuttle.
        """
        self.finder.reset()

    def visit_join(self, node: Join) -> RelationalNode:
        """
        Attempts the transpose on a Join, first treating the left input as the
        Aggregate and then the right input.

        Args:
            `node`: the Join node being visited.

        Returns:
            The transposed tree (with its inputs recursively transformed) if
            either attempt succeeded, otherwise the result of the regular
            `visit_join` implementation.
        """
        # Try the left input (index 0) before the right input (index 1); the
        # first successful attempt wins.
        for input_idx, is_left in ((0, True), (1, False)):
            candidate: RelationalNode = node.inputs[input_idx]
            if not isinstance(candidate, Aggregate):
                continue
            result: RelationalNode | None = self.join_aggregate_transpose(
                node, candidate, is_left
            )
            if result is not None:
                return self.generic_visit_inputs(result)

        # Neither attempt succeeded: fall back to the regular implementation.
        return super().visit_join(node)

    def generate_name(self, base: str, used_names: Iterable[str]) -> str:
        """
        Generates a column name derived from `base` that does not collide with
        any name in `used_names`.

        Args:
            `base`: the preferred name.
            `used_names`: the names that are already taken.

        Returns:
            `base` if it is unused, otherwise the first unused name of the
            form `{base}_{i}` for i = 0, 1, 2, ...
        """
        # Materialize once: `used_names` is only promised to be an Iterable,
        # and repeated `in` tests would exhaust a one-shot iterator after the
        # first check.
        taken: set[str] = set(used_names)
        if base not in taken:
            return base
        suffix: int = 0
        while True:
            candidate: str = f"{base}_{suffix}"
            if candidate not in taken:
                return candidate
            suffix += 1

    def join_aggregate_transpose(
        self, join: Join, aggregate: Aggregate, is_left: bool
    ) -> RelationalNode | None:
        """
        Transposes a Join above an Aggregate into an Aggregate above a Join,
        when possible and it would be better for performance to use the join
        first to filter some of the rows before aggregating.

        Args:
            `join`: the Join node above the Aggregate.
            `aggregate`: the Aggregate node that is one of the inputs to the
            Join.
            `is_left`: whether the Aggregate is the left input to the Join
            (True) or the right input (False).

        Returns:
            The new RelationalNode tree with the Join and Aggregate transposed,
            or None if the transpose is not possible. The rewrite is not
            implemented yet, so this currently always returns None.
        """
        # A plural join is only tolerable when every aggregation is one that is
        # not affected by a change in cardinality.
        aggs_allow_plural: bool = all(
            call.op in (pydop.MIN, pydop.MAX, pydop.ANYTHING, pydop.NDISTINCT)
            for call in aggregate.aggregations.values()
        )

        # The cardinality with regards to the input being considered must be
        # singular (unless the aggregations allow plural), and must be
        # filtering (since the point of joining before aggregation is to reduce
        # the number of rows to aggregate).
        cardinality: JoinCardinality = (
            join.cardinality if is_left else join.reverse_cardinality
        )

        # Only INNER and SEMI joins are accepted, and SEMI only when the
        # aggregate is the left input.
        join_type_ok: bool = join.join_type == JoinType.INNER or (
            join.join_type == JoinType.SEMI and is_left
        )
        if not (
            join_type_ok
            and cardinality.filters
            and (cardinality.singular or aggs_allow_plural)
        ):
            return None

        # The alias of the input to the join that corresponds to the
        # aggregate.
        desired_alias: str | None = join.default_input_aliases[0 if is_left else 1]

        # Find all of the columns used in the join condition that come from the
        # aggregate side of the join.
        self.finder.reset()
        join.condition.accept(self.finder)
        agg_condition_columns: set[ColumnReference] = {
            col
            for col in self.finder.get_column_references()
            if col.input_name == desired_alias
        }

        # The condition must use at least one column from the aggregate side,
        # and ALL such columns must be aggregate keys.
        if not agg_condition_columns or any(
            col.name not in aggregate.keys for col in agg_condition_columns
        ):
            return None

        # TODO: FINISH THIS. Still missing: deriving the new join columns and
        # condition, the new aggregate keys/aggregations and the final
        # projection, then returning `_assemble_transposed_plan(...)`. Until
        # that exists, report that the transpose is not possible so that the
        # caller falls back to the regular join handling.
        return None

    def _assemble_transposed_plan(
        self,
        join: Join,
        aggregate: Aggregate,
        is_left: bool,
        new_condition: RelationalExpression,
        new_join_columns: dict[str, RelationalExpression],
        new_aggregate_keys: dict[str, RelationalExpression],
        new_aggregate_aggs: dict[str, CallExpression],
        project_columns: dict[str, RelationalExpression],
    ) -> RelationalNode:
        """
        Builds the Project -> Aggregate -> Join tree for a transposed plan.
        Not called yet: it holds the planned final step of
        `join_aggregate_transpose`, which is still unfinished.

        Args:
            `join`: the original Join node.
            `aggregate`: the original Aggregate input of `join`.
            `is_left`: whether `aggregate` is the left input of `join`.
            `new_condition`: the condition of the new Join.
            `new_join_columns`: the output columns of the new Join.
            `new_aggregate_keys`: the keys of the new Aggregate.
            `new_aggregate_aggs`: the aggregations of the new Aggregate.
            `project_columns`: the columns of the Project placed on top.

        Returns:
            The Project node at the root of the transposed plan.
        """
        # The new join reads the aggregate's input directly, kept on the same
        # side of the join that the aggregate used to occupy.
        agg_input: RelationalNode = aggregate.inputs[0]
        non_agg_input: RelationalNode = join.inputs[1] if is_left else join.inputs[0]
        new_join_inputs: list[RelationalNode] = (
            [agg_input, non_agg_input] if is_left else [non_agg_input, agg_input]
        )

        new_join: Join = Join(
            new_join_inputs,
            new_condition,
            join.join_type,
            new_join_columns,
            join.cardinality,
            join.reverse_cardinality,
            join.correl_name,
        )

        new_aggregate: Aggregate = Aggregate(
            new_join, new_aggregate_keys, new_aggregate_aggs
        )

        return Project(new_aggregate, project_columns)


def pull_joins_after_aggregates(node: RelationalRoot) -> RelationalNode:
    """
    Runs the join-aggregate transpose pass over a relational plan by passing a
    fresh `JoinAggregateTransposeShuttle` through it.

    The accompanying change to `pydough/conversion/relational_converter.py`
    imports this function and calls it inside `optimize_relational_tree` as
    `root = confirm_root(pull_joins_after_aggregates(root))`, placed after
    `push_filters` and before `pruner.prune_unused_columns`, so the loop steps
    become A: projection pullup, B: expression simplification, C: filter
    pushdown, D: join-aggregate transpose, E: column pruning.

    Args:
        `node`: the root of the relational plan to transform.

    Returns:
        The node produced by the shuttle for `node`.
    """
    shuttle: JoinAggregateTransposeShuttle = JoinAggregateTransposeShuttle()
    return node.accept_shuttle(shuttle)
This will allow diff --git a/tests/test_plan_refsols/cryptbank_agg_03.txt b/tests/test_plan_refsols/cryptbank_agg_03.txt deleted file mode 100644 index 712ab943b..000000000 --- a/tests/test_plan_refsols/cryptbank_agg_03.txt +++ /dev/null @@ -1,5 +0,0 @@ -ROOT(columns=[('account_type', a_type), ('balance', a_balance), ('name', JOIN_STRINGS(' ':string, c_fname, c_lname))], orderings=[]) - FILTER(condition=RANKING(args=[], partition=[a_type], order=[(a_balance):desc_first], allow_ties=False) == 1:numeric, columns={'a_balance': a_balance, 'a_type': a_type, 'c_fname': c_fname, 'c_lname': c_lname}) - JOIN(condition=t0.a_custkey == t1.c_key, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'a_balance': t0.a_balance, 'a_type': t0.a_type, 'c_fname': t1.c_fname, 'c_lname': t1.c_lname}) - SCAN(table=CRBNK.ACCOUNTS, columns={'a_balance': a_balance, 'a_custkey': a_custkey, 'a_type': a_type}) - SCAN(table=CRBNK.CUSTOMERS, columns={'c_fname': c_fname, 'c_key': c_key, 'c_lname': c_lname}) diff --git a/tests/test_plan_refsols/cryptbank_agg_05.txt b/tests/test_plan_refsols/cryptbank_agg_05.txt deleted file mode 100644 index 1e255160c..000000000 --- a/tests/test_plan_refsols/cryptbank_agg_05.txt +++ /dev/null @@ -1,6 +0,0 @@ -ROOT(columns=[('avg_secs', ROUND(avg_expr, 2:numeric))], orderings=[]) - AGGREGATE(keys={}, aggregations={'avg_expr': AVG(DATEDIFF('seconds':string, a_open_ts, min_t_ts))}) - JOIN(condition=t0.a_key == t1.t_sourceaccount, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'a_open_ts': t0.a_open_ts, 'min_t_ts': t1.min_t_ts}) - SCAN(table=CRBNK.ACCOUNTS, columns={'a_key': a_key, 'a_open_ts': a_open_ts}) - AGGREGATE(keys={'t_sourceaccount': t_sourceaccount}, aggregations={'min_t_ts': MIN(t_ts)}) - SCAN(table=CRBNK.TRANSACTIONS, columns={'t_sourceaccount': t_sourceaccount, 't_ts': t_ts}) diff --git a/tests/test_plan_refsols/cryptbank_analysis_01.txt 
b/tests/test_plan_refsols/cryptbank_analysis_01.txt deleted file mode 100644 index 4a670a541..000000000 --- a/tests/test_plan_refsols/cryptbank_analysis_01.txt +++ /dev/null @@ -1,13 +0,0 @@ -ROOT(columns=[('key', c_key), ('name', JOIN_STRINGS(' ':string, c_fname, c_lname)), ('first_sends', DEFAULT_TO(sum_t_amount, 0:numeric))], orderings=[(DEFAULT_TO(sum_t_amount, 0:numeric)):desc_last, (c_key):asc_first], limit=3:numeric) - JOIN(condition=t0.c_key == t1.a_custkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_fname': t0.c_fname, 'c_key': t0.c_key, 'c_lname': t0.c_lname, 'sum_t_amount': t1.sum_t_amount}) - SCAN(table=CRBNK.CUSTOMERS, columns={'c_fname': c_fname, 'c_key': c_key, 'c_lname': c_lname}) - AGGREGATE(keys={'a_custkey': a_custkey}, aggregations={'sum_t_amount': SUM(t_amount)}) - FILTER(condition=RANKING(args=[], partition=[t_sourceaccount], order=[(t_ts):asc_last], allow_ties=False) == 1:numeric, columns={'a_custkey': a_custkey, 't_amount': t_amount}) - JOIN(condition=t0.t_destaccount == t1.a_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'a_custkey': t0.a_custkey, 't_amount': t0.t_amount, 't_sourceaccount': t0.t_sourceaccount, 't_ts': t0.t_ts}) - JOIN(condition=t0.a_key == t1.t_sourceaccount, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'a_custkey': t0.a_custkey, 't_amount': t1.t_amount, 't_destaccount': t1.t_destaccount, 't_sourceaccount': t1.t_sourceaccount, 't_ts': t1.t_ts}) - SCAN(table=CRBNK.ACCOUNTS, columns={'a_custkey': a_custkey, 'a_key': a_key}) - SCAN(table=CRBNK.TRANSACTIONS, columns={'t_amount': t_amount, 't_destaccount': t_destaccount, 't_sourceaccount': t_sourceaccount, 't_ts': t_ts}) - JOIN(condition=t0.a_branchkey == t1.b_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'a_key': t0.a_key}) - SCAN(table=CRBNK.ACCOUNTS, columns={'a_branchkey': a_branchkey, 'a_key': a_key}) - 
FILTER(condition=SLICE(b_addr, -5:numeric, None:unknown, None:unknown) == '94105':string, columns={'b_key': b_key}) - SCAN(table=CRBNK.BRANCHES, columns={'b_addr': b_addr, 'b_key': b_key}) diff --git a/tests/test_plan_refsols/cryptbank_analysis_02.txt b/tests/test_plan_refsols/cryptbank_analysis_02.txt deleted file mode 100644 index 6eeb69743..000000000 --- a/tests/test_plan_refsols/cryptbank_analysis_02.txt +++ /dev/null @@ -1,13 +0,0 @@ -ROOT(columns=[('key', c_key), ('name', JOIN_STRINGS(' ':string, c_fname, c_lname)), ('first_recvs', DEFAULT_TO(sum_t_amount, 0:numeric))], orderings=[(DEFAULT_TO(sum_t_amount, 0:numeric)):desc_last, (c_key):asc_first], limit=3:numeric) - JOIN(condition=t0.c_key == t1.a_custkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_fname': t0.c_fname, 'c_key': t0.c_key, 'c_lname': t0.c_lname, 'sum_t_amount': t1.sum_t_amount}) - SCAN(table=CRBNK.CUSTOMERS, columns={'c_fname': c_fname, 'c_key': c_key, 'c_lname': c_lname}) - AGGREGATE(keys={'a_custkey': a_custkey}, aggregations={'sum_t_amount': SUM(t_amount)}) - FILTER(condition=RANKING(args=[], partition=[t_destaccount], order=[(t_ts):asc_last], allow_ties=False) == 1:numeric, columns={'a_custkey': a_custkey, 't_amount': t_amount}) - JOIN(condition=t0.t_sourceaccount == t1.a_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'a_custkey': t0.a_custkey, 't_amount': t0.t_amount, 't_destaccount': t0.t_destaccount, 't_ts': t0.t_ts}) - JOIN(condition=t0.a_key == t1.t_destaccount, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'a_custkey': t0.a_custkey, 't_amount': t1.t_amount, 't_destaccount': t1.t_destaccount, 't_sourceaccount': t1.t_sourceaccount, 't_ts': t1.t_ts}) - SCAN(table=CRBNK.ACCOUNTS, columns={'a_custkey': a_custkey, 'a_key': a_key}) - SCAN(table=CRBNK.TRANSACTIONS, columns={'t_amount': t_amount, 't_destaccount': t_destaccount, 't_sourceaccount': t_sourceaccount, 
't_ts': t_ts}) - JOIN(condition=t0.a_branchkey == t1.b_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'a_key': t0.a_key}) - SCAN(table=CRBNK.ACCOUNTS, columns={'a_branchkey': a_branchkey, 'a_key': a_key}) - FILTER(condition=SLICE(b_addr, -5:numeric, None:unknown, None:unknown) == '94105':string, columns={'b_key': b_key}) - SCAN(table=CRBNK.BRANCHES, columns={'b_addr': b_addr, 'b_key': b_key}) diff --git a/tests/test_plan_refsols/cryptbank_analysis_03.txt b/tests/test_plan_refsols/cryptbank_analysis_03.txt deleted file mode 100644 index 7eb944f5e..000000000 --- a/tests/test_plan_refsols/cryptbank_analysis_03.txt +++ /dev/null @@ -1,24 +0,0 @@ -ROOT(columns=[('key', c_key), ('name', JOIN_STRINGS(' ':string, c_fname, c_lname)), ('first_sends', DEFAULT_TO(agg_1, 0:numeric)), ('first_recvs', DEFAULT_TO(sum_t_amount, 0:numeric))], orderings=[(DEFAULT_TO(agg_1, 0:numeric) + DEFAULT_TO(sum_t_amount, 0:numeric)):desc_last, (c_key):asc_first], limit=3:numeric) - JOIN(condition=t0.c_key == t1.a_custkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'agg_1': t0.sum_t_amount, 'c_fname': t0.c_fname, 'c_key': t0.c_key, 'c_lname': t0.c_lname, 'sum_t_amount': t1.sum_t_amount}) - JOIN(condition=t0.c_key == t1.a_custkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_fname': t0.c_fname, 'c_key': t0.c_key, 'c_lname': t0.c_lname, 'sum_t_amount': t1.sum_t_amount}) - SCAN(table=CRBNK.CUSTOMERS, columns={'c_fname': c_fname, 'c_key': c_key, 'c_lname': c_lname}) - AGGREGATE(keys={'a_custkey': a_custkey}, aggregations={'sum_t_amount': SUM(t_amount)}) - FILTER(condition=RANKING(args=[], partition=[t_sourceaccount], order=[(t_ts):asc_last], allow_ties=False) == 1:numeric, columns={'a_custkey': a_custkey, 't_amount': t_amount}) - JOIN(condition=t0.t_destaccount == t1.a_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'a_custkey': 
t0.a_custkey, 't_amount': t0.t_amount, 't_sourceaccount': t0.t_sourceaccount, 't_ts': t0.t_ts}) - JOIN(condition=t0.a_key == t1.t_sourceaccount, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'a_custkey': t0.a_custkey, 't_amount': t1.t_amount, 't_destaccount': t1.t_destaccount, 't_sourceaccount': t1.t_sourceaccount, 't_ts': t1.t_ts}) - SCAN(table=CRBNK.ACCOUNTS, columns={'a_custkey': a_custkey, 'a_key': a_key}) - SCAN(table=CRBNK.TRANSACTIONS, columns={'t_amount': t_amount, 't_destaccount': t_destaccount, 't_sourceaccount': t_sourceaccount, 't_ts': t_ts}) - JOIN(condition=t0.a_branchkey == t1.b_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'a_key': t0.a_key}) - SCAN(table=CRBNK.ACCOUNTS, columns={'a_branchkey': a_branchkey, 'a_key': a_key}) - FILTER(condition=SLICE(b_addr, -5:numeric, None:unknown, None:unknown) == '94105':string, columns={'b_key': b_key}) - SCAN(table=CRBNK.BRANCHES, columns={'b_addr': b_addr, 'b_key': b_key}) - AGGREGATE(keys={'a_custkey': a_custkey}, aggregations={'sum_t_amount': SUM(t_amount)}) - FILTER(condition=RANKING(args=[], partition=[t_destaccount], order=[(t_ts):asc_last], allow_ties=False) == 1:numeric, columns={'a_custkey': a_custkey, 't_amount': t_amount}) - JOIN(condition=t0.t_sourceaccount == t1.a_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'a_custkey': t0.a_custkey, 't_amount': t0.t_amount, 't_destaccount': t0.t_destaccount, 't_ts': t0.t_ts}) - JOIN(condition=t0.a_key == t1.t_destaccount, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'a_custkey': t0.a_custkey, 't_amount': t1.t_amount, 't_destaccount': t1.t_destaccount, 't_sourceaccount': t1.t_sourceaccount, 't_ts': t1.t_ts}) - SCAN(table=CRBNK.ACCOUNTS, columns={'a_custkey': a_custkey, 'a_key': a_key}) - SCAN(table=CRBNK.TRANSACTIONS, columns={'t_amount': t_amount, 't_destaccount': t_destaccount, 't_sourceaccount': 
t_sourceaccount, 't_ts': t_ts}) - JOIN(condition=t0.a_branchkey == t1.b_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'a_key': t0.a_key}) - SCAN(table=CRBNK.ACCOUNTS, columns={'a_branchkey': a_branchkey, 'a_key': a_key}) - FILTER(condition=SLICE(b_addr, -5:numeric, None:unknown, None:unknown) == '94105':string, columns={'b_key': b_key}) - SCAN(table=CRBNK.BRANCHES, columns={'b_addr': b_addr, 'b_key': b_key}) diff --git a/tests/test_plan_refsols/cryptbank_analysis_04.txt b/tests/test_plan_refsols/cryptbank_analysis_04.txt deleted file mode 100644 index 110bc6b3d..000000000 --- a/tests/test_plan_refsols/cryptbank_analysis_04.txt +++ /dev/null @@ -1,9 +0,0 @@ -ROOT(columns=[('key', a_key), ('cust_name', JOIN_STRINGS(' ':string, c_fname, c_lname)), ('n_trans', n_rows)], orderings=[(a_key):asc_first]) - JOIN(condition=t0.a_key == t1.t_sourceaccount, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'a_key': t0.a_key, 'c_fname': t0.c_fname, 'c_lname': t0.c_lname, 'n_rows': t1.n_rows}) - JOIN(condition=t0.a_custkey == t1.c_key, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'a_key': t0.a_key, 'c_fname': t1.c_fname, 'c_lname': t1.c_lname}) - SCAN(table=CRBNK.ACCOUNTS, columns={'a_custkey': a_custkey, 'a_key': a_key}) - FILTER(condition=MONOTONIC(1980:numeric, YEAR(c_birthday), 1985:numeric), columns={'c_fname': c_fname, 'c_key': c_key, 'c_lname': c_lname}) - SCAN(table=CRBNK.CUSTOMERS, columns={'c_birthday': c_birthday, 'c_fname': c_fname, 'c_key': c_key, 'c_lname': c_lname}) - AGGREGATE(keys={'t_sourceaccount': t_sourceaccount}, aggregations={'n_rows': COUNT()}) - FILTER(condition=t_amount > 9000.0:numeric, columns={'t_sourceaccount': t_sourceaccount}) - SCAN(table=CRBNK.TRANSACTIONS, columns={'t_amount': t_amount, 't_sourceaccount': t_sourceaccount}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_11.txt 
b/tests/test_plan_refsols/cryptbank_filter_count_11.txt deleted file mode 100644 index 80dad3caa..000000000 --- a/tests/test_plan_refsols/cryptbank_filter_count_11.txt +++ /dev/null @@ -1,8 +0,0 @@ -ROOT(columns=[('n', n_rows)], orderings=[]) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.t_sourceaccount == t1.a_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={}) - SCAN(table=CRBNK.TRANSACTIONS, columns={'t_sourceaccount': t_sourceaccount}) - JOIN(condition=t0.a_custkey == t1.c_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'a_key': t0.a_key}) - SCAN(table=CRBNK.ACCOUNTS, columns={'a_custkey': a_custkey, 'a_key': a_key}) - FILTER(condition=c_fname == 'alice':string, columns={'c_key': c_key}) - SCAN(table=CRBNK.CUSTOMERS, columns={'c_fname': c_fname, 'c_key': c_key}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_12.txt b/tests/test_plan_refsols/cryptbank_filter_count_12.txt deleted file mode 100644 index 03bc19679..000000000 --- a/tests/test_plan_refsols/cryptbank_filter_count_12.txt +++ /dev/null @@ -1,5 +0,0 @@ -ROOT(columns=[('n', n_rows)], orderings=[]) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - JOIN(condition=YEAR(t0.t_ts) == YEAR(t1.a_open_ts) & t0.t_sourceaccount == t1.a_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) - SCAN(table=CRBNK.TRANSACTIONS, columns={'t_sourceaccount': t_sourceaccount, 't_ts': t_ts}) - SCAN(table=CRBNK.ACCOUNTS, columns={'a_key': a_key, 'a_open_ts': a_open_ts}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_13.txt b/tests/test_plan_refsols/cryptbank_filter_count_13.txt deleted file mode 100644 index 836560846..000000000 --- a/tests/test_plan_refsols/cryptbank_filter_count_13.txt +++ /dev/null @@ -1,5 +0,0 @@ -ROOT(columns=[('n', n_rows)], orderings=[]) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.t_ts < 
DATETIME(t1.a_open_ts, '+2 years':string) & t0.t_destaccount == t1.a_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) - SCAN(table=CRBNK.TRANSACTIONS, columns={'t_destaccount': t_destaccount, 't_ts': t_ts}) - SCAN(table=CRBNK.ACCOUNTS, columns={'a_key': a_key, 'a_open_ts': a_open_ts}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_15.txt b/tests/test_plan_refsols/cryptbank_filter_count_15.txt deleted file mode 100644 index 174a826e7..000000000 --- a/tests/test_plan_refsols/cryptbank_filter_count_15.txt +++ /dev/null @@ -1,3 +0,0 @@ -ROOT(columns=[('n', n_rows)], orderings=[]) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - SCAN(table=CRBNK.CUSTOMERS, columns={}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_16.txt b/tests/test_plan_refsols/cryptbank_filter_count_16.txt deleted file mode 100644 index 174a826e7..000000000 --- a/tests/test_plan_refsols/cryptbank_filter_count_16.txt +++ /dev/null @@ -1,3 +0,0 @@ -ROOT(columns=[('n', n_rows)], orderings=[]) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - SCAN(table=CRBNK.CUSTOMERS, columns={}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_28.txt b/tests/test_plan_refsols/cryptbank_filter_count_28.txt deleted file mode 100644 index 4e8ab891b..000000000 --- a/tests/test_plan_refsols/cryptbank_filter_count_28.txt +++ /dev/null @@ -1,7 +0,0 @@ -ROOT(columns=[('n', n_rows)], orderings=[]) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.a_custkey == t1.c_key, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) - FILTER(condition=YEAR(a_open_ts) < 2020:numeric & a_balance >= 5000:numeric & a_type == 'retirement':string | a_type == 'savings':string, columns={'a_custkey': a_custkey}) - SCAN(table=CRBNK.ACCOUNTS, columns={'a_balance': a_balance, 'a_custkey': a_custkey, 'a_open_ts': a_open_ts, 'a_type': a_type}) - FILTER(condition=CONTAINS(c_email, 'outlook':string) | 
CONTAINS(c_email, 'gmail':string), columns={'c_key': c_key}) - SCAN(table=CRBNK.CUSTOMERS, columns={'c_email': c_email, 'c_key': c_key}) diff --git a/tests/test_plan_refsols/cryptbank_general_join_01.txt b/tests/test_plan_refsols/cryptbank_general_join_01.txt deleted file mode 100644 index c9ed86ebd..000000000 --- a/tests/test_plan_refsols/cryptbank_general_join_01.txt +++ /dev/null @@ -1,12 +0,0 @@ -ROOT(columns=[('branch_key', b_key), ('n_local_cust', n_rows), ('n_local_cust_local_acct', sum_n_rows)], orderings=[]) - AGGREGATE(keys={'b_key': b_key}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.b_key == t1.b_key & t0.c_key == t1.c_key, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'b_key': t0.b_key, 'n_rows': t1.n_rows}) - JOIN(condition=SLICE(t0.b_addr, -8:numeric, -6:numeric, None:unknown) == SLICE(t1.c_addr, -8:numeric, -6:numeric, None:unknown), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'b_key': t0.b_key, 'c_key': t1.c_key}) - SCAN(table=CRBNK.BRANCHES, columns={'b_addr': b_addr, 'b_key': b_key}) - SCAN(table=CRBNK.CUSTOMERS, columns={'c_addr': c_addr, 'c_key': c_key}) - AGGREGATE(keys={'b_key': b_key, 'c_key': c_key}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.c_key == t1.a_custkey & t1.a_branchkey == t0.b_key, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'b_key': t0.b_key, 'c_key': t0.c_key}) - JOIN(condition=SLICE(t0.b_addr, -8:numeric, -6:numeric, None:unknown) == SLICE(t1.c_addr, -8:numeric, -6:numeric, None:unknown), type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'b_key': t0.b_key, 'c_key': t1.c_key}) - SCAN(table=CRBNK.BRANCHES, columns={'b_addr': b_addr, 'b_key': b_key}) - SCAN(table=CRBNK.CUSTOMERS, columns={'c_addr': c_addr, 'c_key': c_key}) - SCAN(table=CRBNK.ACCOUNTS, columns={'a_branchkey': a_branchkey, 'a_custkey': a_custkey}) diff --git 
a/tests/test_plan_refsols/cryptbank_general_join_02.txt b/tests/test_plan_refsols/cryptbank_general_join_02.txt deleted file mode 100644 index 9a64589d2..000000000 --- a/tests/test_plan_refsols/cryptbank_general_join_02.txt +++ /dev/null @@ -1,3 +0,0 @@ -ROOT(columns=[('n', n_rows)], orderings=[]) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - SCAN(table=CRBNK.ACCOUNTS, columns={})